From 1813a68457bb45b378d5bbec615b167deff3bcfc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 20 Jul 2010 15:22:54 -0700 Subject: x86: Move alloc_desk_mask variables inside ifdef They are only useful with CONFIG_CPUMASK_OFFSTACK Avoids hundreds of warnings with a gcc 4.6 -Wall build. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index c03243ad84b4..fff1d77c3753 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -439,12 +439,12 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); static inline bool alloc_desc_masks(struct irq_desc *desc, int node, bool boot) { +#ifdef CONFIG_CPUMASK_OFFSTACK gfp_t gfp = GFP_ATOMIC; if (boot) gfp = GFP_NOWAIT; -#ifdef CONFIG_CPUMASK_OFFSTACK if (!alloc_cpumask_var_node(&desc->affinity, gfp, node)) return false; -- cgit v1.2.3 From e3239ff92a17976ac5d26fa0fe40ef3a9daf2523 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 14:06:41 +1000 Subject: memblock: Rename memblock_region to memblock_type and memblock_property to memblock_region Signed-off-by: Benjamin Herrenschmidt --- arch/arm/mm/init.c | 2 +- arch/arm/plat-omap/fb.c | 2 +- arch/microblaze/mm/init.c | 4 +- arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/mem.c | 26 ++--- arch/powerpc/platforms/embedded6xx/wii.c | 2 +- arch/sparc/mm/init_64.c | 6 +- drivers/video/omap2/vram.c | 2 +- include/linux/memblock.h | 24 ++--- mm/memblock.c | 168 +++++++++++++++---------------- 10 files changed, 118 insertions(+), 120 deletions(-) (limited to 'include') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 7185b00650fe..d1496e65dc2d 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -237,7 +237,7 @@ static void __init arm_bootmem_free(struct meminfo *mi, unsigned long min, #ifndef CONFIG_SPARSEMEM int pfn_valid(unsigned long pfn) { - struct memblock_region *mem = &memblock.memory; + struct memblock_type *mem = &memblock.memory; unsigned int left = 0, right = mem->cnt; do { diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c index 0054b9501a53..05bf22827404 100644 --- a/arch/arm/plat-omap/fb.c +++ b/arch/arm/plat-omap/fb.c @@ -173,7 +173,7 @@ static int check_fbmem_region(int region_idx, struct omapfb_mem_region *rg, static int valid_sdram(unsigned long addr, unsigned long size) { - struct memblock_property res; + struct memblock_region res; res.base = addr; res.size = size; diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index db5934989926..afd6494fbbc6 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -77,8 +77,8 @@ void __init setup_memory(void) /* Find main memory where is the kernel */ for (i = 0; i < memblock.memory.cnt; i++) { - memory_start = (u32) memblock.memory.region[i].base; - memory_end = (u32) memblock.memory.region[i].base + memory_start = (u32) memblock.memory.regions[i].base; + memory_end = (u32) memblock.memory.regions[i].base + (u32) memblock.memory.region[i].size; if ((memory_start <= (u32)_text) && ((u32)_text <= memory_end)) { diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 09dffe6efa46..b1a3784744db 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -660,7 +660,7 @@ static void __init htab_initialize(void) /* create bolted the linear mapping in the hash table */ for (i=0; i < memblock.memory.cnt; i++) { - base = (unsigned long)__va(memblock.memory.region[i].base); + base = (unsigned long)__va(memblock.memory.regions[i].base); size = memblock.memory.region[i].size; DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1a84a8d00005..a33f5c186fb7 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -86,10 +86,10 @@ int page_is_ram(unsigned long pfn) for (i=0; i < memblock.memory.cnt; i++) { unsigned long base; - base = memblock.memory.region[i].base; + base = memblock.memory.regions[i].base; if ((paddr >= base) && - (paddr < (base + memblock.memory.region[i].size))) { + (paddr < (base + memblock.memory.regions[i].size))) { return 1; } } @@ -149,7 +149,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)) { - struct memblock_property res; + struct memblock_region res; unsigned long pfn, len; u64 end; int ret = -1; @@ -206,7 +206,7 @@ void __init do_init_bootmem(void) /* Add active regions with valid PFNs */ for (i = 0; i < memblock.memory.cnt; i++) { unsigned long start_pfn, end_pfn; - start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; + start_pfn = memblock.memory.regions[i].base >> PAGE_SHIFT; end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); add_active_range(0, start_pfn, end_pfn); } @@ -219,16 +219,16 @@ void __init do_init_bootmem(void) /* reserve the sections we're already using */ for (i = 0; i < memblock.reserved.cnt; i++) { - unsigned long addr = memblock.reserved.region[i].base + + unsigned long addr = memblock.reserved.regions[i].base + memblock_size_bytes(&memblock.reserved, i) - 1; if (addr < lowmem_end_addr) - reserve_bootmem(memblock.reserved.region[i].base, + reserve_bootmem(memblock.reserved.regions[i].base, memblock_size_bytes(&memblock.reserved, i), BOOTMEM_DEFAULT); - else if (memblock.reserved.region[i].base < lowmem_end_addr) { + else if (memblock.reserved.regions[i].base < lowmem_end_addr) { unsigned long adjusted_size = lowmem_end_addr - - memblock.reserved.region[i].base; - reserve_bootmem(memblock.reserved.region[i].base, + memblock.reserved.regions[i].base; + reserve_bootmem(memblock.reserved.regions[i].base, adjusted_size, BOOTMEM_DEFAULT); } } @@ -237,7 +237,7 @@ void __init do_init_bootmem(void) /* reserve the sections we're already using */ for (i = 0; i < memblock.reserved.cnt; i++) - reserve_bootmem(memblock.reserved.region[i].base, + reserve_bootmem(memblock.reserved.regions[i].base, memblock_size_bytes(&memblock.reserved, i), BOOTMEM_DEFAULT); @@ -257,10 +257,10 @@ static int __init mark_nonram_nosave(void) for (i = 0; i < memblock.memory.cnt - 1; i++) { memblock_region_max_pfn = - (memblock.memory.region[i].base >> PAGE_SHIFT) + - (memblock.memory.region[i].size >> PAGE_SHIFT); + (memblock.memory.regions[i].base >> PAGE_SHIFT) + + (memblock.memory.regions[i].size >> PAGE_SHIFT); memblock_next_region_start_pfn = - memblock.memory.region[i+1].base >> PAGE_SHIFT; + memblock.memory.regions[i+1].base >> PAGE_SHIFT; if (memblock_region_max_pfn < memblock_next_region_start_pfn) register_nosave_region(memblock_region_max_pfn, diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 5cdcc7c8d973..8450c29e9b2f 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -65,7 +65,7 @@ static int __init page_aligned(unsigned long x) void __init wii_memory_fixups(void) { - struct memblock_property *p = memblock.memory.region; + struct memblock_region *p = memblock.memory.region; /* * This is part of a workaround to allow the use of two diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index f0434513df15..16d8bee889ba 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -978,7 +978,7 @@ static void __init add_node_ranges(void) unsigned long size = memblock_size_bytes(&memblock.memory, i); unsigned long start, end; - start = memblock.memory.region[i].base; + start = memblock.memory.regions[i].base; end = start + size; while (start < end) { unsigned long this_end; @@ -1299,7 +1299,7 @@ static void __init bootmem_init_nonnuma(void) if (!size) continue; - start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; + start_pfn = memblock.memory.regions[i].base >> PAGE_SHIFT; end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); add_active_range(0, start_pfn, end_pfn); } @@ -1339,7 +1339,7 @@ static void __init trim_reserved_in_node(int nid) numadbg(" trim_reserved_in_node(%d)\n", nid); for (i = 0; i < memblock.reserved.cnt; i++) { - unsigned long start = memblock.reserved.region[i].base; + unsigned long start = memblock.reserved.regions[i].base; unsigned long size = memblock_size_bytes(&memblock.reserved, i); unsigned long end = start + size; diff --git a/drivers/video/omap2/vram.c b/drivers/video/omap2/vram.c index f6fdc2085f3e..0f2532bf0f04 100644 --- a/drivers/video/omap2/vram.c +++ b/drivers/video/omap2/vram.c @@ -554,7 +554,7 @@ void __init omap_vram_reserve_sdram_memblock(void) size = PAGE_ALIGN(size); if (paddr) { - struct memblock_property res; + struct memblock_region res; res.base = paddr; res.size = size; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index a59faf2b5edd..86e7daf742f2 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -18,22 +18,22 @@ #define MAX_MEMBLOCK_REGIONS 128 -struct memblock_property { +struct memblock_region { u64 base; u64 size; }; -struct memblock_region { +struct memblock_type { unsigned long cnt; u64 size; - struct memblock_property region[MAX_MEMBLOCK_REGIONS+1]; + struct memblock_region regions[MAX_MEMBLOCK_REGIONS+1]; }; struct memblock { unsigned long debug; u64 rmo_size; - struct memblock_region memory; - struct memblock_region reserved; + struct memblock_type memory; + struct memblock_type reserved; }; extern struct memblock memblock; @@ -56,27 +56,27 @@ extern u64 memblock_end_of_DRAM(void); extern void __init memblock_enforce_memory_limit(u64 memory_limit); extern int __init memblock_is_reserved(u64 addr); extern int memblock_is_region_reserved(u64 base, u64 size); -extern int memblock_find(struct memblock_property *res); +extern int memblock_find(struct memblock_region *res); extern void memblock_dump_all(void); static inline u64 -memblock_size_bytes(struct memblock_region *type, unsigned long region_nr) +memblock_size_bytes(struct memblock_type *type, unsigned long region_nr) { - return type->region[region_nr].size; + return type->regions[region_nr].size; } static inline u64 -memblock_size_pages(struct memblock_region *type, unsigned long region_nr) +memblock_size_pages(struct memblock_type *type, unsigned long region_nr) { return memblock_size_bytes(type, region_nr) >> PAGE_SHIFT; } static inline u64 -memblock_start_pfn(struct memblock_region *type, unsigned long region_nr) +memblock_start_pfn(struct memblock_type *type, unsigned long region_nr) { - return type->region[region_nr].base >> PAGE_SHIFT; + return type->regions[region_nr].base >> PAGE_SHIFT; } static inline u64 -memblock_end_pfn(struct memblock_region *type, unsigned long region_nr) +memblock_end_pfn(struct memblock_type *type, unsigned long region_nr) { return memblock_start_pfn(type, region_nr) + memblock_size_pages(type, region_nr); diff --git a/mm/memblock.c b/mm/memblock.c index 43840b305ecb..6f407ccf604e 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -29,7 +29,7 @@ static int __init early_memblock(char *p) } early_param("memblock", early_memblock); -static void memblock_dump(struct memblock_region *region, char *name) +static void memblock_dump(struct memblock_type *region, char *name) { unsigned long long base, size; int i; @@ -37,8 +37,8 @@ static void memblock_dump(struct memblock_region *region, char *name) pr_info(" %s.cnt = 0x%lx\n", name, region->cnt); for (i = 0; i < region->cnt; i++) { - base = region->region[i].base; - size = region->region[i].size; + base = region->regions[i].base; + size = region->regions[i].size; pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n", name, i, base, base + size - 1, size); @@ -74,34 +74,34 @@ static long memblock_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2) return 0; } -static long memblock_regions_adjacent(struct memblock_region *rgn, +static long memblock_regions_adjacent(struct memblock_type *type, unsigned long r1, unsigned long r2) { - u64 base1 = rgn->region[r1].base; - u64 size1 = rgn->region[r1].size; - u64 base2 = rgn->region[r2].base; - u64 size2 = rgn->region[r2].size; + u64 base1 = type->regions[r1].base; + u64 size1 = type->regions[r1].size; + u64 base2 = type->regions[r2].base; + u64 size2 = type->regions[r2].size; return memblock_addrs_adjacent(base1, size1, base2, size2); } -static void memblock_remove_region(struct memblock_region *rgn, unsigned long r) +static void memblock_remove_region(struct memblock_type *type, unsigned long r) { unsigned long i; - for (i = r; i < rgn->cnt - 1; i++) { - rgn->region[i].base = rgn->region[i + 1].base; - rgn->region[i].size = rgn->region[i + 1].size; + for (i = r; i < type->cnt - 1; i++) { + type->regions[i].base = type->regions[i + 1].base; + type->regions[i].size = type->regions[i + 1].size; } - rgn->cnt--; + type->cnt--; } /* Assumption: base addr of region 1 < base addr of region 2 */ -static void memblock_coalesce_regions(struct memblock_region *rgn, +static void memblock_coalesce_regions(struct memblock_type *type, unsigned long r1, unsigned long r2) { - rgn->region[r1].size += rgn->region[r2].size; - memblock_remove_region(rgn, r2); + type->regions[r1].size += type->regions[r2].size; + memblock_remove_region(type, r2); } void __init memblock_init(void) @@ -109,13 +109,13 @@ void __init memblock_init(void) /* Create a dummy zero size MEMBLOCK which will get coalesced away later. * This simplifies the memblock_add() code below... */ - memblock.memory.region[0].base = 0; - memblock.memory.region[0].size = 0; + memblock.memory.regions[0].base = 0; + memblock.memory.regions[0].size = 0; memblock.memory.cnt = 1; /* Ditto. */ - memblock.reserved.region[0].base = 0; - memblock.reserved.region[0].size = 0; + memblock.reserved.regions[0].base = 0; + memblock.reserved.regions[0].size = 0; memblock.reserved.cnt = 1; } @@ -126,24 +126,24 @@ void __init memblock_analyze(void) memblock.memory.size = 0; for (i = 0; i < memblock.memory.cnt; i++) - memblock.memory.size += memblock.memory.region[i].size; + memblock.memory.size += memblock.memory.regions[i].size; } -static long memblock_add_region(struct memblock_region *rgn, u64 base, u64 size) +static long memblock_add_region(struct memblock_type *type, u64 base, u64 size) { unsigned long coalesced = 0; long adjacent, i; - if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) { - rgn->region[0].base = base; - rgn->region[0].size = size; + if ((type->cnt == 1) && (type->regions[0].size == 0)) { + type->regions[0].base = base; + type->regions[0].size = size; return 0; } /* First try and coalesce this MEMBLOCK with another. */ - for (i = 0; i < rgn->cnt; i++) { - u64 rgnbase = rgn->region[i].base; - u64 rgnsize = rgn->region[i].size; + for (i = 0; i < type->cnt; i++) { + u64 rgnbase = type->regions[i].base; + u64 rgnsize = type->regions[i].size; if ((rgnbase == base) && (rgnsize == size)) /* Already have this region, so we're done */ @@ -151,61 +151,59 @@ static long memblock_add_region(struct memblock_region *rgn, u64 base, u64 size) adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize); if (adjacent > 0) { - rgn->region[i].base -= size; - rgn->region[i].size += size; + type->regions[i].base -= size; + type->regions[i].size += size; coalesced++; break; } else if (adjacent < 0) { - rgn->region[i].size += size; + type->regions[i].size += size; coalesced++; break; } } - if ((i < rgn->cnt - 1) && memblock_regions_adjacent(rgn, i, i+1)) { - memblock_coalesce_regions(rgn, i, i+1); + if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i+1)) { + memblock_coalesce_regions(type, i, i+1); coalesced++; } if (coalesced) return coalesced; - if (rgn->cnt >= MAX_MEMBLOCK_REGIONS) + if (type->cnt >= MAX_MEMBLOCK_REGIONS) return -1; /* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */ - for (i = rgn->cnt - 1; i >= 0; i--) { - if (base < rgn->region[i].base) { - rgn->region[i+1].base = rgn->region[i].base; - rgn->region[i+1].size = rgn->region[i].size; + for (i = type->cnt - 1; i >= 0; i--) { + if (base < type->regions[i].base) { + type->regions[i+1].base = type->regions[i].base; + type->regions[i+1].size = type->regions[i].size; } else { - rgn->region[i+1].base = base; - rgn->region[i+1].size = size; + type->regions[i+1].base = base; + type->regions[i+1].size = size; break; } } - if (base < rgn->region[0].base) { - rgn->region[0].base = base; - rgn->region[0].size = size; + if (base < type->regions[0].base) { + type->regions[0].base = base; + type->regions[0].size = size; } - rgn->cnt++; + type->cnt++; return 0; } long memblock_add(u64 base, u64 size) { - struct memblock_region *_rgn = &memblock.memory; - /* On pSeries LPAR systems, the first MEMBLOCK is our RMO region. */ if (base == 0) memblock.rmo_size = size; - return memblock_add_region(_rgn, base, size); + return memblock_add_region(&memblock.memory, base, size); } -static long __memblock_remove(struct memblock_region *rgn, u64 base, u64 size) +static long __memblock_remove(struct memblock_type *type, u64 base, u64 size) { u64 rgnbegin, rgnend; u64 end = base + size; @@ -214,34 +212,34 @@ static long __memblock_remove(struct memblock_region *rgn, u64 base, u64 size) rgnbegin = rgnend = 0; /* supress gcc warnings */ /* Find the region where (base, size) belongs to */ - for (i=0; i < rgn->cnt; i++) { - rgnbegin = rgn->region[i].base; - rgnend = rgnbegin + rgn->region[i].size; + for (i=0; i < type->cnt; i++) { + rgnbegin = type->regions[i].base; + rgnend = rgnbegin + type->regions[i].size; if ((rgnbegin <= base) && (end <= rgnend)) break; } /* Didn't find the region */ - if (i == rgn->cnt) + if (i == type->cnt) return -1; /* Check to see if we are removing entire region */ if ((rgnbegin == base) && (rgnend == end)) { - memblock_remove_region(rgn, i); + memblock_remove_region(type, i); return 0; } /* Check to see if region is matching at the front */ if (rgnbegin == base) { - rgn->region[i].base = end; - rgn->region[i].size -= size; + type->regions[i].base = end; + type->regions[i].size -= size; return 0; } /* Check to see if the region is matching at the end */ if (rgnend == end) { - rgn->region[i].size -= size; + type->regions[i].size -= size; return 0; } @@ -249,8 +247,8 @@ static long __memblock_remove(struct memblock_region *rgn, u64 base, u64 size) * We need to split the entry - adjust the current one to the * beginging of the hole and add the region after hole. */ - rgn->region[i].size = base - rgn->region[i].base; - return memblock_add_region(rgn, end, rgnend - end); + type->regions[i].size = base - type->regions[i].base; + return memblock_add_region(type, end, rgnend - end); } long memblock_remove(u64 base, u64 size) @@ -265,25 +263,25 @@ long __init memblock_free(u64 base, u64 size) long __init memblock_reserve(u64 base, u64 size) { - struct memblock_region *_rgn = &memblock.reserved; + struct memblock_type *_rgn = &memblock.reserved; BUG_ON(0 == size); return memblock_add_region(_rgn, base, size); } -long memblock_overlaps_region(struct memblock_region *rgn, u64 base, u64 size) +long memblock_overlaps_region(struct memblock_type *type, u64 base, u64 size) { unsigned long i; - for (i = 0; i < rgn->cnt; i++) { - u64 rgnbase = rgn->region[i].base; - u64 rgnsize = rgn->region[i].size; + for (i = 0; i < type->cnt; i++) { + u64 rgnbase = type->regions[i].base; + u64 rgnsize = type->regions[i].size; if (memblock_addrs_overlap(base, size, rgnbase, rgnsize)) break; } - return (i < rgn->cnt) ? i : -1; + return (i < type->cnt) ? i : -1; } static u64 memblock_align_down(u64 addr, u64 size) @@ -311,7 +309,7 @@ static u64 __init memblock_alloc_nid_unreserved(u64 start, u64 end, base = ~(u64)0; return base; } - res_base = memblock.reserved.region[j].base; + res_base = memblock.reserved.regions[j].base; if (res_base < size) break; base = memblock_align_down(res_base - size, align); @@ -320,7 +318,7 @@ static u64 __init memblock_alloc_nid_unreserved(u64 start, u64 end, return ~(u64)0; } -static u64 __init memblock_alloc_nid_region(struct memblock_property *mp, +static u64 __init memblock_alloc_nid_region(struct memblock_region *mp, u64 (*nid_range)(u64, u64, int *), u64 size, u64 align, int nid) { @@ -350,7 +348,7 @@ static u64 __init memblock_alloc_nid_region(struct memblock_property *mp, u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, u64 (*nid_range)(u64 start, u64 end, int *nid)) { - struct memblock_region *mem = &memblock.memory; + struct memblock_type *mem = &memblock.memory; int i; BUG_ON(0 == size); @@ -358,7 +356,7 @@ u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, size = memblock_align_up(size, align); for (i = 0; i < mem->cnt; i++) { - u64 ret = memblock_alloc_nid_region(&mem->region[i], + u64 ret = memblock_alloc_nid_region(&mem->regions[i], nid_range, size, align, nid); if (ret != ~(u64)0) @@ -402,8 +400,8 @@ u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr) max_addr = MEMBLOCK_REAL_LIMIT; for (i = memblock.memory.cnt - 1; i >= 0; i--) { - u64 memblockbase = memblock.memory.region[i].base; - u64 memblocksize = memblock.memory.region[i].size; + u64 memblockbase = memblock.memory.regions[i].base; + u64 memblocksize = memblock.memory.regions[i].size; if (memblocksize < size) continue; @@ -423,7 +421,7 @@ u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr) return 0; return base; } - res_base = memblock.reserved.region[j].base; + res_base = memblock.reserved.regions[j].base; if (res_base < size) break; base = memblock_align_down(res_base - size, align); @@ -442,7 +440,7 @@ u64 memblock_end_of_DRAM(void) { int idx = memblock.memory.cnt - 1; - return (memblock.memory.region[idx].base + memblock.memory.region[idx].size); + return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); } /* You must call memblock_analyze() after this. */ @@ -450,7 +448,7 @@ void __init memblock_enforce_memory_limit(u64 memory_limit) { unsigned long i; u64 limit; - struct memblock_property *p; + struct memblock_region *p; if (!memory_limit) return; @@ -458,24 +456,24 @@ void __init memblock_enforce_memory_limit(u64 memory_limit) /* Truncate the memblock regions to satisfy the memory limit. */ limit = memory_limit; for (i = 0; i < memblock.memory.cnt; i++) { - if (limit > memblock.memory.region[i].size) { - limit -= memblock.memory.region[i].size; + if (limit > memblock.memory.regions[i].size) { + limit -= memblock.memory.regions[i].size; continue; } - memblock.memory.region[i].size = limit; + memblock.memory.regions[i].size = limit; memblock.memory.cnt = i + 1; break; } - if (memblock.memory.region[0].size < memblock.rmo_size) - memblock.rmo_size = memblock.memory.region[0].size; + if (memblock.memory.regions[0].size < memblock.rmo_size) + memblock.rmo_size = memblock.memory.regions[0].size; memory_limit = memblock_end_of_DRAM(); /* And truncate any reserves above the limit also. */ for (i = 0; i < memblock.reserved.cnt; i++) { - p = &memblock.reserved.region[i]; + p = &memblock.reserved.regions[i]; if (p->base > memory_limit) p->size = 0; @@ -494,9 +492,9 @@ int __init memblock_is_reserved(u64 addr) int i; for (i = 0; i < memblock.reserved.cnt; i++) { - u64 upper = memblock.reserved.region[i].base + - memblock.reserved.region[i].size - 1; - if ((addr >= memblock.reserved.region[i].base) && (addr <= upper)) + u64 upper = memblock.reserved.regions[i].base + + memblock.reserved.regions[i].size - 1; + if ((addr >= memblock.reserved.regions[i].base) && (addr <= upper)) return 1; } return 0; @@ -511,7 +509,7 @@ int memblock_is_region_reserved(u64 base, u64 size) * Given a , find which memory regions belong to this range. * Adjust the request and return a contiguous chunk. */ -int memblock_find(struct memblock_property *res) +int memblock_find(struct memblock_region *res) { int i; u64 rstart, rend; @@ -520,8 +518,8 @@ int memblock_find(struct memblock_property *res) rend = rstart + res->size - 1; for (i = 0; i < memblock.memory.cnt; i++) { - u64 start = memblock.memory.region[i].base; - u64 end = start + memblock.memory.region[i].size - 1; + u64 start = memblock.memory.regions[i].base; + u64 end = start + memblock.memory.regions[i].size - 1; if (start > rend) return -1; -- cgit v1.2.3 From 411a25a80da328f5ae6b6c037872ffe867fcc130 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:38:56 -0700 Subject: memblock: No reason to include asm/memblock.h late Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 86e7daf742f2..4b6931327b22 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -16,6 +16,8 @@ #include #include +#include + #define MAX_MEMBLOCK_REGIONS 128 struct memblock_region { @@ -82,8 +84,6 @@ memblock_end_pfn(struct memblock_type *type, unsigned long region_nr) memblock_size_pages(type, region_nr); } -#include - #endif /* __KERNEL__ */ #endif /* _LINUX_MEMBLOCK_H */ -- cgit v1.2.3 From 72d4b0b4e0e7fa858767e03972771a9f7c02b689 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 14:38:47 +1000 Subject: memblock: Implement memblock_is_memory and memblock_is_region_memory To make it fast, we steal ARM's binary search for memblock_is_memory() and we use that to also the replace existing implementation of memblock_is_reserved(). Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 ++ mm/memblock.c | 42 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 36 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 4b6931327b22..47bceb187058 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -56,6 +56,8 @@ extern u64 __init __memblock_alloc_base(u64 size, extern u64 __init memblock_phys_mem_size(void); extern u64 memblock_end_of_DRAM(void); extern void __init memblock_enforce_memory_limit(u64 memory_limit); +extern int memblock_is_memory(u64 addr); +extern int memblock_is_region_memory(u64 base, u64 size); extern int __init memblock_is_reserved(u64 addr); extern int memblock_is_region_reserved(u64 base, u64 size); extern int memblock_find(struct memblock_region *res); diff --git a/mm/memblock.c b/mm/memblock.c index 6f407ccf604e..aa88c62bce7f 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -487,17 +487,43 @@ void __init memblock_enforce_memory_limit(u64 memory_limit) } } +static int memblock_search(struct memblock_type *type, u64 addr) +{ + unsigned int left = 0, right = type->cnt; + + do { + unsigned int mid = (right + left) / 2; + + if (addr < type->regions[mid].base) + right = mid; + else if (addr >= (type->regions[mid].base + + type->regions[mid].size)) + left = mid + 1; + else + return mid; + } while (left < right); + return -1; +} + int __init memblock_is_reserved(u64 addr) { - int i; + return memblock_search(&memblock.reserved, addr) != -1; +} - for (i = 0; i < memblock.reserved.cnt; i++) { - u64 upper = memblock.reserved.regions[i].base + - memblock.reserved.regions[i].size - 1; - if ((addr >= memblock.reserved.regions[i].base) && (addr <= upper)) - return 1; - } - return 0; +int memblock_is_memory(u64 addr) +{ + return memblock_search(&memblock.memory, addr) != -1; +} + +int memblock_is_region_memory(u64 base, u64 size) +{ + int idx = memblock_search(&memblock.reserved, base); + + if (idx == -1) + return 0; + return memblock.reserved.regions[idx].base <= base && + (memblock.reserved.regions[idx].base + + memblock.reserved.regions[idx].size) >= (base + size); } int memblock_is_region_reserved(u64 base, u64 size) -- cgit v1.2.3 From 5b385f259fa4d356452e3b4729cbaf5213f4f55b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:40:38 +1000 Subject: memblock: Introduce for_each_memblock() and new accessors Walk memblock's using for_each_memblock() and use memblock_region_base/end_pfn() for getting to PFNs. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 47bceb187058..c914112cd24f 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -64,6 +64,7 @@ extern int memblock_find(struct memblock_region *res); extern void memblock_dump_all(void); +/* Obsolete accessors */ static inline u64 memblock_size_bytes(struct memblock_type *type, unsigned long region_nr) { @@ -86,6 +87,57 @@ memblock_end_pfn(struct memblock_type *type, unsigned long region_nr) memblock_size_pages(type, region_nr); } +/* + * pfn conversion functions + * + * While the memory MEMBLOCKs should always be page aligned, the reserved + * MEMBLOCKs may not be. This accessor attempt to provide a very clear + * idea of what they return for such non aligned MEMBLOCKs. + */ + +/** + * memblock_region_base_pfn - Return the lowest pfn intersecting with the region + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_base_pfn(const struct memblock_region *reg) +{ + return reg->base >> PAGE_SHIFT; +} + +/** + * memblock_region_last_pfn - Return the highest pfn intersecting with the region + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_last_pfn(const struct memblock_region *reg) +{ + return (reg->base + reg->size - 1) >> PAGE_SHIFT; +} + +/** + * memblock_region_end_pfn - Return the pfn of the first page following the region + * but not intersecting it + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_end_pfn(const struct memblock_region *reg) +{ + return memblock_region_last_pfn(reg) + 1; +} + +/** + * memblock_region_pages - Return the number of pages covering a region + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_pages(const struct memblock_region *reg) +{ + return memblock_region_end_pfn(reg) - memblock_region_end_pfn(reg); +} + +#define for_each_memblock(memblock_type, region) \ + for (region = memblock.memblock_type.regions; \ + region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ + region++) + + #endif /* __KERNEL__ */ #endif /* _LINUX_MEMBLOCK_H */ -- cgit v1.2.3 From 1e2b904026e9debf95f500b8980a00c43ac0f31c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:52:25 +1000 Subject: memblock: Remove obsolete accessors Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c914112cd24f..7d70fdd43db4 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -64,29 +64,6 @@ extern int memblock_find(struct memblock_region *res); extern void memblock_dump_all(void); -/* Obsolete accessors */ -static inline u64 -memblock_size_bytes(struct memblock_type *type, unsigned long region_nr) -{ - return type->regions[region_nr].size; -} -static inline u64 -memblock_size_pages(struct memblock_type *type, unsigned long region_nr) -{ - return memblock_size_bytes(type, region_nr) >> PAGE_SHIFT; -} -static inline u64 -memblock_start_pfn(struct memblock_type *type, unsigned long region_nr) -{ - return type->regions[region_nr].base >> PAGE_SHIFT; -} -static inline u64 -memblock_end_pfn(struct memblock_type *type, unsigned long region_nr) -{ - return memblock_start_pfn(type, region_nr) + - memblock_size_pages(type, region_nr); -} - /* * pfn conversion functions * -- cgit v1.2.3 From b693fffb189fbfe7e1e8317ce5838808be8666a0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:52:55 +1000 Subject: memblock: Remove memblock_find() Nobody uses it anymore. It's semantics were ... weird Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 1 - mm/memblock.c | 32 -------------------------------- 2 files changed, 33 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 7d70fdd43db4..776c7d945dcc 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -60,7 +60,6 @@ extern int memblock_is_memory(u64 addr); extern int memblock_is_region_memory(u64 base, u64 size); extern int __init memblock_is_reserved(u64 addr); extern int memblock_is_region_reserved(u64 base, u64 size); -extern int memblock_find(struct memblock_region *res); extern void memblock_dump_all(void); diff --git a/mm/memblock.c b/mm/memblock.c index aa88c62bce7f..8a118b71cbec 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -531,35 +531,3 @@ int memblock_is_region_reserved(u64 base, u64 size) return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } -/* - * Given a , find which memory regions belong to this range. - * Adjust the request and return a contiguous chunk. - */ -int memblock_find(struct memblock_region *res) -{ - int i; - u64 rstart, rend; - - rstart = res->base; - rend = rstart + res->size - 1; - - for (i = 0; i < memblock.memory.cnt; i++) { - u64 start = memblock.memory.regions[i].base; - u64 end = start + memblock.memory.regions[i].size - 1; - - if (start > rend) - return -1; - - if ((end >= rstart) && (start < rend)) { - /* adjust the request */ - if (rstart < start) - rstart = start; - if (rend > end) - rend = end; - res->base = rstart; - res->size = rend - rstart + 1; - return 0; - } - } - return -1; -} -- cgit v1.2.3 From 35a1f0bd07015dde66501b47cfb6ddc72ebe7346 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:38:58 -0700 Subject: memblock: Remove nid_range argument, arch provides memblock_nid_range() instead Signed-off-by: Benjamin Herrenschmidt --- arch/sparc/mm/init_64.c | 16 ++++++---------- include/linux/memblock.h | 7 +++++-- mm/memblock.c | 13 ++++++++----- 3 files changed, 19 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index dd68025ecdbe..0883113624b9 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -785,8 +785,7 @@ static int find_node(unsigned long addr) return -1; } -static unsigned long long nid_range(unsigned long long start, - unsigned long long end, int *nid) +u64 memblock_nid_range(u64 start, u64 end, int *nid) { *nid = find_node(start); start += PAGE_SIZE; @@ -804,8 +803,7 @@ static unsigned long long nid_range(unsigned long long start, return start; } #else -static unsigned long long nid_range(unsigned long long start, - unsigned long long end, int *nid) +u64 memblock_nid_range(u64 start, u64 end, int *nid) { *nid = 0; return end; @@ -822,8 +820,7 @@ static void __init allocate_node_data(int nid) struct pglist_data *p; #ifdef CONFIG_NEED_MULTIPLE_NODES - paddr = memblock_alloc_nid(sizeof(struct pglist_data), - SMP_CACHE_BYTES, nid, nid_range); + paddr = memblock_alloc_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid); if (!paddr) { prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid); prom_halt(); @@ -843,8 +840,7 @@ static void __init allocate_node_data(int nid) if (p->node_spanned_pages) { num_pages = bootmem_bootmap_pages(p->node_spanned_pages); - paddr = memblock_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid, - nid_range); + paddr = memblock_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid); if (!paddr) { prom_printf("Cannot allocate bootmap for nid[%d]\n", nid); @@ -984,7 +980,7 @@ static void __init add_node_ranges(void) unsigned long this_end; int nid; - this_end = nid_range(start, end, &nid); + this_end = memblock_nid_range(start, end, &nid); numadbg("Adding active range nid[%d] " "start[%lx] end[%lx]\n", @@ -1317,7 +1313,7 @@ static void __init reserve_range_in_node(int nid, unsigned long start, unsigned long this_end; int n; - this_end = nid_range(start, end, &n); + this_end = memblock_nid_range(start, end, &n); if (n == nid) { numadbg(" MATCH reserving range [%lx:%lx]\n", start, this_end); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 776c7d945dcc..367dea6e95a0 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -46,8 +46,7 @@ extern long memblock_add(u64 base, u64 size); extern long memblock_remove(u64 base, u64 size); extern long __init memblock_free(u64 base, u64 size); extern long __init memblock_reserve(u64 base, u64 size); -extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, - u64 (*nid_range)(u64, u64, int *)); +extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid); extern u64 __init memblock_alloc(u64 size, u64 align); extern u64 __init memblock_alloc_base(u64 size, u64, u64 max_addr); @@ -63,6 +62,10 @@ extern int memblock_is_region_reserved(u64 base, u64 size); extern void memblock_dump_all(void); +/* Provided by the architecture */ +extern u64 memblock_nid_range(u64 start, u64 end, int *nid); + + /* * pfn conversion functions * diff --git a/mm/memblock.c b/mm/memblock.c index 8a118b71cbec..13807f280ada 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -319,7 +319,6 @@ static u64 __init memblock_alloc_nid_unreserved(u64 start, u64 end, } static u64 __init memblock_alloc_nid_region(struct memblock_region *mp, - u64 (*nid_range)(u64, u64, int *), u64 size, u64 align, int nid) { u64 start, end; @@ -332,7 +331,7 @@ static u64 __init memblock_alloc_nid_region(struct memblock_region *mp, u64 this_end; int this_nid; - this_end = nid_range(start, end, &this_nid); + this_end = memblock_nid_range(start, end, &this_nid); if (this_nid == nid) { u64 ret = memblock_alloc_nid_unreserved(start, this_end, size, align); @@ -345,8 +344,7 @@ static u64 __init memblock_alloc_nid_region(struct memblock_region *mp, return ~(u64)0; } -u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, - u64 (*nid_range)(u64 start, u64 end, int *nid)) +u64 __init memblock_alloc_nid(u64 size, u64 align, int nid) { struct memblock_type *mem = &memblock.memory; int i; @@ -357,7 +355,6 @@ u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, for (i = 0; i < mem->cnt; i++) { u64 ret = memblock_alloc_nid_region(&mem->regions[i], - nid_range, size, align, nid); if (ret != ~(u64)0) return ret; @@ -531,3 +528,9 @@ int memblock_is_region_reserved(u64 base, u64 size) return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } +u64 __weak memblock_nid_range(u64 start, u64 end, int *nid) +{ + *nid = 0; + + return end; +} -- cgit v1.2.3 From 27f574c223d2c09610058b3ec7a29582d63a3e06 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:00 -0700 Subject: memblock: Expose MEMBLOCK_ALLOC_ANYWHERE Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hash_utils_64.c | 2 +- include/linux/memblock.h | 1 + mm/memblock.c | 2 -- 3 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 4072b871497d..a542ff5ec8a9 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -625,7 +625,7 @@ static void __init htab_initialize(void) if (machine_is(cell)) limit = 0x80000000; else - limit = 0; + limit = MEMBLOCK_ALLOC_ANYWHERE; table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 367dea6e95a0..3cf3304e901d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -50,6 +50,7 @@ extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid); extern u64 __init memblock_alloc(u64 size, u64 align); extern u64 __init memblock_alloc_base(u64 size, u64, u64 max_addr); +#define MEMBLOCK_ALLOC_ANYWHERE 0 extern u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr); extern u64 __init memblock_phys_mem_size(void); diff --git a/mm/memblock.c b/mm/memblock.c index e264e8c70892..0131684c42f8 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -15,8 +15,6 @@ #include #include -#define MEMBLOCK_ALLOC_ANYWHERE 0 - struct memblock memblock; static int memblock_debug; -- cgit v1.2.3 From e63075a3c9377536d085bc013cd3fe6323162449 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:01 -0700 Subject: memblock: Introduce default allocation limit and use it to replace explicit ones This introduce memblock.current_limit which is used to limit allocations from memblock_alloc() or memblock_alloc_base(..., MEMBLOCK_ALLOC_ACCESSIBLE). The old MEMBLOCK_ALLOC_ANYWHERE changes value from 0 to ~(u64)0 and can still be used with memblock_alloc_base() to allocate really anywhere. It is -no-longer- cropped to MEMBLOCK_REAL_LIMIT which disappears. Note to archs: I'm leaving the default limit to MEMBLOCK_ALLOC_ANYWHERE. I strongly recommend that you ensure that you set an appropriate limit during boot in order to guarantee that an memblock_alloc() at any time results in something that is accessible with a simple __va(). The reason is that a subsequent patch will introduce the ability for the array to resize itself by reallocating itself. The MEMBLOCK core will honor the current limit when performing those allocations. Signed-off-by: Benjamin Herrenschmidt --- arch/microblaze/include/asm/memblock.h | 3 --- arch/powerpc/include/asm/memblock.h | 7 ------- arch/powerpc/kernel/prom.c | 20 +++++++++++++++++++- arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/mm/40x_mmu.c | 5 +++-- arch/powerpc/mm/fsl_booke_mmu.c | 3 ++- arch/powerpc/mm/hash_utils_64.c | 3 ++- arch/powerpc/mm/init_32.c | 29 +++++++---------------------- arch/powerpc/mm/ppc_mmu_32.c | 3 +-- arch/powerpc/mm/tlb_nohash.c | 2 ++ arch/sh/include/asm/memblock.h | 2 -- arch/sparc/include/asm/memblock.h | 2 -- include/linux/memblock.h | 16 +++++++++++++++- mm/memblock.c | 19 +++++++++++-------- 14 files changed, 63 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/arch/microblaze/include/asm/memblock.h b/arch/microblaze/include/asm/memblock.h index f9c2fa331d2a..20a8e257c77f 100644 --- a/arch/microblaze/include/asm/memblock.h +++ b/arch/microblaze/include/asm/memblock.h @@ -9,9 +9,6 @@ #ifndef _ASM_MICROBLAZE_MEMBLOCK_H #define _ASM_MICROBLAZE_MEMBLOCK_H -/* MEMBLOCK limit is OFF */ -#define MEMBLOCK_REAL_LIMIT 0xFFFFFFFF - #endif /* _ASM_MICROBLAZE_MEMBLOCK_H */ diff --git a/arch/powerpc/include/asm/memblock.h b/arch/powerpc/include/asm/memblock.h index 3c29728b56b1..43efc345065e 100644 --- a/arch/powerpc/include/asm/memblock.h +++ b/arch/powerpc/include/asm/memblock.h @@ -5,11 +5,4 @@ #define MEMBLOCK_DBG(fmt...) udbg_printf(fmt) -#ifdef CONFIG_PPC32 -extern phys_addr_t lowmem_end_addr; -#define MEMBLOCK_REAL_LIMIT lowmem_end_addr -#else -#define MEMBLOCK_REAL_LIMIT 0 -#endif - #endif /* _ASM_POWERPC_MEMBLOCK_H */ diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fed9bf6187d1..3aec0b980f6a 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -98,7 +98,7 @@ static void __init move_device_tree(void) if ((memory_limit && (start + size) > memory_limit) || overlaps_crashkernel(start, size)) { - p = __va(memblock_alloc_base(size, PAGE_SIZE, memblock.rmo_size)); + p = __va(memblock_alloc(size, PAGE_SIZE)); memcpy(p, initial_boot_params, size); initial_boot_params = (struct boot_param_header *)p; DBG("Moved device tree to 0x%p\n", p); @@ -655,6 +655,21 @@ static void __init phyp_dump_reserve_mem(void) static inline void __init phyp_dump_reserve_mem(void) {} #endif /* CONFIG_PHYP_DUMP && CONFIG_PPC_RTAS */ +static void set_boot_memory_limit(void) +{ +#ifdef CONFIG_PPC32 + /* 601 can only access 16MB at the moment */ + if (PVR_VER(mfspr(SPRN_PVR)) == 1) + memblock_set_current_limit(0x01000000); + /* 8xx can only access 8MB at the moment */ + else if (PVR_VER(mfspr(SPRN_PVR)) == 0x50) + memblock_set_current_limit(0x00800000); + else + memblock_set_current_limit(0x10000000); +#else + memblock_set_current_limit(memblock.rmo_size); +#endif +} void __init early_init_devtree(void *params) { @@ -683,6 +698,7 @@ void __init early_init_devtree(void *params) /* Scan memory nodes and rebuild MEMBLOCKs */ memblock_init(); + of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); @@ -718,6 +734,8 @@ void __init early_init_devtree(void *params) DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); + set_boot_memory_limit(); + /* We may need to relocate the flat tree, do it now. * FIXME .. and the initrd too? */ move_device_tree(); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index a10ffc85ada7..b7eb1ded3b5f 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -246,7 +246,7 @@ static void __init irqstack_early_init(void) unsigned int i; /* interrupt stacks must be in lowmem, we get that for free on ppc32 - * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */ + * as the memblock is limited to lowmem by default */ for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c index 1dc2fa5ce1bd..58969b51f454 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/40x_mmu.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -47,6 +48,7 @@ #include #include #include + #include "mmu_decl.h" extern int __map_without_ltlbs; @@ -139,8 +141,7 @@ unsigned long __init mmu_mapin_ram(unsigned long top) * coverage with normal-sized pages (or other reasons) do not * attempt to allocate outside the allowed range. */ - - __initial_memory_limit_addr = memstart_addr + mapped; + memblock_set_current_limit(memstart_addr + mapped); return mapped; } diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index cdc7526e9c93..e525f862d759 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -212,5 +213,5 @@ void __init adjust_total_lowmem(void) pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20, (unsigned int)((total_lowmem - __max_low_memory) >> 20)); - __initial_memory_limit_addr = memstart_addr + __max_low_memory; + memblock_set_current_limit(memstart_addr + __max_low_memory); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index a542ff5ec8a9..b05890e23813 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -696,7 +696,8 @@ static void __init htab_initialize(void) #endif /* CONFIG_U3_DART */ BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), prot, mmu_linear_psize, mmu_kernel_ssize)); - } + } + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); /* * If we have a memory_limit and we've allocated TCEs then we need to diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 6a6975dc2654..59b208b7ec6f 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -91,12 +91,6 @@ int __allow_ioremap_reserved; /* max amount of low RAM to map in */ unsigned long __max_low_memory = MAX_LOW_MEM; -/* - * address of the limit of what is accessible with initial MMU setup - - * 256MB usually, but only 16MB on 601. - */ -phys_addr_t __initial_memory_limit_addr = (phys_addr_t)0x10000000; - /* * Check for command-line options that affect what MMU_init will do. */ @@ -126,13 +120,6 @@ void __init MMU_init(void) if (ppc_md.progress) ppc_md.progress("MMU:enter", 0x111); - /* 601 can only access 16MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 1) - __initial_memory_limit_addr = 0x01000000; - /* 8xx can only access 8MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 0x50) - __initial_memory_limit_addr = 0x00800000; - /* parse args from command line */ MMU_setup(); @@ -190,20 +177,18 @@ void __init MMU_init(void) #ifdef CONFIG_BOOTX_TEXT btext_unmap(); #endif + + /* Shortly after that, the entire linear mapping will be available */ + memblock_set_current_limit(lowmem_end_addr); } /* This is only called until mem_init is done. */ void __init *early_get_page(void) { - void *p; - - if (init_bootmem_done) { - p = alloc_bootmem_pages(PAGE_SIZE); - } else { - p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, - __initial_memory_limit_addr)); - } - return p; + if (init_bootmem_done) + return alloc_bootmem_pages(PAGE_SIZE); + else + return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); } /* Free up now-unused memory */ diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index f8a01829d64f..7d34e170e80f 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -223,8 +223,7 @@ void __init MMU_init_hw(void) * Find some memory for the hash table. */ if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); - Hash = __va(memblock_alloc_base(Hash_size, Hash_size, - __initial_memory_limit_addr)); + Hash = __va(memblock_alloc(Hash_size, Hash_size)); cacheable_memzero(Hash, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index d8695b02a968..7ba32e762990 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -432,6 +432,8 @@ static void __early_init_mmu(int boot_cpu) * the MMU configuration */ mb(); + + memblock_set_current_limit(linear_map_top); } void __init early_init_mmu(void) diff --git a/arch/sh/include/asm/memblock.h b/arch/sh/include/asm/memblock.h index dfe683b88075..e87063fad2ea 100644 --- a/arch/sh/include/asm/memblock.h +++ b/arch/sh/include/asm/memblock.h @@ -1,6 +1,4 @@ #ifndef __ASM_SH_MEMBLOCK_H #define __ASM_SH_MEMBLOCK_H -#define MEMBLOCK_REAL_LIMIT 0 - #endif /* __ASM_SH_MEMBLOCK_H */ diff --git a/arch/sparc/include/asm/memblock.h b/arch/sparc/include/asm/memblock.h index f12af880649b..c67b047ef85e 100644 --- a/arch/sparc/include/asm/memblock.h +++ b/arch/sparc/include/asm/memblock.h @@ -5,6 +5,4 @@ #define MEMBLOCK_DBG(fmt...) prom_printf(fmt) -#define MEMBLOCK_REAL_LIMIT 0 - #endif /* !(_SPARC64_MEMBLOCK_H) */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 3cf3304e901d..c4f6e53264ed 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -34,6 +34,7 @@ struct memblock_type { struct memblock { unsigned long debug; u64 rmo_size; + u64 current_limit; struct memblock_type memory; struct memblock_type reserved; }; @@ -46,11 +47,16 @@ extern long memblock_add(u64 base, u64 size); extern long memblock_remove(u64 base, u64 size); extern long __init memblock_free(u64 base, u64 size); extern long __init memblock_reserve(u64 base, u64 size); + extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid); extern u64 __init memblock_alloc(u64 size, u64 align); + +/* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ +#define MEMBLOCK_ALLOC_ANYWHERE (~(u64)0) +#define MEMBLOCK_ALLOC_ACCESSIBLE 0 + extern u64 __init memblock_alloc_base(u64 size, u64, u64 max_addr); -#define MEMBLOCK_ALLOC_ANYWHERE 0 extern u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr); extern u64 __init memblock_phys_mem_size(void); @@ -66,6 +72,14 @@ extern void memblock_dump_all(void); /* Provided by the architecture */ extern u64 memblock_nid_range(u64 start, u64 end, int *nid); +/** + * memblock_set_current_limit - Set the current allocation limit to allow + * limiting allocations to what is currently + * accessible during boot + * @limit: New limit value (physical address) + */ +extern void memblock_set_current_limit(u64 limit); + /* * pfn conversion functions diff --git a/mm/memblock.c b/mm/memblock.c index 0131684c42f8..770c5bfac2cd 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -115,6 +115,8 @@ void __init memblock_init(void) memblock.reserved.regions[0].base = 0; memblock.reserved.regions[0].size = 0; memblock.reserved.cnt = 1; + + memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE; } void __init memblock_analyze(void) @@ -373,7 +375,7 @@ u64 __init memblock_alloc_nid(u64 size, u64 align, int nid) u64 __init memblock_alloc(u64 size, u64 align) { - return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE); + return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); } u64 __init memblock_alloc_base(u64 size, u64 align, u64 max_addr) @@ -399,14 +401,9 @@ u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr) size = memblock_align_up(size, align); - /* On some platforms, make sure we allocate lowmem */ - /* Note that MEMBLOCK_REAL_LIMIT may be MEMBLOCK_ALLOC_ANYWHERE */ - if (max_addr == MEMBLOCK_ALLOC_ANYWHERE) - max_addr = MEMBLOCK_REAL_LIMIT; - /* Pump up max_addr */ - if (max_addr == MEMBLOCK_ALLOC_ANYWHERE) - max_addr = ~(u64)0; + if (max_addr == MEMBLOCK_ALLOC_ACCESSIBLE) + max_addr = memblock.current_limit; /* We do a top-down search, this tends to limit memory * fragmentation by keeping early boot allocs near the @@ -527,3 +524,9 @@ int memblock_is_region_reserved(u64 base, u64 size) return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } + +void __init memblock_set_current_limit(u64 limit) +{ + memblock.current_limit = limit; +} + -- cgit v1.2.3 From cd3db0c4ca3d237e7ad20f7107216e575705d2b0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:02 -0700 Subject: memblock: Remove rmo_size, burry it in arch/powerpc where it belongs The RMA (RMO is a misnomer) is a concept specific to ppc64 (in fact server ppc64 though I hijack it on embedded ppc64 for similar purposes) and represents the area of memory that can be accessed in real mode (aka with MMU off), or on embedded, from the exception vectors (which is bolted in the TLB) which pretty much boils down to the same thing. We take that out of the generic MEMBLOCK data structure and move it into arch/powerpc where it belongs, renaming it to "RMA" while at it. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mmu.h | 12 ++++++++++++ arch/powerpc/kernel/head_40x.S | 6 +----- arch/powerpc/kernel/paca.c | 2 +- arch/powerpc/kernel/prom.c | 29 ++++++++--------------------- arch/powerpc/kernel/rtas.c | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/mm/40x_mmu.c | 14 +++++++++++++- arch/powerpc/mm/44x_mmu.c | 14 ++++++++++++++ arch/powerpc/mm/fsl_booke_mmu.c | 9 +++++++++ arch/powerpc/mm/hash_utils_64.c | 22 +++++++++++++++++++++- arch/powerpc/mm/init_32.c | 14 ++++++++++++++ arch/powerpc/mm/init_64.c | 1 + arch/powerpc/mm/ppc_mmu_32.c | 15 +++++++++++++++ arch/powerpc/mm/tlb_nohash.c | 14 ++++++++++++++ include/linux/memblock.h | 1 - mm/memblock.c | 8 -------- 16 files changed, 125 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 7ebf42ed84a2..bb40a06d3b77 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -2,6 +2,8 @@ #define _ASM_POWERPC_MMU_H_ #ifdef __KERNEL__ +#include + #include #include @@ -82,6 +84,16 @@ extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup; extern void early_init_mmu(void); extern void early_init_mmu_secondary(void); +extern void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size); + +#ifdef CONFIG_PPC64 +/* This is our real memory area size on ppc64 server, on embedded, we + * make it match the size our of bolted TLB area + */ +extern u64 ppc64_rma_size; +#endif /* CONFIG_PPC64 */ + #endif /* !__ASSEMBLY__ */ /* The kernel use the constants below to index in the page sizes array. diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a90625f9b485..8278e8bad5a0 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -923,11 +923,7 @@ initial_mmu: mtspr SPRN_PID,r0 sync - /* Configure and load two entries into TLB slots 62 and 63. - * In case we are pinning TLBs, these are reserved in by the - * other TLB functions. If not reserving, then it doesn't - * matter where they are loaded. - */ + /* Configure and load one entry into TLB slots 63 */ clrrwi r4,r4,10 /* Mask off the real page number */ ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */ diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 139a773853f4..b9ffd7deeed7 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -117,7 +117,7 @@ void __init allocate_pacas(void) * the first segment. On iSeries they must be within the area mapped * by the HV, which is HvPagesToMap * HVPAGESIZE bytes. */ - limit = min(0x10000000ULL, memblock.rmo_size); + limit = min(0x10000000ULL, ppc64_rma_size); if (firmware_has_feature(FW_FEATURE_ISERIES)) limit = min(limit, HvPagesToMap * HVPAGESIZE); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 3aec0b980f6a..c3c6a8857544 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -66,6 +66,7 @@ int __initdata iommu_is_off; int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; +u64 ppc64_rma_size; #endif static int __init early_parse_mem(char *p) @@ -492,7 +493,7 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node, void __init early_init_dt_add_memory_arch(u64 base, u64 size) { -#if defined(CONFIG_PPC64) +#ifdef CONFIG_PPC64 if (iommu_is_off) { if (base >= 0x80000000ul) return; @@ -501,9 +502,13 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) } #endif - memblock_add(base, size); - + /* First MEMBLOCK added, do some special initializations */ + if (memstart_addr == ~(phys_addr_t)0) + setup_initial_memory_limit(base, size); memstart_addr = min((u64)memstart_addr, base); + + /* Add the chunk to the MEMBLOCK list */ + memblock_add(base, size); } u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align) @@ -655,22 +660,6 @@ static void __init phyp_dump_reserve_mem(void) static inline void __init phyp_dump_reserve_mem(void) {} #endif /* CONFIG_PHYP_DUMP && CONFIG_PPC_RTAS */ -static void set_boot_memory_limit(void) -{ -#ifdef CONFIG_PPC32 - /* 601 can only access 16MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 1) - memblock_set_current_limit(0x01000000); - /* 8xx can only access 8MB at the moment */ - else if (PVR_VER(mfspr(SPRN_PVR)) == 0x50) - memblock_set_current_limit(0x00800000); - else - memblock_set_current_limit(0x10000000); -#else - memblock_set_current_limit(memblock.rmo_size); -#endif -} - void __init early_init_devtree(void *params) { phys_addr_t limit; @@ -734,8 +723,6 @@ void __init early_init_devtree(void *params) DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); - set_boot_memory_limit(); - /* We may need to relocate the flat tree, do it now. * FIXME .. and the initrd too? */ move_device_tree(); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index d0516dbee762..1662777be5dd 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -934,7 +934,7 @@ void __init rtas_initialize(void) */ #ifdef CONFIG_PPC64 if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) { - rtas_region = min(memblock.rmo_size, RTAS_INSTANTIATE_MAX); + rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX); ibm_suspend_me_token = rtas_token("ibm,suspend-me"); } #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index d135f93cb0f6..4360944b60f0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -487,7 +487,7 @@ static void __init emergency_stack_init(void) * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. */ - limit = min(slb0_limit(), memblock.rmo_size); + limit = min(slb0_limit(), ppc64_rma_size); for_each_possible_cpu(i) { unsigned long sp; diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c index 58969b51f454..5810967511d4 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/40x_mmu.c @@ -141,7 +141,19 @@ unsigned long __init mmu_mapin_ram(unsigned long top) * coverage with normal-sized pages (or other reasons) do not * attempt to allocate outside the allowed range. */ - memblock_set_current_limit(memstart_addr + mapped); + memblock_set_current_limit(mapped); return mapped; } + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 40x can only access 16MB at the moment (see head_40x.S) */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000)); +} diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index d8c6efb32bc6..024acab588fd 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -24,6 +24,8 @@ */ #include +#include + #include #include #include @@ -213,6 +215,18 @@ unsigned long __init mmu_mapin_ram(unsigned long top) return total_lowmem; } +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 44x has a 256M TLB entry pinned at boot */ + memblock_set_current_limit(min_t(u64, first_memblock_size, PPC_PIN_SIZE)); +} + #ifdef CONFIG_SMP void __cpuinit mmu_init_secondary(int cpu) { diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index e525f862d759..0be8fe24c54e 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -215,3 +215,12 @@ void __init adjust_total_lowmem(void) memblock_set_current_limit(memstart_addr + __max_low_memory); } + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + phys_addr_t limit = first_memblock_base + first_memblock_size; + + /* 64M mapped initially according to head_fsl_booke.S */ + memblock_set_current_limit(min_t(u64, limit, 0x04000000)); +} diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b05890e23813..83f534d862db 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -649,7 +649,7 @@ static void __init htab_initialize(void) #ifdef CONFIG_DEBUG_PAGEALLOC linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count, - 1, memblock.rmo_size)); + 1, ppc64_rma_size)); memset(linear_map_hash_slots, 0, linear_map_hash_count); #endif /* CONFIG_DEBUG_PAGEALLOC */ @@ -1248,3 +1248,23 @@ void kernel_map_pages(struct page *page, int numpages, int enable) local_irq_restore(flags); } #endif /* CONFIG_DEBUG_PAGEALLOC */ + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* On LPAR systems, the first entry is our RMA region, + * non-LPAR 64-bit hash MMU systems don't have a limitation + * on real mode access, but using the first entry works well + * enough. We also clamp it to 1G to avoid some funky things + * such as RTAS bugs etc... + */ + ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + + /* Finally limit subsequent allocations */ + memblock_set_current_limit(ppc64_rma_size); +} diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 59b208b7ec6f..742da43b4ab6 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -237,3 +237,17 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif + +#ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */ +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 8xx can only access 8MB at the moment */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000)); +} +#endif /* CONFIG_8xx */ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 71f1415e2472..9e081ffbf0f2 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -328,3 +328,4 @@ int __meminit vmemmap_populate(struct page *start_page, return 0; } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ + diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 7d34e170e80f..11571e118831 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -271,3 +271,18 @@ void __init MMU_init_hw(void) if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); } + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 601 can only access 16MB at the moment */ + if (PVR_VER(mfspr(SPRN_PVR)) == 1) + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000)); + else /* Anything else has 256M mapped */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000)); +} diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 7ba32e762990..a086ed562606 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -446,4 +446,18 @@ void __cpuinit early_init_mmu_secondary(void) __early_init_mmu(0); } +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* On Embedded 64-bit, we adjust the RMA size to match + * the bolted TLB entry. We know for now that only 1G + * entries are supported though that may eventually + * change. We crop it to the size of the first MEMBLOCK to + * avoid going over total available memory just in case... + */ + ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + + /* Finally limit subsequent allocations */ + memblock_set_current_limit(ppc64_memblock_base + ppc64_rma_size); +} #endif /* CONFIG_PPC64 */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c4f6e53264ed..71b8edc6ede8 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -33,7 +33,6 @@ struct memblock_type { struct memblock { unsigned long debug; - u64 rmo_size; u64 current_limit; struct memblock_type memory; struct memblock_type reserved; diff --git a/mm/memblock.c b/mm/memblock.c index 770c5bfac2cd..73d903ebf3d4 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -49,7 +49,6 @@ void memblock_dump_all(void) return; pr_info("MEMBLOCK configuration:\n"); - pr_info(" rmo_size = 0x%llx\n", (unsigned long long)memblock.rmo_size); pr_info(" memory.size = 0x%llx\n", (unsigned long long)memblock.memory.size); memblock_dump(&memblock.memory, "memory"); @@ -195,10 +194,6 @@ static long memblock_add_region(struct memblock_type *type, u64 base, u64 size) long memblock_add(u64 base, u64 size) { - /* On pSeries LPAR systems, the first MEMBLOCK is our RMO region. */ - if (base == 0) - memblock.rmo_size = size; - return memblock_add_region(&memblock.memory, base, size); } @@ -459,9 +454,6 @@ void __init memblock_enforce_memory_limit(u64 memory_limit) break; } - if (memblock.memory.regions[0].size < memblock.rmo_size) - memblock.rmo_size = memblock.memory.regions[0].size; - memory_limit = memblock_end_of_DRAM(); /* And truncate any reserves above the limit also. */ -- cgit v1.2.3 From 2898cc4cdf208f15246b7a1c6951d2b126a70fd6 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:34:42 +1000 Subject: memblock: Change u64 to phys_addr_t Let's not waste space and cycles on archs that don't support >32-bit physical address space. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 48 +++++++++---------- mm/memblock.c | 118 ++++++++++++++++++++++++----------------------- 2 files changed, 84 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 71b8edc6ede8..b65045a4ed08 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -21,19 +21,19 @@ #define MAX_MEMBLOCK_REGIONS 128 struct memblock_region { - u64 base; - u64 size; + phys_addr_t base; + phys_addr_t size; }; struct memblock_type { unsigned long cnt; - u64 size; + phys_addr_t size; struct memblock_region regions[MAX_MEMBLOCK_REGIONS+1]; }; struct memblock { unsigned long debug; - u64 current_limit; + phys_addr_t current_limit; struct memblock_type memory; struct memblock_type reserved; }; @@ -42,34 +42,34 @@ extern struct memblock memblock; extern void __init memblock_init(void); extern void __init memblock_analyze(void); -extern long memblock_add(u64 base, u64 size); -extern long memblock_remove(u64 base, u64 size); -extern long __init memblock_free(u64 base, u64 size); -extern long __init memblock_reserve(u64 base, u64 size); +extern long memblock_add(phys_addr_t base, phys_addr_t size); +extern long memblock_remove(phys_addr_t base, phys_addr_t size); +extern long __init memblock_free(phys_addr_t base, phys_addr_t size); +extern long __init memblock_reserve(phys_addr_t base, phys_addr_t size); -extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid); -extern u64 __init memblock_alloc(u64 size, u64 align); +extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); +extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ -#define MEMBLOCK_ALLOC_ANYWHERE (~(u64)0) +#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) #define MEMBLOCK_ALLOC_ACCESSIBLE 0 -extern u64 __init memblock_alloc_base(u64 size, - u64, u64 max_addr); -extern u64 __init __memblock_alloc_base(u64 size, - u64 align, u64 max_addr); -extern u64 __init memblock_phys_mem_size(void); -extern u64 memblock_end_of_DRAM(void); -extern void __init memblock_enforce_memory_limit(u64 memory_limit); -extern int memblock_is_memory(u64 addr); -extern int memblock_is_region_memory(u64 base, u64 size); -extern int __init memblock_is_reserved(u64 addr); -extern int memblock_is_region_reserved(u64 base, u64 size); +extern phys_addr_t __init memblock_alloc_base(phys_addr_t size, + phys_addr_t, phys_addr_t max_addr); +extern phys_addr_t __init __memblock_alloc_base(phys_addr_t size, + phys_addr_t align, phys_addr_t max_addr); +extern phys_addr_t __init memblock_phys_mem_size(void); +extern phys_addr_t memblock_end_of_DRAM(void); +extern void __init memblock_enforce_memory_limit(phys_addr_t memory_limit); +extern int memblock_is_memory(phys_addr_t addr); +extern int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); +extern int __init memblock_is_reserved(phys_addr_t addr); +extern int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); extern void memblock_dump_all(void); /* Provided by the architecture */ -extern u64 memblock_nid_range(u64 start, u64 end, int *nid); +extern phys_addr_t memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid); /** * memblock_set_current_limit - Set the current allocation limit to allow @@ -77,7 +77,7 @@ extern u64 memblock_nid_range(u64 start, u64 end, int *nid); * accessible during boot * @limit: New limit value (physical address) */ -extern void memblock_set_current_limit(u64 limit); +extern void memblock_set_current_limit(phys_addr_t limit); /* diff --git a/mm/memblock.c b/mm/memblock.c index 73d903ebf3d4..81da63592a68 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -55,13 +55,14 @@ void memblock_dump_all(void) memblock_dump(&memblock.reserved, "reserved"); } -static unsigned long memblock_addrs_overlap(u64 base1, u64 size1, u64 base2, - u64 size2) +static unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } -static long memblock_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2) +static long memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2) { if (base2 == base1 + size1) return 1; @@ -72,12 +73,12 @@ static long memblock_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2) } static long memblock_regions_adjacent(struct memblock_type *type, - unsigned long r1, unsigned long r2) + unsigned long r1, unsigned long r2) { - u64 base1 = type->regions[r1].base; - u64 size1 = type->regions[r1].size; - u64 base2 = type->regions[r2].base; - u64 size2 = type->regions[r2].size; + phys_addr_t base1 = type->regions[r1].base; + phys_addr_t size1 = type->regions[r1].size; + phys_addr_t base2 = type->regions[r2].base; + phys_addr_t size2 = type->regions[r2].size; return memblock_addrs_adjacent(base1, size1, base2, size2); } @@ -128,7 +129,7 @@ void __init memblock_analyze(void) memblock.memory.size += memblock.memory.regions[i].size; } -static long memblock_add_region(struct memblock_type *type, u64 base, u64 size) +static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long coalesced = 0; long adjacent, i; @@ -141,8 +142,8 @@ static long memblock_add_region(struct memblock_type *type, u64 base, u64 size) /* First try and coalesce this MEMBLOCK with another. */ for (i = 0; i < type->cnt; i++) { - u64 rgnbase = type->regions[i].base; - u64 rgnsize = type->regions[i].size; + phys_addr_t rgnbase = type->regions[i].base; + phys_addr_t rgnsize = type->regions[i].size; if ((rgnbase == base) && (rgnsize == size)) /* Already have this region, so we're done */ @@ -192,16 +193,16 @@ static long memblock_add_region(struct memblock_type *type, u64 base, u64 size) return 0; } -long memblock_add(u64 base, u64 size) +long memblock_add(phys_addr_t base, phys_addr_t size) { return memblock_add_region(&memblock.memory, base, size); } -static long __memblock_remove(struct memblock_type *type, u64 base, u64 size) +static long __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { - u64 rgnbegin, rgnend; - u64 end = base + size; + phys_addr_t rgnbegin, rgnend; + phys_addr_t end = base + size; int i; rgnbegin = rgnend = 0; /* supress gcc warnings */ @@ -246,17 +247,17 @@ static long __memblock_remove(struct memblock_type *type, u64 base, u64 size) return memblock_add_region(type, end, rgnend - end); } -long memblock_remove(u64 base, u64 size) +long memblock_remove(phys_addr_t base, phys_addr_t size) { return __memblock_remove(&memblock.memory, base, size); } -long __init memblock_free(u64 base, u64 size) +long __init memblock_free(phys_addr_t base, phys_addr_t size) { return __memblock_remove(&memblock.reserved, base, size); } -long __init memblock_reserve(u64 base, u64 size) +long __init memblock_reserve(phys_addr_t base, phys_addr_t size) { struct memblock_type *_rgn = &memblock.reserved; @@ -265,13 +266,13 @@ long __init memblock_reserve(u64 base, u64 size) return memblock_add_region(_rgn, base, size); } -long memblock_overlaps_region(struct memblock_type *type, u64 base, u64 size) +long memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long i; for (i = 0; i < type->cnt; i++) { - u64 rgnbase = type->regions[i].base; - u64 rgnsize = type->regions[i].size; + phys_addr_t rgnbase = type->regions[i].base; + phys_addr_t rgnsize = type->regions[i].size; if (memblock_addrs_overlap(base, size, rgnbase, rgnsize)) break; } @@ -279,20 +280,20 @@ long memblock_overlaps_region(struct memblock_type *type, u64 base, u64 size) return (i < type->cnt) ? i : -1; } -static u64 memblock_align_down(u64 addr, u64 size) +static phys_addr_t memblock_align_down(phys_addr_t addr, phys_addr_t size) { return addr & ~(size - 1); } -static u64 memblock_align_up(u64 addr, u64 size) +static phys_addr_t memblock_align_up(phys_addr_t addr, phys_addr_t size) { return (addr + (size - 1)) & ~(size - 1); } -static u64 __init memblock_alloc_region(u64 start, u64 end, - u64 size, u64 align) +static phys_addr_t __init memblock_alloc_region(phys_addr_t start, phys_addr_t end, + phys_addr_t size, phys_addr_t align) { - u64 base, res_base; + phys_addr_t base, res_base; long j; base = memblock_align_down((end - size), align); @@ -301,7 +302,7 @@ static u64 __init memblock_alloc_region(u64 start, u64 end, if (j < 0) { /* this area isn't reserved, take it */ if (memblock_add_region(&memblock.reserved, base, size) < 0) - base = ~(u64)0; + base = ~(phys_addr_t)0; return base; } res_base = memblock.reserved.regions[j].base; @@ -310,42 +311,43 @@ static u64 __init memblock_alloc_region(u64 start, u64 end, base = memblock_align_down(res_base - size, align); } - return ~(u64)0; + return ~(phys_addr_t)0; } -u64 __weak __init memblock_nid_range(u64 start, u64 end, int *nid) +phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid) { *nid = 0; return end; } -static u64 __init memblock_alloc_nid_region(struct memblock_region *mp, - u64 size, u64 align, int nid) +static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp, + phys_addr_t size, + phys_addr_t align, int nid) { - u64 start, end; + phys_addr_t start, end; start = mp->base; end = start + mp->size; start = memblock_align_up(start, align); while (start < end) { - u64 this_end; + phys_addr_t this_end; int this_nid; this_end = memblock_nid_range(start, end, &this_nid); if (this_nid == nid) { - u64 ret = memblock_alloc_region(start, this_end, size, align); - if (ret != ~(u64)0) + phys_addr_t ret = memblock_alloc_region(start, this_end, size, align); + if (ret != ~(phys_addr_t)0) return ret; } start = this_end; } - return ~(u64)0; + return ~(phys_addr_t)0; } -u64 __init memblock_alloc_nid(u64 size, u64 align, int nid) +phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) { struct memblock_type *mem = &memblock.memory; int i; @@ -359,23 +361,23 @@ u64 __init memblock_alloc_nid(u64 size, u64 align, int nid) size = memblock_align_up(size, align); for (i = 0; i < mem->cnt; i++) { - u64 ret = memblock_alloc_nid_region(&mem->regions[i], + phys_addr_t ret = memblock_alloc_nid_region(&mem->regions[i], size, align, nid); - if (ret != ~(u64)0) + if (ret != ~(phys_addr_t)0) return ret; } return memblock_alloc(size, align); } -u64 __init memblock_alloc(u64 size, u64 align) +phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) { return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); } -u64 __init memblock_alloc_base(u64 size, u64 align, u64 max_addr) +phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) { - u64 alloc; + phys_addr_t alloc; alloc = __memblock_alloc_base(size, align, max_addr); @@ -386,11 +388,11 @@ u64 __init memblock_alloc_base(u64 size, u64 align, u64 max_addr) return alloc; } -u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr) +phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) { long i; - u64 base = 0; - u64 res_base; + phys_addr_t base = 0; + phys_addr_t res_base; BUG_ON(0 == size); @@ -405,26 +407,26 @@ u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr) * top of memory */ for (i = memblock.memory.cnt - 1; i >= 0; i--) { - u64 memblockbase = memblock.memory.regions[i].base; - u64 memblocksize = memblock.memory.regions[i].size; + phys_addr_t memblockbase = memblock.memory.regions[i].base; + phys_addr_t memblocksize = memblock.memory.regions[i].size; if (memblocksize < size) continue; base = min(memblockbase + memblocksize, max_addr); res_base = memblock_alloc_region(memblockbase, base, size, align); - if (res_base != ~(u64)0) + if (res_base != ~(phys_addr_t)0) return res_base; } return 0; } /* You must call memblock_analyze() before this. */ -u64 __init memblock_phys_mem_size(void) +phys_addr_t __init memblock_phys_mem_size(void) { return memblock.memory.size; } -u64 memblock_end_of_DRAM(void) +phys_addr_t memblock_end_of_DRAM(void) { int idx = memblock.memory.cnt - 1; @@ -432,10 +434,10 @@ u64 memblock_end_of_DRAM(void) } /* You must call memblock_analyze() after this. */ -void __init memblock_enforce_memory_limit(u64 memory_limit) +void __init memblock_enforce_memory_limit(phys_addr_t memory_limit) { unsigned long i; - u64 limit; + phys_addr_t limit; struct memblock_region *p; if (!memory_limit) @@ -472,7 +474,7 @@ void __init memblock_enforce_memory_limit(u64 memory_limit) } } -static int memblock_search(struct memblock_type *type, u64 addr) +static int memblock_search(struct memblock_type *type, phys_addr_t addr) { unsigned int left = 0, right = type->cnt; @@ -490,17 +492,17 @@ static int memblock_search(struct memblock_type *type, u64 addr) return -1; } -int __init memblock_is_reserved(u64 addr) +int __init memblock_is_reserved(phys_addr_t addr) { return memblock_search(&memblock.reserved, addr) != -1; } -int memblock_is_memory(u64 addr) +int memblock_is_memory(phys_addr_t addr) { return memblock_search(&memblock.memory, addr) != -1; } -int memblock_is_region_memory(u64 base, u64 size) +int memblock_is_region_memory(phys_addr_t base, phys_addr_t size) { int idx = memblock_search(&memblock.reserved, base); @@ -511,13 +513,13 @@ int memblock_is_region_memory(u64 base, u64 size) memblock.reserved.regions[idx].size) >= (base + size); } -int memblock_is_region_reserved(u64 base, u64 size) +int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) { return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } -void __init memblock_set_current_limit(u64 limit) +void __init memblock_set_current_limit(phys_addr_t limit) { memblock.current_limit = limit; } -- cgit v1.2.3 From 9d3c30f5a17ec35894eadb7171f724643dce19c3 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:04 -0700 Subject: memblock: Remove unused memblock.debug struct member Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b65045a4ed08..0fe6dd56a4b4 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -32,7 +32,6 @@ struct memblock_type { }; struct memblock { - unsigned long debug; phys_addr_t current_limit; struct memblock_type memory; struct memblock_type reserved; @@ -55,9 +54,11 @@ extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); #define MEMBLOCK_ALLOC_ACCESSIBLE 0 extern phys_addr_t __init memblock_alloc_base(phys_addr_t size, - phys_addr_t, phys_addr_t max_addr); + phys_addr_t align, + phys_addr_t max_addr); extern phys_addr_t __init __memblock_alloc_base(phys_addr_t size, - phys_addr_t align, phys_addr_t max_addr); + phys_addr_t align, + phys_addr_t max_addr); extern phys_addr_t __init memblock_phys_mem_size(void); extern phys_addr_t memblock_end_of_DRAM(void); extern void __init memblock_enforce_memory_limit(phys_addr_t memory_limit); -- cgit v1.2.3 From 4734b594c6ca1be796d30c82d93fdf5160f45124 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 28 Jul 2010 14:31:29 +1000 Subject: memblock: Remove memblock_type.size and add memblock.memory_size instead Right now, both the "memory" and "reserved" memblock_type structures have a "size" member. It represents the calculated memory size in the former case and is unused in the latter. This moves it out to the main memblock structure instead Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/mem.c | 2 +- include/linux/memblock.h | 2 +- mm/memblock.c | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 52df5428ece4..f661f6c527da 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -301,7 +301,7 @@ void __init mem_init(void) swiotlb_init(1); #endif - num_physpages = memblock.memory.size >> PAGE_SHIFT; + num_physpages = memblock_phys_mem_size() >> PAGE_SHIFT; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); #ifdef CONFIG_NEED_MULTIPLE_NODES diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 0fe6dd56a4b4..c9c7b0f344a5 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -27,12 +27,12 @@ struct memblock_region { struct memblock_type { unsigned long cnt; - phys_addr_t size; struct memblock_region regions[MAX_MEMBLOCK_REGIONS+1]; }; struct memblock { phys_addr_t current_limit; + phys_addr_t memory_size; /* Updated by memblock_analyze() */ struct memblock_type memory; struct memblock_type reserved; }; diff --git a/mm/memblock.c b/mm/memblock.c index 81da63592a68..5ae413e9afd8 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -49,7 +49,7 @@ void memblock_dump_all(void) return; pr_info("MEMBLOCK configuration:\n"); - pr_info(" memory.size = 0x%llx\n", (unsigned long long)memblock.memory.size); + pr_info(" memory size = 0x%llx\n", (unsigned long long)memblock.memory_size); memblock_dump(&memblock.memory, "memory"); memblock_dump(&memblock.reserved, "reserved"); @@ -123,10 +123,10 @@ void __init memblock_analyze(void) { int i; - memblock.memory.size = 0; + memblock.memory_size = 0; for (i = 0; i < memblock.memory.cnt; i++) - memblock.memory.size += memblock.memory.regions[i].size; + memblock.memory_size += memblock.memory.regions[i].size; } static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) @@ -423,7 +423,7 @@ phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, ph /* You must call memblock_analyze() before this. */ phys_addr_t __init memblock_phys_mem_size(void) { - return memblock.memory.size; + return memblock.memory_size; } phys_addr_t memblock_end_of_DRAM(void) -- cgit v1.2.3 From bf23c51f1f49d3960f3cd8e3d2e7f943d9c41042 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:06 -0700 Subject: memblock: Move memblock arrays to static storage in memblock.c and make their size a variable This is in preparation for having resizable arrays. Note that we still allocate one more than needed, this is unchanged from the previous implementation. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 7 ++++--- mm/memblock.c | 10 +++++++++- 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c9c7b0f344a5..150be938b910 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -18,7 +18,7 @@ #include -#define MAX_MEMBLOCK_REGIONS 128 +#define INIT_MEMBLOCK_REGIONS 128 struct memblock_region { phys_addr_t base; @@ -26,8 +26,9 @@ struct memblock_region { }; struct memblock_type { - unsigned long cnt; - struct memblock_region regions[MAX_MEMBLOCK_REGIONS+1]; + unsigned long cnt; /* number of regions */ + unsigned long max; /* size of the allocated array */ + struct memblock_region *regions; }; struct memblock { diff --git a/mm/memblock.c b/mm/memblock.c index 5ae413e9afd8..3c474502d92b 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -18,6 +18,8 @@ struct memblock memblock; static int memblock_debug; +static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1]; +static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1]; static int __init early_memblock(char *p) { @@ -104,6 +106,12 @@ static void memblock_coalesce_regions(struct memblock_type *type, void __init memblock_init(void) { + /* Hookup the initial arrays */ + memblock.memory.regions = memblock_memory_init_regions; + memblock.memory.max = INIT_MEMBLOCK_REGIONS; + memblock.reserved.regions = memblock_reserved_init_regions; + memblock.reserved.max = INIT_MEMBLOCK_REGIONS; + /* Create a dummy zero size MEMBLOCK which will get coalesced away later. * This simplifies the memblock_add() code below... */ @@ -169,7 +177,7 @@ static long memblock_add_region(struct memblock_type *type, phys_addr_t base, ph if (coalesced) return coalesced; - if (type->cnt >= MAX_MEMBLOCK_REGIONS) + if (type->cnt >= type->max) return -1; /* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */ -- cgit v1.2.3 From d2cd563ba82c424083b78e0ce97d68bfb04d1242 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:14 -0700 Subject: memblock: Add arch function to control coalescing of memblock memory regions Some archs such as ARM want to avoid coalescing accross things such as the lowmem/highmem boundary or similar. This provides the option to control it via an arch callback for which a weak default is provided which always allows coalescing. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 ++ mm/memblock.c | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 150be938b910..e5e8f9db3a84 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -72,6 +72,8 @@ extern void memblock_dump_all(void); /* Provided by the architecture */ extern phys_addr_t memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid); +extern int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, + phys_addr_t addr2, phys_addr_t size2); /** * memblock_set_current_limit - Set the current allocation limit to allow diff --git a/mm/memblock.c b/mm/memblock.c index 0787790b1ce0..8715f09434df 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -241,6 +241,12 @@ static int memblock_double_array(struct memblock_type *type) return 0; } +extern int __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, + phys_addr_t addr2, phys_addr_t size2) +{ + return 1; +} + static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long coalesced = 0; @@ -262,6 +268,10 @@ static long memblock_add_region(struct memblock_type *type, phys_addr_t base, ph return 0; adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize); + /* Check if arch allows coalescing */ + if (adjacent != 0 && type == &memblock.memory && + !memblock_memory_can_coalesce(base, size, rgnbase, rgnsize)) + break; if (adjacent > 0) { type->regions[i].base -= size; type->regions[i].size += size; @@ -274,7 +284,14 @@ static long memblock_add_region(struct memblock_type *type, phys_addr_t base, ph } } - if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i+1)) { + /* If we plugged a hole, we may want to also coalesce with the + * next region + */ + if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i+1) && + ((type != &memblock.memory || memblock_memory_can_coalesce(type->regions[i].base, + type->regions[i].size, + type->regions[i+1].base, + type->regions[i+1].size)))) { memblock_coalesce_regions(type, i, i+1); coalesced++; } -- cgit v1.2.3 From c196f76fd5ece716ee3b7fa5dda3576961c0cecc Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:16 -0700 Subject: memblock: NUMA allocate can now use early_pfn_map We now provide a default (weak) implementation of memblock_nid_range() which uses the early_pfn_map[] if CONFIG_ARCH_POPULATES_NODE_MAP is set. Sparc still needs to use its own method due to the way the pages can be scattered between nodes. This implementation is inefficient due to our main algorithm and callback construct wanting to work on an ascending addresses bases while early_pfn_map[] would rather work with nid's (it's unsorted at that stage). But it should work and we can look into improving it subsequently, possibly using arch compile options to chose a different algorithm alltogether. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 3 +++ mm/memblock.c | 28 +++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e5e8f9db3a84..82b030244aa7 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -47,6 +47,9 @@ extern long memblock_remove(phys_addr_t base, phys_addr_t size); extern long __init memblock_free(phys_addr_t base, phys_addr_t size); extern long __init memblock_reserve(phys_addr_t base, phys_addr_t size); +/* The numa aware allocator is only available if + * CONFIG_ARCH_POPULATES_NODE_MAP is set + */ extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); diff --git a/mm/memblock.c b/mm/memblock.c index 468ff43a72b4..af7e4d9cf400 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -15,6 +15,7 @@ #include #include #include +#include #include struct memblock memblock; @@ -451,11 +452,36 @@ phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) /* * Additional node-local allocators. Search for node memory is bottom up * and walks memblock regions within that node bottom-up as well, but allocation - * within an memblock region is top-down. + * within an memblock region is top-down. XXX I plan to fix that at some stage + * + * WARNING: Only available after early_node_map[] has been populated, + * on some architectures, that is after all the calls to add_active_range() + * have been done to populate it. */ phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid) { +#ifdef CONFIG_ARCH_POPULATES_NODE_MAP + /* + * This code originates from sparc which really wants use to walk by addresses + * and returns the nid. This is not very convenient for early_pfn_map[] users + * as the map isn't sorted yet, and it really wants to be walked by nid. + * + * For now, I implement the inefficient method below which walks the early + * map multiple times. Eventually we may want to use an ARCH config option + * to implement a completely different method for both case. + */ + unsigned long start_pfn, end_pfn; + int i; + + for (i = 0; i < MAX_NUMNODES; i++) { + get_pfn_range_for_nid(i, &start_pfn, &end_pfn); + if (start < PFN_PHYS(start_pfn) || start >= PFN_PHYS(end_pfn)) + continue; + *nid = i; + return min(end, PFN_PHYS(end_pfn)); + } +#endif *nid = 0; return end; -- cgit v1.2.3 From 9d1e24928e6a0728d1c7c76818ccbd11b93e7ac9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:17 -0700 Subject: memblock: Separate memblock_alloc_nid() and memblock_alloc_try_nid() The former is now strict, it will fail if it cannot honor the allocation within the node, while the later implements the previous semantic which falls back to allocating anywhere. Signed-off-by: Benjamin Herrenschmidt --- arch/sparc/mm/init_64.c | 4 ++-- include/linux/memblock.h | 6 +++++- mm/memblock.c | 14 ++++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 0883113624b9..dc584d26d597 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -820,7 +820,7 @@ static void __init allocate_node_data(int nid) struct pglist_data *p; #ifdef CONFIG_NEED_MULTIPLE_NODES - paddr = memblock_alloc_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid); + paddr = memblock_alloc_try_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid); if (!paddr) { prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid); prom_halt(); @@ -840,7 +840,7 @@ static void __init allocate_node_data(int nid) if (p->node_spanned_pages) { num_pages = bootmem_bootmap_pages(p->node_spanned_pages); - paddr = memblock_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid); + paddr = memblock_alloc_try_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid); if (!paddr) { prom_printf("Cannot allocate bootmap for nid[%d]\n", nid); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 82b030244aa7..c8da03eb7ba3 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -50,7 +50,11 @@ extern long __init memblock_reserve(phys_addr_t base, phys_addr_t size); /* The numa aware allocator is only available if * CONFIG_ARCH_POPULATES_NODE_MAP is set */ -extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); +extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, + int nid); +extern phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, + int nid); + extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ diff --git a/mm/memblock.c b/mm/memblock.c index af7e4d9cf400..1802d97c7284 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -537,9 +537,23 @@ phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int n return ret; } + return 0; +} + +phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) +{ + phys_addr_t res = memblock_alloc_nid(size, align, nid); + + if (res) + return res; return memblock_alloc(size, align); } + +/* + * Remaining API functions + */ + /* You must call memblock_analyze() before this. */ phys_addr_t __init memblock_phys_mem_size(void) { -- cgit v1.2.3 From 5e63cf43af844ed30acc278b38b8c9bc51eba493 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:07:21 +1000 Subject: memblock: Expose some memblock bits for use by x86 This exposes memblock_debug and associated memblock_dbg() macro, along with memblock_can_resize so that x86 can use these when ported to use memblock Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 5 +++++ mm/memblock.c | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c8da03eb7ba3..eed0f9b8e526 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -39,6 +39,11 @@ struct memblock { }; extern struct memblock memblock; +extern int memblock_debug; +extern int memblock_can_resize; + +#define memblock_dbg(fmt, ...) \ + if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) extern void __init memblock_init(void); extern void __init memblock_analyze(void); diff --git a/mm/memblock.c b/mm/memblock.c index cc15be29fd0a..5499ab162b9d 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -22,7 +22,8 @@ struct memblock memblock; -static int memblock_debug, memblock_can_resize; +int memblock_debug; +int memblock_can_resize; static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1]; static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1]; -- cgit v1.2.3 From 37d8d4bf489e39eedc9537f8616fe87879b13cb0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:20:58 +1000 Subject: memblock: Export MEMBLOCK_ERROR will used by x86 memblock_x86_find_in_range_node and nobootmem replacement Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 3 ++- mm/memblock.c | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index eed0f9b8e526..1a9c29cc92fa 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -18,7 +18,8 @@ #include -#define INIT_MEMBLOCK_REGIONS 128 +#define INIT_MEMBLOCK_REGIONS 128 +#define MEMBLOCK_ERROR (~(phys_addr_t)0) struct memblock_region { phys_addr_t base; diff --git a/mm/memblock.c b/mm/memblock.c index c3703abf057e..85cfa1d3ab28 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -27,8 +27,6 @@ int memblock_can_resize; static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1]; static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1]; -#define MEMBLOCK_ERROR (~(phys_addr_t)0) - /* inline so we don't get a warning when pr_debug is compiled out */ static inline const char *memblock_type_name(struct memblock_type *type) { -- cgit v1.2.3 From 25818f0f288cd5333ba5a90ad6dde3def4c4ff58 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 28 Jul 2010 15:25:10 +1000 Subject: memblock: Make MEMBLOCK_ERROR be 0 And ensure we don't hand out 0 as a valid allocation. We put the low limit at PAGE_SIZE arbitrarily. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 +- mm/memblock.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 1a9c29cc92fa..dfa64494ced1 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -19,7 +19,7 @@ #include #define INIT_MEMBLOCK_REGIONS 128 -#define MEMBLOCK_ERROR (~(phys_addr_t)0) +#define MEMBLOCK_ERROR 0 struct memblock_region { phys_addr_t base; diff --git a/mm/memblock.c b/mm/memblock.c index 85cfa1d3ab28..cb520df2a414 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -105,6 +105,12 @@ static phys_addr_t __init memblock_find_region(phys_addr_t start, phys_addr_t en phys_addr_t base, res_base; long j; + /* Prevent allocations returning 0 as it's also used to + * indicate an allocation failure + */ + if (start == 0) + start = PAGE_SIZE; + base = memblock_align_down((end - size), align); while (start <= base) { j = memblock_overlaps_region(&memblock.reserved, base, size); -- cgit v1.2.3 From f0b37fad9a63217c39997b2d2b31f44e3d8be727 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:28:21 +1000 Subject: memblock: Protect memblock.h with CONFIG_HAVE_MEMBLOCK This should make it easier to catch/debug incorrect use when the CONFIG_ option isn't set. Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index dfa64494ced1..c24b27849096 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -2,6 +2,7 @@ #define _LINUX_MEMBLOCK_H #ifdef __KERNEL__ +#ifdef CONFIG_HAVE_MEMBLOCK /* * Logical memory blocks. * @@ -148,6 +149,8 @@ static inline unsigned long memblock_region_pages(const struct memblock_region * region++) +#endif /* CONFIG_HAVE_MEMBLOCK */ + #endif /* __KERNEL__ */ #endif /* _LINUX_MEMBLOCK_H */ -- cgit v1.2.3 From 10d0643988e976360eb3497dcafb55b393b8e480 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:43:02 +1000 Subject: memblock: Option for the architecture to put memblock into the .init section Arch code can define ARCH_DISCARD_MEMBLOCK in asm/memblock.h, which in turns causes memblock code and data to go respectively into the .init and .initdata sections. This will be used by the x86 architecture. If ARCH_DISCARD_MEMBLOCK is defined, the debugfs files to inspect the memblock arrays after boot are not created. Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 8 ++++++++ mm/memblock.c | 48 ++++++++++++++++++++++++------------------------ 2 files changed, 32 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c24b27849096..3978e6a8e824 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -149,6 +149,14 @@ static inline unsigned long memblock_region_pages(const struct memblock_region * region++) +#ifdef ARCH_DISCARD_MEMBLOCK +#define __init_memblock __init +#define __initdata_memblock __initdata +#else +#define __init_memblock +#define __initdata_memblock +#endif + #endif /* CONFIG_HAVE_MEMBLOCK */ #endif /* __KERNEL__ */ diff --git a/mm/memblock.c b/mm/memblock.c index cb520df2a414..a17faea37d47 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -20,12 +20,12 @@ #include #include -struct memblock memblock; +struct memblock memblock __initdata_memblock; -int memblock_debug; -int memblock_can_resize; -static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1]; -static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1]; +int memblock_debug __initdata_memblock; +int memblock_can_resize __initdata_memblock; +static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock; +static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock; /* inline so we don't get a warning when pr_debug is compiled out */ static inline const char *memblock_type_name(struct memblock_type *type) @@ -42,23 +42,23 @@ static inline const char *memblock_type_name(struct memblock_type *type) * Address comparison utilities */ -static phys_addr_t memblock_align_down(phys_addr_t addr, phys_addr_t size) +static phys_addr_t __init_memblock memblock_align_down(phys_addr_t addr, phys_addr_t size) { return addr & ~(size - 1); } -static phys_addr_t memblock_align_up(phys_addr_t addr, phys_addr_t size) +static phys_addr_t __init_memblock memblock_align_up(phys_addr_t addr, phys_addr_t size) { return (addr + (size - 1)) & ~(size - 1); } -static unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, +static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, phys_addr_t size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } -static long memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1, +static long __init_memblock memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, phys_addr_t size2) { if (base2 == base1 + size1) @@ -69,7 +69,7 @@ static long memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1, return 0; } -static long memblock_regions_adjacent(struct memblock_type *type, +static long __init_memblock memblock_regions_adjacent(struct memblock_type *type, unsigned long r1, unsigned long r2) { phys_addr_t base1 = type->regions[r1].base; @@ -80,7 +80,7 @@ static long memblock_regions_adjacent(struct memblock_type *type, return memblock_addrs_adjacent(base1, size1, base2, size2); } -long memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) +long __init_memblock memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long i; @@ -162,7 +162,7 @@ static phys_addr_t __init memblock_find_base(phys_addr_t size, phys_addr_t align return MEMBLOCK_ERROR; } -static void memblock_remove_region(struct memblock_type *type, unsigned long r) +static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) { unsigned long i; @@ -174,7 +174,7 @@ static void memblock_remove_region(struct memblock_type *type, unsigned long r) } /* Assumption: base addr of region 1 < base addr of region 2 */ -static void memblock_coalesce_regions(struct memblock_type *type, +static void __init_memblock memblock_coalesce_regions(struct memblock_type *type, unsigned long r1, unsigned long r2) { type->regions[r1].size += type->regions[r2].size; @@ -184,7 +184,7 @@ static void memblock_coalesce_regions(struct memblock_type *type, /* Defined below but needed now */ static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); -static int memblock_double_array(struct memblock_type *type) +static int __init_memblock memblock_double_array(struct memblock_type *type) { struct memblock_region *new_array, *old_array; phys_addr_t old_size, new_size, addr; @@ -255,13 +255,13 @@ static int memblock_double_array(struct memblock_type *type) return 0; } -extern int __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, +extern int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, phys_addr_t addr2, phys_addr_t size2) { return 1; } -static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) +static long __init_memblock memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long coalesced = 0; long adjacent, i; @@ -348,13 +348,13 @@ static long memblock_add_region(struct memblock_type *type, phys_addr_t base, ph return 0; } -long memblock_add(phys_addr_t base, phys_addr_t size) +long __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) { return memblock_add_region(&memblock.memory, base, size); } -static long __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size) +static long __init_memblock __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { phys_addr_t rgnbegin, rgnend; phys_addr_t end = base + size; @@ -402,7 +402,7 @@ static long __memblock_remove(struct memblock_type *type, phys_addr_t base, phys return memblock_add_region(type, end, rgnend - end); } -long memblock_remove(phys_addr_t base, phys_addr_t size) +long __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) { return __memblock_remove(&memblock.memory, base, size); } @@ -568,7 +568,7 @@ phys_addr_t __init memblock_phys_mem_size(void) return memblock.memory_size; } -phys_addr_t memblock_end_of_DRAM(void) +phys_addr_t __init_memblock memblock_end_of_DRAM(void) { int idx = memblock.memory.cnt - 1; @@ -655,7 +655,7 @@ int memblock_is_region_memory(phys_addr_t base, phys_addr_t size) memblock.reserved.regions[idx].size) >= (base + size); } -int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) +int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) { return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } @@ -666,7 +666,7 @@ void __init memblock_set_current_limit(phys_addr_t limit) memblock.current_limit = limit; } -static void memblock_dump(struct memblock_type *region, char *name) +static void __init_memblock memblock_dump(struct memblock_type *region, char *name) { unsigned long long base, size; int i; @@ -682,7 +682,7 @@ static void memblock_dump(struct memblock_type *region, char *name) } } -void memblock_dump_all(void) +void __init_memblock memblock_dump_all(void) { if (!memblock_debug) return; @@ -748,7 +748,7 @@ static int __init early_memblock(char *p) } early_param("memblock", early_memblock); -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) && !defined(ARCH_DISCARD_MEMBLOCK) static int memblock_debug_show(struct seq_file *m, void *private) { -- cgit v1.2.3 From 5303b68f57c227c27193a14e57dd12be27cd670f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:38:40 +1000 Subject: memblock: Add memblock_find_in_range() This is a wrapper for memblock_find_base() using slightly different arguments (start,end instead of start,size for example) in order to make it easier to convert existing arch/x86 code. Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 ++ mm/memblock.c | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 3978e6a8e824..4df09bdcae42 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -47,6 +47,8 @@ extern int memblock_can_resize; #define memblock_dbg(fmt, ...) \ if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +u64 memblock_find_in_range(u64 start, u64 end, u64 size, u64 align); + extern void __init memblock_init(void); extern void __init memblock_analyze(void); extern long memblock_add(phys_addr_t base, phys_addr_t size); diff --git a/mm/memblock.c b/mm/memblock.c index a17faea37d47..b7ab10a2ef46 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -162,6 +162,14 @@ static phys_addr_t __init memblock_find_base(phys_addr_t size, phys_addr_t align return MEMBLOCK_ERROR; } +/* + * Find a free area with specified alignment in a specific range. + */ +u64 __init_memblock memblock_find_in_range(u64 start, u64 end, u64 size, u64 align) +{ + return memblock_find_base(size, align, start, end); +} + static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) { unsigned long i; -- cgit v1.2.3 From 73e4008ddddc84d5f2499c17012b340a0dae153e Mon Sep 17 00:00:00 2001 From: Nikolai Kondrashov Date: Fri, 6 Aug 2010 23:03:06 +0400 Subject: HID: allow resizing and replacing report descriptors Update hid_driver's report_fixup prototype to allow changing report descriptor size and/or returning completely different report descriptor. Update existing usage accordingly. This is to give more freedom in descriptor fixup and to allow having a whole fixed descriptor in the code for the sake of readability. Signed-off-by: Nikolai Kondrashov Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 7 ++++--- drivers/hid/hid-cherry.c | 7 ++++--- drivers/hid/hid-core.c | 2 +- drivers/hid/hid-cypress.c | 9 +++++---- drivers/hid/hid-elecom.c | 7 ++++--- drivers/hid/hid-kye.c | 7 ++++--- drivers/hid/hid-lg.c | 9 +++++---- drivers/hid/hid-microsoft.c | 7 ++++--- drivers/hid/hid-monterey.c | 7 ++++--- drivers/hid/hid-ortek.c | 7 ++++--- drivers/hid/hid-petalynx.c | 7 ++++--- drivers/hid/hid-prodikeys.c | 7 ++++--- drivers/hid/hid-samsung.c | 20 +++++++++++--------- drivers/hid/hid-sony.c | 7 ++++--- drivers/hid/hid-sunplus.c | 7 ++++--- drivers/hid/hid-zydacron.c | 7 ++++--- include/linux/hid.h | 4 ++-- 17 files changed, 72 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index bba05d0a8980..eaeca564a8d3 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -246,17 +246,18 @@ static int apple_event(struct hid_device *hdev, struct hid_field *field, /* * MacBook JIS keyboard has wrong logical maximum */ -static void apple_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *apple_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { struct apple_sc *asc = hid_get_drvdata(hdev); - if ((asc->quirks & APPLE_RDESC_JIS) && rsize >= 60 && + if ((asc->quirks & APPLE_RDESC_JIS) && *rsize >= 60 && rdesc[53] == 0x65 && rdesc[59] == 0x65) { dev_info(&hdev->dev, "fixing up MacBook JIS keyboard report " "descriptor\n"); rdesc[53] = rdesc[59] = 0xe7; } + return rdesc; } static void apple_setup_input(struct input_dev *input) diff --git a/drivers/hid/hid-cherry.c b/drivers/hid/hid-cherry.c index 24663a8717b1..e880086c2311 100644 --- a/drivers/hid/hid-cherry.c +++ b/drivers/hid/hid-cherry.c @@ -26,15 +26,16 @@ * Cherry Cymotion keyboard have an invalid HID report descriptor, * that needs fixing before we can parse it. */ -static void ch_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *ch_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 17 && rdesc[11] == 0x3c && rdesc[12] == 0x02) { + if (*rsize >= 17 && rdesc[11] == 0x3c && rdesc[12] == 0x02) { dev_info(&hdev->dev, "fixing up Cherry Cymotion report " "descriptor\n"); rdesc[11] = rdesc[16] = 0xff; rdesc[12] = rdesc[17] = 0x03; } + return rdesc; } #define ch_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index e635199a0cd2..ec20e83cbd61 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -651,7 +651,7 @@ int hid_parse_report(struct hid_device *device, __u8 *start, }; if (device->driver->report_fixup) - device->driver->report_fixup(device, start, size); + start = device->driver->report_fixup(device, start, &size); device->rdesc = kmemdup(start, size, GFP_KERNEL); if (device->rdesc == NULL) diff --git a/drivers/hid/hid-cypress.c b/drivers/hid/hid-cypress.c index 998b6f443d7d..4cd0e2345991 100644 --- a/drivers/hid/hid-cypress.c +++ b/drivers/hid/hid-cypress.c @@ -31,16 +31,16 @@ * Some USB barcode readers from cypress have usage min and usage max in * the wrong order */ -static void cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); unsigned int i; if (!(quirks & CP_RDESC_SWAPPED_MIN_MAX)) - return; + return rdesc; - for (i = 0; i < rsize - 4; i++) + for (i = 0; i < *rsize - 4; i++) if (rdesc[i] == 0x29 && rdesc[i + 2] == 0x19) { __u8 tmp; @@ -50,6 +50,7 @@ static void cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[i + 3] = rdesc[i + 1]; rdesc[i + 1] = tmp; } + return rdesc; } static int cp_input_mapped(struct hid_device *hdev, struct hid_input *hi, diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c index 7a40878f46b4..6e31f305397d 100644 --- a/drivers/hid/hid-elecom.c +++ b/drivers/hid/hid-elecom.c @@ -20,14 +20,15 @@ #include "hid-ids.h" -static void elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) { + if (*rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) { dev_info(&hdev->dev, "Fixing up Elecom BM084 " "report descriptor.\n"); rdesc[47] = 0x00; } + return rdesc; } static const struct hid_device_id elecom_devices[] = { diff --git a/drivers/hid/hid-kye.c b/drivers/hid/hid-kye.c index f8871712b7b5..817247ee006c 100644 --- a/drivers/hid/hid-kye.c +++ b/drivers/hid/hid-kye.c @@ -23,10 +23,10 @@ * - report size 8 count 1 must be size 1 count 8 for button bitfield * - change the button usage range to 4-7 for the extra buttons */ -static void kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 74 && + if (*rsize >= 74 && rdesc[61] == 0x05 && rdesc[62] == 0x08 && rdesc[63] == 0x19 && rdesc[64] == 0x08 && rdesc[65] == 0x29 && rdesc[66] == 0x0f && @@ -40,6 +40,7 @@ static void kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[72] = 0x01; rdesc[74] = 0x08; } + return rdesc; } static const struct hid_device_id kye_devices[] = { diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c index f6433d8050a9..68c0b68856c7 100644 --- a/drivers/hid/hid-lg.c +++ b/drivers/hid/hid-lg.c @@ -41,25 +41,26 @@ * above the logical maximum described in descriptor. This extends * the original value of 0x28c of logical maximum to 0x104d */ -static void lg_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *lg_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); - if ((quirks & LG_RDESC) && rsize >= 90 && rdesc[83] == 0x26 && + if ((quirks & LG_RDESC) && *rsize >= 90 && rdesc[83] == 0x26 && rdesc[84] == 0x8c && rdesc[85] == 0x02) { dev_info(&hdev->dev, "fixing up Logitech keyboard report " "descriptor\n"); rdesc[84] = rdesc[89] = 0x4d; rdesc[85] = rdesc[90] = 0x10; } - if ((quirks & LG_RDESC_REL_ABS) && rsize >= 50 && + if ((quirks & LG_RDESC_REL_ABS) && *rsize >= 50 && rdesc[32] == 0x81 && rdesc[33] == 0x06 && rdesc[49] == 0x81 && rdesc[50] == 0x06) { dev_info(&hdev->dev, "fixing up rel/abs in Logitech " "report descriptor\n"); rdesc[33] = rdesc[50] = 0x02; } + return rdesc; } #define lg_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c index 359cc447c6c6..dc618c33d0a2 100644 --- a/drivers/hid/hid-microsoft.c +++ b/drivers/hid/hid-microsoft.c @@ -33,18 +33,19 @@ * Microsoft Wireless Desktop Receiver (Model 1028) has * 'Usage Min/Max' where it ought to have 'Physical Min/Max' */ -static void ms_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *ms_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); - if ((quirks & MS_RDESC) && rsize == 571 && rdesc[557] == 0x19 && + if ((quirks & MS_RDESC) && *rsize == 571 && rdesc[557] == 0x19 && rdesc[559] == 0x29) { dev_info(&hdev->dev, "fixing up Microsoft Wireless Receiver " "Model 1028 report descriptor\n"); rdesc[557] = 0x35; rdesc[559] = 0x45; } + return rdesc; } #define ms_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-monterey.c b/drivers/hid/hid-monterey.c index 2cd05aa244b9..c95c31e2d869 100644 --- a/drivers/hid/hid-monterey.c +++ b/drivers/hid/hid-monterey.c @@ -22,14 +22,15 @@ #include "hid-ids.h" -static void mr_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *mr_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 30 && rdesc[29] == 0x05 && rdesc[30] == 0x09) { + if (*rsize >= 30 && rdesc[29] == 0x05 && rdesc[30] == 0x09) { dev_info(&hdev->dev, "fixing up button/consumer in HID report " "descriptor\n"); rdesc[30] = 0x0c; } + return rdesc; } #define mr_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-ortek.c b/drivers/hid/hid-ortek.c index aa9a960f73a4..2e79716dca31 100644 --- a/drivers/hid/hid-ortek.c +++ b/drivers/hid/hid-ortek.c @@ -19,14 +19,15 @@ #include "hid-ids.h" -static void ortek_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *ortek_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 56 && rdesc[54] == 0x25 && rdesc[55] == 0x01) { + if (*rsize >= 56 && rdesc[54] == 0x25 && rdesc[55] == 0x01) { dev_info(&hdev->dev, "Fixing up Ortek WKB-2000 " "report descriptor.\n"); rdesc[55] = 0x92; } + return rdesc; } static const struct hid_device_id ortek_devices[] = { diff --git a/drivers/hid/hid-petalynx.c b/drivers/hid/hid-petalynx.c index 500fbd0652dc..308d6ae48a3e 100644 --- a/drivers/hid/hid-petalynx.c +++ b/drivers/hid/hid-petalynx.c @@ -23,10 +23,10 @@ #include "hid-ids.h" /* Petalynx Maxter Remote has maximum for consumer page set too low */ -static void pl_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *pl_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 60 && rdesc[39] == 0x2a && rdesc[40] == 0xf5 && + if (*rsize >= 60 && rdesc[39] == 0x2a && rdesc[40] == 0xf5 && rdesc[41] == 0x00 && rdesc[59] == 0x26 && rdesc[60] == 0xf9 && rdesc[61] == 0x00) { dev_info(&hdev->dev, "fixing up Petalynx Maxter Remote report " @@ -34,6 +34,7 @@ static void pl_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[60] = 0xfa; rdesc[40] = 0xfa; } + return rdesc; } #define pl_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c index 845f428b8090..48eab84f53b5 100644 --- a/drivers/hid/hid-prodikeys.c +++ b/drivers/hid/hid-prodikeys.c @@ -740,10 +740,10 @@ int pcmidi_snd_terminate(struct pcmidi_snd *pm) /* * PC-MIDI report descriptor for report id is wrong. */ -static void pk_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *pk_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize == 178 && + if (*rsize == 178 && rdesc[111] == 0x06 && rdesc[112] == 0x00 && rdesc[113] == 0xff) { dev_info(&hdev->dev, "fixing up pc-midi keyboard report " @@ -751,6 +751,7 @@ static void pk_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[144] = 0x18; /* report 4: was 0x10 report count */ } + return rdesc; } static int pk_input_mapping(struct hid_device *hdev, struct hid_input *hi, diff --git a/drivers/hid/hid-samsung.c b/drivers/hid/hid-samsung.c index bda0fd60c98d..35894444e000 100644 --- a/drivers/hid/hid-samsung.c +++ b/drivers/hid/hid-samsung.c @@ -61,10 +61,10 @@ static inline void samsung_irda_dev_trace(struct hid_device *hdev, "descriptor\n", rsize); } -static void samsung_irda_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *samsung_irda_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize == 184 && rdesc[175] == 0x25 && rdesc[176] == 0x40 && + if (*rsize == 184 && rdesc[175] == 0x25 && rdesc[176] == 0x40 && rdesc[177] == 0x75 && rdesc[178] == 0x30 && rdesc[179] == 0x95 && rdesc[180] == 0x01 && rdesc[182] == 0x40) { @@ -74,24 +74,25 @@ static void samsung_irda_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[180] = 0x06; rdesc[182] = 0x42; } else - if (rsize == 203 && rdesc[192] == 0x15 && rdesc[193] == 0x0 && + if (*rsize == 203 && rdesc[192] == 0x15 && rdesc[193] == 0x0 && rdesc[194] == 0x25 && rdesc[195] == 0x12) { samsung_irda_dev_trace(hdev, 203); rdesc[193] = 0x1; rdesc[195] = 0xf; } else - if (rsize == 135 && rdesc[124] == 0x15 && rdesc[125] == 0x0 && + if (*rsize == 135 && rdesc[124] == 0x15 && rdesc[125] == 0x0 && rdesc[126] == 0x25 && rdesc[127] == 0x11) { samsung_irda_dev_trace(hdev, 135); rdesc[125] = 0x1; rdesc[127] = 0xe; } else - if (rsize == 171 && rdesc[160] == 0x15 && rdesc[161] == 0x0 && + if (*rsize == 171 && rdesc[160] == 0x15 && rdesc[161] == 0x0 && rdesc[162] == 0x25 && rdesc[163] == 0x01) { samsung_irda_dev_trace(hdev, 171); rdesc[161] = 0x1; rdesc[163] = 0x3; } + return rdesc; } #define samsung_kbd_mouse_map_key_clear(c) \ @@ -130,11 +131,12 @@ static int samsung_kbd_mouse_input_mapping(struct hid_device *hdev, return 1; } -static void samsung_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *samsung_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { if (USB_DEVICE_ID_SAMSUNG_IR_REMOTE == hdev->product) - samsung_irda_report_fixup(hdev, rdesc, rsize); + rdesc = samsung_irda_report_fixup(hdev, rdesc, rsize); + return rdesc; } static int samsung_input_mapping(struct hid_device *hdev, struct hid_input *hi, diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 402d5574b574..9fa034915185 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -31,17 +31,18 @@ struct sony_sc { }; /* Sony Vaio VGX has wrongly mouse pointer declared as constant */ -static void sony_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *sony_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { struct sony_sc *sc = hid_get_drvdata(hdev); if ((sc->quirks & VAIO_RDESC_CONSTANT) && - rsize >= 56 && rdesc[54] == 0x81 && rdesc[55] == 0x07) { + *rsize >= 56 && rdesc[54] == 0x81 && rdesc[55] == 0x07) { dev_info(&hdev->dev, "Fixing up Sony Vaio VGX report " "descriptor\n"); rdesc[55] = 0x06; } + return rdesc; } /* diff --git a/drivers/hid/hid-sunplus.c b/drivers/hid/hid-sunplus.c index 438107d9f1b2..164ed568f6cf 100644 --- a/drivers/hid/hid-sunplus.c +++ b/drivers/hid/hid-sunplus.c @@ -22,16 +22,17 @@ #include "hid-ids.h" -static void sp_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *sp_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 107 && rdesc[104] == 0x26 && rdesc[105] == 0x80 && + if (*rsize >= 107 && rdesc[104] == 0x26 && rdesc[105] == 0x80 && rdesc[106] == 0x03) { dev_info(&hdev->dev, "fixing up Sunplus Wireless Desktop " "report descriptor\n"); rdesc[105] = rdesc[110] = 0x03; rdesc[106] = rdesc[111] = 0x21; } + return rdesc; } #define sp_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-zydacron.c b/drivers/hid/hid-zydacron.c index 9e8d35a203e4..aac1f9273149 100644 --- a/drivers/hid/hid-zydacron.c +++ b/drivers/hid/hid-zydacron.c @@ -27,10 +27,10 @@ struct zc_device { * Zydacron remote control has an invalid HID report descriptor, * that needs fixing before we can parse it. */ -static void zc_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *zc_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 253 && + if (*rsize >= 253 && rdesc[0x96] == 0xbc && rdesc[0x97] == 0xff && rdesc[0xca] == 0xbc && rdesc[0xcb] == 0xff && rdesc[0xe1] == 0xbc && rdesc[0xe2] == 0xff) { @@ -40,6 +40,7 @@ static void zc_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[0x96] = rdesc[0xca] = rdesc[0xe1] = 0x0c; rdesc[0x97] = rdesc[0xcb] = rdesc[0xe2] = 0x00; } + return rdesc; } #define zc_map_key_clear(c) \ diff --git a/include/linux/hid.h b/include/linux/hid.h index 42a0f1d11365..0a34fb071379 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -626,8 +626,8 @@ struct hid_driver { int (*event)(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value); - void (*report_fixup)(struct hid_device *hdev, __u8 *buf, - unsigned int size); + __u8 *(*report_fixup)(struct hid_device *hdev, __u8 *buf, + unsigned int *size); int (*input_mapping)(struct hid_device *hdev, struct hid_input *hidinput, struct hid_field *field, -- cgit v1.2.3 From c84e032e145775032fa9078b55e6333dd866603b Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Sat, 14 Aug 2010 09:43:22 -0700 Subject: include/video/vga.h: update web address. The below updates a broken web address to one(hopefully) thats the new correct address. Signed-off-by: Justin P. Mattock Signed-off-by: Jiri Kosina --- include/video/vga.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/video/vga.h b/include/video/vga.h index b49a5120ca2d..2b8691f7d256 100644 --- a/include/video/vga.h +++ b/include/video/vga.h @@ -5,7 +5,7 @@ * * Copyright history from vga16fb.c: * Copyright 1999 Ben Pfaff and Petr Vandrovec - * Based on VGA info at http://www.goodnet.com/~tinara/FreeVGA/home.htm + * Based on VGA info at http://www.osdever.net/FreeVGA/home.htm * Based on VESA framebuffer (c) 1998 Gerd Knorr * * This file is subject to the terms and conditions of the GNU General -- cgit v1.2.3 From 4e6cbfd09c66893e5134c9896e9af353c2322b66 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Thu, 29 Jul 2010 16:14:13 -0400 Subject: mac80211: support use of NAPI for bottom-half processing This patch implement basic infrastructure to support use of NAPI by mac80211-based hardware drivers. Because mac80211 devices can support multiple netdevs, a dummy netdev is used for interfacing with the NAPI code in the core of the network stack. That structure is hidden from the hardware drivers, but the actual napi_struct is exposed in the ieee80211_hw structure so that the poll routines in drivers can retrieve that structure. Hardware drivers can also specify their own weight value for NAPI polling. Signed-off-by: John W. Linville --- include/net/mac80211.h | 24 ++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 5 +++++ net/mac80211/iface.c | 4 ++++ net/mac80211/main.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b0787a1dea90..3f1e03b521ec 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1102,6 +1102,10 @@ enum ieee80211_hw_flags { * * @max_rates: maximum number of alternate rate retry stages * @max_rate_tries: maximum number of tries for each stage + * + * @napi_weight: weight used for NAPI polling. You must specify an + * appropriate value here if a napi_poll operation is provided + * by your driver. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -1113,6 +1117,7 @@ struct ieee80211_hw { int channel_change_time; int vif_data_size; int sta_data_size; + int napi_weight; u16 queues; u16 max_listen_interval; s8 max_signal; @@ -1687,6 +1692,8 @@ enum ieee80211_ampdu_mlme_action { * switch operation for CSAs received from the AP may implement this * callback. They must then call ieee80211_chswitch_done() to indicate * completion of the channel switch. + * + * @napi_poll: Poll Rx queue for incoming data frames. */ struct ieee80211_ops { int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); @@ -1752,6 +1759,7 @@ struct ieee80211_ops { void (*flush)(struct ieee80211_hw *hw, bool drop); void (*channel_switch)(struct ieee80211_hw *hw, struct ieee80211_channel_switch *ch_switch); + int (*napi_poll)(struct ieee80211_hw *hw, int budget); }; /** @@ -1897,6 +1905,22 @@ void ieee80211_free_hw(struct ieee80211_hw *hw); */ void ieee80211_restart_hw(struct ieee80211_hw *hw); +/** ieee80211_napi_schedule - schedule NAPI poll + * + * Use this function to schedule NAPI polling on a device. + * + * @hw: the hardware to start polling + */ +void ieee80211_napi_schedule(struct ieee80211_hw *hw); + +/** ieee80211_napi_complete - complete NAPI polling + * + * Use this function to finish NAPI polling on a device. + * + * @hw: the hardware to stop polling + */ +void ieee80211_napi_complete(struct ieee80211_hw *hw); + /** * ieee80211_rx - receive frame * diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 65e0ed6c2975..79d56454484a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -870,6 +870,11 @@ struct ieee80211_local { struct dentry *keys; } debugfs; #endif + + /* dummy netdev for use w/ NAPI */ + struct net_device napi_dev; + + struct napi_struct napi; }; static inline struct ieee80211_sub_if_data * diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ebbe264e2b0b..c1008a9d7bfb 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -187,6 +187,8 @@ static int ieee80211_open(struct net_device *dev) res = drv_start(local); if (res) goto err_del_bss; + if (local->ops->napi_poll) + napi_enable(&local->napi); /* we're brought up, everything changes */ hw_reconf_flags = ~0; ieee80211_led_radio(local, true); @@ -519,6 +521,8 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_recalc_ps(local, -1); if (local->open_count == 0) { + if (local->ops->napi_poll) + napi_disable(&local->napi); ieee80211_clear_tx_pending(local); ieee80211_stop_device(local); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 798a91b100cc..1ed956c9cb8b 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -390,6 +390,30 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, } #endif +static int ieee80211_napi_poll(struct napi_struct *napi, int budget) +{ + struct ieee80211_local *local = + container_of(napi, struct ieee80211_local, napi); + + return local->ops->napi_poll(&local->hw, budget); +} + +void ieee80211_napi_schedule(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + + napi_schedule(&local->napi); +} +EXPORT_SYMBOL(ieee80211_napi_schedule); + +void ieee80211_napi_complete(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + + napi_complete(&local->napi); +} +EXPORT_SYMBOL(ieee80211_napi_complete); + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -494,6 +518,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, skb_queue_head_init(&local->skb_queue); skb_queue_head_init(&local->skb_queue_unreliable); + /* init dummy netdev for use w/ NAPI */ + init_dummy_netdev(&local->napi_dev); + return local_to_hw(local); } EXPORT_SYMBOL(ieee80211_alloc_hw); @@ -683,6 +710,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_ifa; #endif + netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll, + local->hw.napi_weight); + return 0; #ifdef CONFIG_INET -- cgit v1.2.3 From 7da7cc1d42d8ce02cca16df8c021e6d657f1f8fd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Aug 2010 17:02:38 +0200 Subject: mac80211: per interface idle notification Sometimes we don't just need to know whether or not the device is idle, but also per interface. This adds that reporting capability to mac80211. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 ++++++ net/mac80211/ibss.c | 8 +++++-- net/mac80211/ieee80211_i.h | 3 +++ net/mac80211/iface.c | 53 +++++++++++++++++++++++++++++++++++++++------- net/mac80211/mlme.c | 17 +++++++++++++-- net/mac80211/scan.c | 2 ++ net/mac80211/work.c | 8 +++---- 7 files changed, 81 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3f1e03b521ec..3a3c26f647b7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -149,6 +149,7 @@ struct ieee80211_low_level_stats { * @BSS_CHANGED_ARP_FILTER: Hardware ARP filter address list or state changed. * @BSS_CHANGED_QOS: QoS for this association was enabled/disabled. Note * that it is only ever disabled for station mode. + * @BSS_CHANGED_IDLE: Idle changed for this BSS/interface. */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -165,6 +166,7 @@ enum ieee80211_bss_change { BSS_CHANGED_IBSS = 1<<11, BSS_CHANGED_ARP_FILTER = 1<<12, BSS_CHANGED_QOS = 1<<13, + BSS_CHANGED_IDLE = 1<<14, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -223,6 +225,9 @@ enum ieee80211_bss_change { * hardware must not perform any ARP filtering. Note, that the filter will * be enabled also in promiscuous mode. * @qos: This is a QoS-enabled BSS. + * @idle: This interface is idle. There's also a global idle flag in the + * hardware config which may be more appropriate depending on what + * your driver/device needs to do. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -247,6 +252,7 @@ struct ieee80211_bss_conf { u8 arp_addr_cnt; bool arp_filter_enabled; bool qos; + bool idle; }; /** diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index c691780725a7..32af97108425 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -920,12 +920,14 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN); sdata->u.ibss.ssid_len = params->ssid_len; + mutex_unlock(&sdata->u.ibss.mtx); + + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); ieee80211_queue_work(&sdata->local->hw, &sdata->work); - mutex_unlock(&sdata->u.ibss.mtx); - return 0; } @@ -980,7 +982,9 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) mutex_unlock(&sdata->u.ibss.mtx); + mutex_lock(&local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&local->mtx); return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b44e03a02da9..98e783c6a363 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -497,6 +497,9 @@ struct ieee80211_sub_if_data { */ bool ht_opmode_valid; + /* to detect idle changes */ + bool old_idle; + /* Fragment table for host-based reassembly */ struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; unsigned int fragment_next; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index c1008a9d7bfb..9459aeee0ddc 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -309,7 +309,9 @@ static int ieee80211_open(struct net_device *dev) if (sdata->flags & IEEE80211_SDATA_PROMISC) atomic_inc(&local->iff_promiscs); + mutex_lock(&local->mtx); hw_reconf_flags |= __ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); local->open_count++; if (hw_reconf_flags) { @@ -516,7 +518,9 @@ static int ieee80211_stop(struct net_device *dev) sdata->bss = NULL; + mutex_lock(&local->mtx); hw_reconf_flags |= __ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); ieee80211_recalc_ps(local, -1); @@ -1199,28 +1203,61 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; int count = 0; + bool working = false, scanning = false; + struct ieee80211_work *wk; - if (!list_empty(&local->work_list)) - return ieee80211_idle_off(local, "working"); - - if (local->scanning) - return ieee80211_idle_off(local, "scanning"); +#ifdef CONFIG_PROVE_LOCKING + WARN_ON(debug_locks && !lockdep_rtnl_is_held() && + !lockdep_is_held(&local->iflist_mtx)); +#endif + lockdep_assert_held(&local->mtx); list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) + if (!ieee80211_sdata_running(sdata)) { + sdata->vif.bss_conf.idle = true; continue; + } + + sdata->old_idle = sdata->vif.bss_conf.idle; + /* do not count disabled managed interfaces */ if (sdata->vif.type == NL80211_IFTYPE_STATION && - !sdata->u.mgd.associated) + !sdata->u.mgd.associated) { + sdata->vif.bss_conf.idle = true; continue; + } /* do not count unused IBSS interfaces */ if (sdata->vif.type == NL80211_IFTYPE_ADHOC && - !sdata->u.ibss.ssid_len) + !sdata->u.ibss.ssid_len) { + sdata->vif.bss_conf.idle = true; continue; + } /* count everything else */ count++; } + list_for_each_entry(wk, &local->work_list, list) { + working = true; + wk->sdata->vif.bss_conf.idle = false; + } + + if (local->scan_sdata) { + scanning = true; + local->scan_sdata->vif.bss_conf.idle = false; + } + + list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->old_idle == sdata->vif.bss_conf.idle) + continue; + if (!ieee80211_sdata_running(sdata)) + continue; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); + } + + if (working) + return ieee80211_idle_off(local, "working"); + if (scanning) + return ieee80211_idle_off(local, "scanning"); if (!count) return ieee80211_idle_on(local); else diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 17e9257a61d8..82e7cec5179c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1103,8 +1103,11 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid); ieee80211_set_disassoc(sdata, true); - ieee80211_recalc_idle(local); mutex_unlock(&ifmgd->mtx); + + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); /* * must be outside lock due to cfg80211, * but that's not a problem. @@ -1173,7 +1176,9 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, sdata->name, bssid, reason_code); ieee80211_set_disassoc(sdata, true); + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); return RX_MGMT_CFG80211_DEAUTH; } @@ -1203,7 +1208,9 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, sdata->name, mgmt->sa, reason_code); ieee80211_set_disassoc(sdata, true); + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); return RX_MGMT_CFG80211_DISASSOC; } @@ -1840,8 +1847,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) " after %dms, disconnecting.\n", bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); ieee80211_set_disassoc(sdata, true); - ieee80211_recalc_idle(local); mutex_unlock(&ifmgd->mtx); + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); /* * must be outside lock due to cfg80211, * but that's not a problem. @@ -2319,7 +2328,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, if (assoc_bss) sta_info_destroy_addr(sdata, bssid); + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); return 0; } @@ -2357,7 +2368,9 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, cookie, !req->local_state_change); sta_info_destroy_addr(sdata, bssid); + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); return 0; } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index f31f549733b1..31f233f7f51a 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -304,7 +304,9 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) ieee80211_offchannel_return(local, true); done: + mutex_lock(&local->mtx); ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); ieee80211_mesh_notify_scan_completed(local); diff --git a/net/mac80211/work.c b/net/mac80211/work.c index b98af64f5862..ae344d1ba056 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -888,10 +888,10 @@ static void ieee80211_work_work(struct work_struct *work) while ((skb = skb_dequeue(&local->work_skb_queue))) ieee80211_work_rx_queued_mgmt(local, skb); - ieee80211_recalc_idle(local); - mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + list_for_each_entry_safe(wk, tmp, &local->work_list, list) { bool started = wk->started; @@ -1001,10 +1001,10 @@ static void ieee80211_work_work(struct work_struct *work) &local->scan_work, round_jiffies_relative(0)); - mutex_unlock(&local->mtx); - ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); + list_for_each_entry_safe(wk, tmp, &free_work, list) { wk->done(wk, NULL); list_del(&wk->list); -- cgit v1.2.3 From d1f5b7a34aa5ff703c4966ea2652d4212ac75940 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Aug 2010 17:05:55 +0200 Subject: mac80211: allow drivers to request SM PS mode change Sometimes drivers have more information than the stack about how their antennas/chains are used, and may require that the SM PS mode be changed. This could happen, for example, when detecting that the user disconnected an antenna. Thus this patch introduces API to allow drivers to request SM PS mode changes. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 12 ++++++++++++ net/mac80211/ht.c | 28 ++++++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 6 +++++- net/mac80211/mlme.c | 3 +++ 4 files changed, 48 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3a3c26f647b7..871ed1de736a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2548,6 +2548,18 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, */ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); +/** + * ieee80211_request_smps - request SM PS transition + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @mode: new SM PS mode + * + * This allows the driver to request an SM PS transition in managed + * mode. This is useful when the driver has more information than + * the stack about possible interference, for example by bluetooth. + */ +void ieee80211_request_smps(struct ieee80211_vif *vif, + enum ieee80211_smps_mode smps_mode); + /* Rate control API */ /** diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 9d101fb33861..11f74f5f7b2f 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -265,3 +265,31 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, return 0; } + +void ieee80211_request_smps_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + u.mgd.request_smps_work); + + mutex_lock(&sdata->u.mgd.mtx); + __ieee80211_request_smps(sdata, sdata->u.mgd.driver_smps_mode); + mutex_unlock(&sdata->u.mgd.mtx); +} + +void ieee80211_request_smps(struct ieee80211_vif *vif, + enum ieee80211_smps_mode smps_mode) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) + return; + + if (WARN_ON(smps_mode == IEEE80211_SMPS_OFF)) + smps_mode = IEEE80211_SMPS_AUTOMATIC; + + ieee80211_queue_work(&sdata->local->hw, + &sdata->u.mgd.request_smps_work); +} +/* this might change ... don't want non-open drivers using it */ +EXPORT_SYMBOL_GPL(ieee80211_request_smps); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 98e783c6a363..1bf05bfd149d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -343,7 +343,10 @@ struct ieee80211_if_managed { unsigned long timers_running; /* used for quiesce/restart */ bool powersave; /* powersave requested for this iface */ enum ieee80211_smps_mode req_smps, /* requested smps mode */ - ap_smps; /* smps mode AP thinks we're in */ + ap_smps, /* smps mode AP thinks we're in */ + driver_smps_mode; /* smps mode request */ + + struct work_struct request_smps_work; unsigned int flags; @@ -1113,6 +1116,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, const u8 *bssid); +void ieee80211_request_smps_work(struct work_struct *work); void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 82e7cec5179c..38996a44aa8e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1926,6 +1926,8 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) * time -- the code here is properly synchronised. */ + cancel_work_sync(&ifmgd->request_smps_work); + cancel_work_sync(&ifmgd->beacon_connection_loss_work); if (del_timer_sync(&ifmgd->timer)) set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); @@ -1961,6 +1963,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); INIT_WORK(&ifmgd->beacon_connection_loss_work, ieee80211_beacon_connection_loss_work); + INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_work); setup_timer(&ifmgd->timer, ieee80211_sta_timer, (unsigned long) sdata); setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, -- cgit v1.2.3 From 04600794958f1833f5571c6cde40f260ab557f55 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Aug 2010 17:45:15 +0200 Subject: cfg80211: support sysfs namespaces Enable using network namespaces with wireless devices even when sysfs is enabled using the same infrastructure that was built for netdevs. Signed-off-by: Johannes Berg Acked-by: "Eric W. Biederman" Signed-off-by: John W. Linville --- include/linux/netdevice.h | 2 ++ net/core/net-sysfs.c | 3 ++- net/wireless/core.c | 7 ++++++- net/wireless/sysfs.c | 9 +++++++++ 4 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 46c36ffe20ee..a4b14fd81c6a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2171,6 +2171,8 @@ extern void dev_seq_stop(struct seq_file *seq, void *v); extern int netdev_class_create_file(struct class_attribute *class_attr); extern void netdev_class_remove_file(struct class_attribute *class_attr); +extern struct kobj_ns_type_operations net_ns_type_operations; + extern char *netdev_drivername(const struct net_device *dev, char *buffer, int len); extern void linkwatch_run_queue(void); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index af4dfbadf2a0..7d748542d97e 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -789,12 +789,13 @@ static const void *net_netlink_ns(struct sock *sk) return sock_net(sk); } -static struct kobj_ns_type_operations net_ns_type_operations = { +struct kobj_ns_type_operations net_ns_type_operations = { .type = KOBJ_NS_TYPE_NET, .current_ns = net_current_ns, .netlink_ns = net_netlink_ns, .initial_ns = net_initial_ns, }; +EXPORT_SYMBOL_GPL(net_ns_type_operations); static void net_kobj_ns_exit(struct net *net) { diff --git a/net/wireless/core.c b/net/wireless/core.c index 541e2fff5e9c..c70909c3eae4 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -253,11 +253,16 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, WARN_ON(err); wdev->netdev->features |= NETIF_F_NETNS_LOCAL; } + + return err; } wiphy_net_set(&rdev->wiphy, net); - return err; + err = device_rename(&rdev->wiphy.dev, dev_name(&rdev->wiphy.dev)); + WARN_ON(err); + + return 0; } static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 9f2cef3e0ca0..74a9e3cce452 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -110,6 +110,13 @@ static int wiphy_resume(struct device *dev) return ret; } +static const void *wiphy_namespace(struct device *d) +{ + struct wiphy *wiphy = container_of(d, struct wiphy, dev); + + return wiphy_net(wiphy); +} + struct class ieee80211_class = { .name = "ieee80211", .owner = THIS_MODULE, @@ -120,6 +127,8 @@ struct class ieee80211_class = { #endif .suspend = wiphy_suspend, .resume = wiphy_resume, + .ns_type = &net_ns_type_operations, + .namespace = wiphy_namespace, }; int wiphy_sysfs_init(void) -- cgit v1.2.3 From 97359d1235eaf634fe706c9faa6e40181cc95fb8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Aug 2010 09:46:38 +0200 Subject: mac80211: use cipher suite selectors Currently, mac80211 translates the cfg80211 cipher suite selectors into ALG_* values. That isn't all too useful, and some drivers benefit from the distinction between WEP40 and WEP104 as well. Therefore, convert it all to use the cipher suite selectors. Signed-off-by: Johannes Berg Acked-by: Gertjan van Wingerde Signed-off-by: John W. Linville --- drivers/net/wireless/at76c50x-usb.c | 7 ++-- drivers/net/wireless/ath/ar9170/main.c | 31 ++++++++------- drivers/net/wireless/ath/ath5k/base.c | 9 +++-- drivers/net/wireless/ath/ath5k/pcu.c | 19 ++++----- drivers/net/wireless/ath/ath9k/common.c | 36 ++++++++++-------- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 5 ++- drivers/net/wireless/ath/ath9k/main.c | 5 ++- drivers/net/wireless/b43/main.c | 16 ++++---- drivers/net/wireless/iwlwifi/iwl-agn-tx.c | 16 ++++---- drivers/net/wireless/iwlwifi/iwl-agn.c | 4 +- drivers/net/wireless/iwlwifi/iwl-dev.h | 2 +- drivers/net/wireless/iwlwifi/iwl-sta.c | 24 ++++++------ drivers/net/wireless/iwlwifi/iwl3945-base.c | 43 ++++++++++++--------- drivers/net/wireless/p54/main.c | 9 +++-- drivers/net/wireless/p54/txrx.c | 17 +++++---- drivers/net/wireless/rt2x00/rt2500usb.c | 4 +- drivers/net/wireless/rt2x00/rt2x00crypto.c | 17 ++++----- drivers/net/wireless/wl12xx/wl1251_main.c | 13 ++++--- drivers/net/wireless/wl12xx/wl1251_tx.c | 4 +- drivers/net/wireless/wl12xx/wl1271_main.c | 13 ++++--- drivers/net/wireless/wl12xx/wl1271_tx.c | 4 +- include/net/mac80211.h | 18 +-------- net/mac80211/cfg.c | 44 ++++++--------------- net/mac80211/debugfs_key.c | 55 +++++++++++---------------- net/mac80211/driver-trace.h | 4 +- net/mac80211/key.c | 25 ++++++------ net/mac80211/key.h | 4 +- net/mac80211/rx.c | 18 +++++---- net/mac80211/tx.c | 22 ++++++----- net/mac80211/wep.c | 2 +- net/mac80211/wpa.c | 6 +-- 31 files changed, 236 insertions(+), 260 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c index d5140a87f073..a267bf55574c 100644 --- a/drivers/net/wireless/at76c50x-usb.c +++ b/drivers/net/wireless/at76c50x-usb.c @@ -2061,11 +2061,12 @@ static int at76_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, int i; - at76_dbg(DBG_MAC80211, "%s(): cmd %d key->alg %d key->keyidx %d " + at76_dbg(DBG_MAC80211, "%s(): cmd %d key->cipher %d key->keyidx %d " "key->keylen %d", - __func__, cmd, key->alg, key->keyidx, key->keylen); + __func__, cmd, key->cipher, key->keyidx, key->keylen); - if (key->alg != ALG_WEP) + if ((key->cipher != WLAN_CIPHER_SUITE_WEP40) && + (key->cipher != WLAN_CIPHER_SUITE_WEP104)) return -EOPNOTSUPP; key->hw_key_idx = key->keyidx; diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c index c67b05f3bcbd..29d2253ad7e4 100644 --- a/drivers/net/wireless/ath/ar9170/main.c +++ b/drivers/net/wireless/ath/ar9170/main.c @@ -1190,14 +1190,13 @@ static int ar9170_tx_prepare(struct ar9170 *ar, struct sk_buff *skb) if (info->control.hw_key) { icv = info->control.hw_key->icv_len; - switch (info->control.hw_key->alg) { - case ALG_WEP: + switch (info->control.hw_key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + case WLAN_CIPHER_SUITE_TKIP: keytype = AR9170_TX_MAC_ENCR_RC4; break; - case ALG_TKIP: - keytype = AR9170_TX_MAC_ENCR_RC4; - break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: keytype = AR9170_TX_MAC_ENCR_AES; break; default: @@ -1778,17 +1777,17 @@ static int ar9170_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if ((!ar->vif) || (ar->disable_offload)) return -EOPNOTSUPP; - switch (key->alg) { - case ALG_WEP: - if (key->keylen == WLAN_KEY_LEN_WEP40) - ktype = AR9170_ENC_ALG_WEP64; - else - ktype = AR9170_ENC_ALG_WEP128; + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + ktype = AR9170_ENC_ALG_WEP64; + break; + case WLAN_CIPHER_SUITE_WEP104: + ktype = AR9170_ENC_ALG_WEP128; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: ktype = AR9170_ENC_ALG_TKIP; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: ktype = AR9170_ENC_ALG_AESCCMP; break; default: @@ -1827,7 +1826,7 @@ static int ar9170_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (err) goto out; - if (key->alg == ALG_TKIP) { + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { err = ar9170_upload_key(ar, i, sta ? sta->addr : NULL, ktype, 1, key->key + 16, 16); if (err) @@ -1864,7 +1863,7 @@ static int ar9170_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (err) goto out; - if (key->alg == ALG_TKIP) { + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { err = ar9170_upload_key(ar, key->hw_key_idx, NULL, AR9170_ENC_ALG_NONE, 1, diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 373dcfec689c..a729b8774670 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -3297,11 +3297,12 @@ ath5k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (sc->opmode == NL80211_IFTYPE_AP) return -EOPNOTSUPP; - switch (key->alg) { - case ALG_WEP: - case ALG_TKIP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + case WLAN_CIPHER_SUITE_TKIP: break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: if (sc->ah->ah_aes_support) break; diff --git a/drivers/net/wireless/ath/ath5k/pcu.c b/drivers/net/wireless/ath/ath5k/pcu.c index 86fdb6ddfaaa..af273358c7cb 100644 --- a/drivers/net/wireless/ath/ath5k/pcu.c +++ b/drivers/net/wireless/ath/ath5k/pcu.c @@ -695,21 +695,18 @@ int ath5k_hw_reset_key(struct ath5k_hw *ah, u16 entry) static int ath5k_keycache_type(const struct ieee80211_key_conf *key) { - switch (key->alg) { - case ALG_TKIP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_TKIP: return AR5K_KEYTABLE_TYPE_TKIP; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: return AR5K_KEYTABLE_TYPE_CCM; - case ALG_WEP: - if (key->keylen == WLAN_KEY_LEN_WEP40) - return AR5K_KEYTABLE_TYPE_40; - else if (key->keylen == WLAN_KEY_LEN_WEP104) - return AR5K_KEYTABLE_TYPE_104; - return -EINVAL; + case WLAN_CIPHER_SUITE_WEP40: + return AR5K_KEYTABLE_TYPE_40; + case WLAN_CIPHER_SUITE_WEP104: + return AR5K_KEYTABLE_TYPE_104; default: return -EINVAL; } - return -EINVAL; } /* @@ -728,7 +725,7 @@ int ath5k_hw_set_key(struct ath5k_hw *ah, u16 entry, bool is_tkip; const u8 *key_ptr; - is_tkip = (key->alg == ALG_TKIP); + is_tkip = (key->cipher == WLAN_CIPHER_SUITE_TKIP); /* * key->keylen comes in from mac80211 in bytes. diff --git a/drivers/net/wireless/ath/ath9k/common.c b/drivers/net/wireless/ath/ath9k/common.c index c86f7d3593ab..3100c87a4fcd 100644 --- a/drivers/net/wireless/ath/ath9k/common.c +++ b/drivers/net/wireless/ath/ath9k/common.c @@ -46,12 +46,17 @@ int ath9k_cmn_get_hw_crypto_keytype(struct sk_buff *skb) struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); if (tx_info->control.hw_key) { - if (tx_info->control.hw_key->alg == ALG_WEP) + switch (tx_info->control.hw_key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: return ATH9K_KEY_TYPE_WEP; - else if (tx_info->control.hw_key->alg == ALG_TKIP) + case WLAN_CIPHER_SUITE_TKIP: return ATH9K_KEY_TYPE_TKIP; - else if (tx_info->control.hw_key->alg == ALG_CCMP) + case WLAN_CIPHER_SUITE_CCMP: return ATH9K_KEY_TYPE_AES; + default: + break; + } } return ATH9K_KEY_TYPE_CLEAR; @@ -212,11 +217,11 @@ static int ath_reserve_key_cache_slot_tkip(struct ath_common *common) } static int ath_reserve_key_cache_slot(struct ath_common *common, - enum ieee80211_key_alg alg) + u32 cipher) { int i; - if (alg == ALG_TKIP) + if (cipher == WLAN_CIPHER_SUITE_TKIP) return ath_reserve_key_cache_slot_tkip(common); /* First, try to find slots that would not be available for TKIP. */ @@ -293,14 +298,15 @@ int ath9k_cmn_key_config(struct ath_common *common, memset(&hk, 0, sizeof(hk)); - switch (key->alg) { - case ALG_WEP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: hk.kv_type = ATH9K_CIPHER_WEP; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: hk.kv_type = ATH9K_CIPHER_TKIP; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: hk.kv_type = ATH9K_CIPHER_AES_CCM; break; default: @@ -316,7 +322,7 @@ int ath9k_cmn_key_config(struct ath_common *common, memcpy(gmac, vif->addr, ETH_ALEN); gmac[0] |= 0x01; mac = gmac; - idx = ath_reserve_key_cache_slot(common, key->alg); + idx = ath_reserve_key_cache_slot(common, key->cipher); break; case NL80211_IFTYPE_ADHOC: if (!sta) { @@ -326,7 +332,7 @@ int ath9k_cmn_key_config(struct ath_common *common, memcpy(gmac, sta->addr, ETH_ALEN); gmac[0] |= 0x01; mac = gmac; - idx = ath_reserve_key_cache_slot(common, key->alg); + idx = ath_reserve_key_cache_slot(common, key->cipher); break; default: idx = key->keyidx; @@ -348,13 +354,13 @@ int ath9k_cmn_key_config(struct ath_common *common, return -EOPNOTSUPP; mac = sta->addr; - idx = ath_reserve_key_cache_slot(common, key->alg); + idx = ath_reserve_key_cache_slot(common, key->cipher); } if (idx < 0) return -ENOSPC; /* no free key cache entries */ - if (key->alg == ALG_TKIP) + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) ret = ath_setkey_tkip(common, idx, key->key, &hk, mac, vif->type == NL80211_IFTYPE_AP); else @@ -364,7 +370,7 @@ int ath9k_cmn_key_config(struct ath_common *common, return -EIO; set_bit(idx, common->keymap); - if (key->alg == ALG_TKIP) { + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { set_bit(idx + 64, common->keymap); if (common->splitmic) { set_bit(idx + 32, common->keymap); @@ -389,7 +395,7 @@ void ath9k_cmn_key_delete(struct ath_common *common, return; clear_bit(key->hw_key_idx, common->keymap); - if (key->alg != ALG_TKIP) + if (key->cipher != WLAN_CIPHER_SUITE_TKIP) return; clear_bit(key->hw_key_idx + 64, common->keymap); diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index 7d09b4b17bbd..4e345be62435 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -1585,9 +1585,10 @@ static int ath9k_htc_set_key(struct ieee80211_hw *hw, key->hw_key_idx = ret; /* push IV and Michael MIC generation to stack */ key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; - if (key->alg == ALG_TKIP) + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC; - if (priv->ah->sw_mgmt_crypto && key->alg == ALG_CCMP) + if (priv->ah->sw_mgmt_crypto && + key->cipher == WLAN_CIPHER_SUITE_CCMP) key->flags |= IEEE80211_KEY_FLAG_SW_MGMT; ret = 0; } diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index a3b0ea90439d..1165f909ef04 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1782,9 +1782,10 @@ static int ath9k_set_key(struct ieee80211_hw *hw, key->hw_key_idx = ret; /* push IV and Michael MIC generation to stack */ key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; - if (key->alg == ALG_TKIP) + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC; - if (sc->sc_ah->sw_mgmt_crypto && key->alg == ALG_CCMP) + if (sc->sc_ah->sw_mgmt_crypto && + key->cipher == WLAN_CIPHER_SUITE_CCMP) key->flags |= IEEE80211_KEY_FLAG_SW_MGMT; ret = 0; } diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index a3e2f2bfe3a7..a1186525c70d 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3759,17 +3759,17 @@ static int b43_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, } err = -EINVAL; - switch (key->alg) { - case ALG_WEP: - if (key->keylen == WLAN_KEY_LEN_WEP40) - algorithm = B43_SEC_ALGO_WEP40; - else - algorithm = B43_SEC_ALGO_WEP104; + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + algorithm = B43_SEC_ALGO_WEP40; + break; + case WLAN_CIPHER_SUITE_WEP104: + algorithm = B43_SEC_ALGO_WEP104; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: algorithm = B43_SEC_ALGO_TKIP; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: algorithm = B43_SEC_ALGO_AES; break; default: diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-tx.c b/drivers/net/wireless/iwlwifi/iwl-agn-tx.c index 69155aa448fb..3fc982e87921 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-tx.c @@ -470,8 +470,8 @@ static void iwlagn_tx_cmd_build_hwcrypto(struct iwl_priv *priv, { struct ieee80211_key_conf *keyconf = info->control.hw_key; - switch (keyconf->alg) { - case ALG_CCMP: + switch (keyconf->cipher) { + case WLAN_CIPHER_SUITE_CCMP: tx_cmd->sec_ctl = TX_CMD_SEC_CCM; memcpy(tx_cmd->key, keyconf->key, keyconf->keylen); if (info->flags & IEEE80211_TX_CTL_AMPDU) @@ -479,20 +479,20 @@ static void iwlagn_tx_cmd_build_hwcrypto(struct iwl_priv *priv, IWL_DEBUG_TX(priv, "tx_cmd with AES hwcrypto\n"); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: tx_cmd->sec_ctl = TX_CMD_SEC_TKIP; ieee80211_get_tkip_key(keyconf, skb_frag, IEEE80211_TKIP_P2_KEY, tx_cmd->key); IWL_DEBUG_TX(priv, "tx_cmd with tkip hwcrypto\n"); break; - case ALG_WEP: + case WLAN_CIPHER_SUITE_WEP104: + tx_cmd->sec_ctl |= TX_CMD_SEC_KEY128; + /* fall through */ + case WLAN_CIPHER_SUITE_WEP40: tx_cmd->sec_ctl |= (TX_CMD_SEC_WEP | (keyconf->keyidx & TX_CMD_SEC_MSK) << TX_CMD_SEC_SHIFT); - if (keyconf->keylen == WEP_KEY_LEN_128) - tx_cmd->sec_ctl |= TX_CMD_SEC_KEY128; - memcpy(&tx_cmd->key[3], keyconf->key, keyconf->keylen); IWL_DEBUG_TX(priv, "Configuring packet for WEP encryption " @@ -500,7 +500,7 @@ static void iwlagn_tx_cmd_build_hwcrypto(struct iwl_priv *priv, break; default: - IWL_ERR(priv, "Unknown encode alg %d\n", keyconf->alg); + IWL_ERR(priv, "Unknown encode cipher %x\n", keyconf->cipher); break; } } diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 43e078b87378..21a52ae05c0d 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -3389,7 +3389,9 @@ static int iwl_mac_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, * in 1X mode. * In legacy wep mode, we use another host command to the uCode. */ - if (key->alg == ALG_WEP && !sta && vif->type != NL80211_IFTYPE_AP) { + if ((key->cipher == WLAN_CIPHER_SUITE_WEP40 || + key->cipher == WLAN_CIPHER_SUITE_WEP104) && + !sta && vif->type != NL80211_IFTYPE_AP) { if (cmd == SET_KEY) is_default_wep_key = !priv->key_mapping_key; else diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h index 599635547021..149619fb1c07 100644 --- a/drivers/net/wireless/iwlwifi/iwl-dev.h +++ b/drivers/net/wireless/iwlwifi/iwl-dev.h @@ -420,7 +420,7 @@ struct iwl_tid_data { }; struct iwl_hw_key { - enum ieee80211_key_alg alg; + u32 cipher; int keylen; u8 keyidx; u8 key[32]; diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c index 7e0829be5e78..d5e8db37b86e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-sta.c +++ b/drivers/net/wireless/iwlwifi/iwl-sta.c @@ -818,7 +818,7 @@ int iwl_set_default_wep_key(struct iwl_priv *priv, keyconf->flags &= ~IEEE80211_KEY_FLAG_GENERATE_IV; keyconf->hw_key_idx = HW_KEY_DEFAULT; - priv->stations[IWL_AP_ID].keyinfo.alg = ALG_WEP; + priv->stations[IWL_AP_ID].keyinfo.cipher = keyconf->cipher; priv->wep_keys[keyconf->keyidx].key_size = keyconf->keylen; memcpy(&priv->wep_keys[keyconf->keyidx].key, &keyconf->key, @@ -856,7 +856,7 @@ static int iwl_set_wep_dynamic_key_info(struct iwl_priv *priv, spin_lock_irqsave(&priv->sta_lock, flags); - priv->stations[sta_id].keyinfo.alg = keyconf->alg; + priv->stations[sta_id].keyinfo.cipher = keyconf->cipher; priv->stations[sta_id].keyinfo.keylen = keyconf->keylen; priv->stations[sta_id].keyinfo.keyidx = keyconf->keyidx; @@ -906,7 +906,7 @@ static int iwl_set_ccmp_dynamic_key_info(struct iwl_priv *priv, keyconf->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; spin_lock_irqsave(&priv->sta_lock, flags); - priv->stations[sta_id].keyinfo.alg = keyconf->alg; + priv->stations[sta_id].keyinfo.cipher = keyconf->cipher; priv->stations[sta_id].keyinfo.keylen = keyconf->keylen; memcpy(priv->stations[sta_id].keyinfo.key, keyconf->key, @@ -955,7 +955,7 @@ static int iwl_set_tkip_dynamic_key_info(struct iwl_priv *priv, spin_lock_irqsave(&priv->sta_lock, flags); - priv->stations[sta_id].keyinfo.alg = keyconf->alg; + priv->stations[sta_id].keyinfo.cipher = keyconf->cipher; priv->stations[sta_id].keyinfo.keylen = 16; if ((priv->stations[sta_id].sta.key.key_flags & STA_KEY_FLG_ENCRYPT_MSK) @@ -1090,24 +1090,26 @@ int iwl_set_dynamic_key(struct iwl_priv *priv, priv->key_mapping_key++; keyconf->hw_key_idx = HW_KEY_DYNAMIC; - switch (keyconf->alg) { - case ALG_CCMP: + switch (keyconf->cipher) { + case WLAN_CIPHER_SUITE_CCMP: ret = iwl_set_ccmp_dynamic_key_info(priv, keyconf, sta_id); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: ret = iwl_set_tkip_dynamic_key_info(priv, keyconf, sta_id); break; - case ALG_WEP: + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: ret = iwl_set_wep_dynamic_key_info(priv, keyconf, sta_id); break; default: IWL_ERR(priv, - "Unknown alg: %s alg = %d\n", __func__, keyconf->alg); + "Unknown alg: %s cipher = %x\n", __func__, + keyconf->cipher); ret = -EINVAL; } - IWL_DEBUG_WEP(priv, "Set dynamic key: alg= %d len=%d idx=%d sta=%d ret=%d\n", - keyconf->alg, keyconf->keylen, keyconf->keyidx, + IWL_DEBUG_WEP(priv, "Set dynamic key: cipher=%x len=%d idx=%d sta=%d ret=%d\n", + keyconf->cipher, keyconf->keylen, keyconf->keyidx, sta_id, ret); return ret; diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index b14eaf91c7c2..faa2e0037e10 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -152,7 +152,7 @@ static int iwl3945_set_ccmp_dynamic_key_info(struct iwl_priv *priv, key_flags &= ~STA_KEY_FLG_INVALID; spin_lock_irqsave(&priv->sta_lock, flags); - priv->stations[sta_id].keyinfo.alg = keyconf->alg; + priv->stations[sta_id].keyinfo.cipher = keyconf->cipher; priv->stations[sta_id].keyinfo.keylen = keyconf->keylen; memcpy(priv->stations[sta_id].keyinfo.key, keyconf->key, keyconf->keylen); @@ -223,23 +223,25 @@ static int iwl3945_set_dynamic_key(struct iwl_priv *priv, keyconf->hw_key_idx = HW_KEY_DYNAMIC; - switch (keyconf->alg) { - case ALG_CCMP: + switch (keyconf->cipher) { + case WLAN_CIPHER_SUITE_CCMP: ret = iwl3945_set_ccmp_dynamic_key_info(priv, keyconf, sta_id); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: ret = iwl3945_set_tkip_dynamic_key_info(priv, keyconf, sta_id); break; - case ALG_WEP: + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: ret = iwl3945_set_wep_dynamic_key_info(priv, keyconf, sta_id); break; default: - IWL_ERR(priv, "Unknown alg: %s alg = %d\n", __func__, keyconf->alg); + IWL_ERR(priv, "Unknown alg: %s alg=%x\n", __func__, + keyconf->cipher); ret = -EINVAL; } - IWL_DEBUG_WEP(priv, "Set dynamic key: alg= %d len=%d idx=%d sta=%d ret=%d\n", - keyconf->alg, keyconf->keylen, keyconf->keyidx, + IWL_DEBUG_WEP(priv, "Set dynamic key: alg=%x len=%d idx=%d sta=%d ret=%d\n", + keyconf->cipher, keyconf->keylen, keyconf->keyidx, sta_id, ret); return ret; @@ -255,10 +257,11 @@ static int iwl3945_remove_static_key(struct iwl_priv *priv) static int iwl3945_set_static_key(struct iwl_priv *priv, struct ieee80211_key_conf *key) { - if (key->alg == ALG_WEP) + if (key->cipher == WLAN_CIPHER_SUITE_WEP40 || + key->cipher == WLAN_CIPHER_SUITE_WEP104) return -EOPNOTSUPP; - IWL_ERR(priv, "Static key invalid: alg %d\n", key->alg); + IWL_ERR(priv, "Static key invalid: cipher %x\n", key->cipher); return -EINVAL; } @@ -370,23 +373,25 @@ static void iwl3945_build_tx_cmd_hwcrypto(struct iwl_priv *priv, struct iwl3945_tx_cmd *tx_cmd = (struct iwl3945_tx_cmd *)cmd->cmd.payload; struct iwl_hw_key *keyinfo = &priv->stations[sta_id].keyinfo; - switch (keyinfo->alg) { - case ALG_CCMP: + tx_cmd->sec_ctl = 0; + + switch (keyinfo->cipher) { + case WLAN_CIPHER_SUITE_CCMP: tx_cmd->sec_ctl = TX_CMD_SEC_CCM; memcpy(tx_cmd->key, keyinfo->key, keyinfo->keylen); IWL_DEBUG_TX(priv, "tx_cmd with AES hwcrypto\n"); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: break; - case ALG_WEP: - tx_cmd->sec_ctl = TX_CMD_SEC_WEP | + case WLAN_CIPHER_SUITE_WEP104: + tx_cmd->sec_ctl |= TX_CMD_SEC_KEY128; + /* fall through */ + case WLAN_CIPHER_SUITE_WEP40: + tx_cmd->sec_ctl |= TX_CMD_SEC_WEP | (info->control.hw_key->hw_key_idx & TX_CMD_SEC_MSK) << TX_CMD_SEC_SHIFT; - if (keyinfo->keylen == 13) - tx_cmd->sec_ctl |= TX_CMD_SEC_KEY128; - memcpy(&tx_cmd->key[3], keyinfo->key, keyinfo->keylen); IWL_DEBUG_TX(priv, "Configuring packet for WEP encryption " @@ -394,7 +399,7 @@ static void iwl3945_build_tx_cmd_hwcrypto(struct iwl_priv *priv, break; default: - IWL_ERR(priv, "Unknown encode alg %d\n", keyinfo->alg); + IWL_ERR(priv, "Unknown encode cipher %x\n", keyinfo->cipher); break; } } diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index 47db439b63bf..622d27b6d8f2 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -429,8 +429,8 @@ static int p54_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, mutex_lock(&priv->conf_mutex); if (cmd == SET_KEY) { - switch (key->alg) { - case ALG_TKIP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_TKIP: if (!(priv->privacy_caps & (BR_DESC_PRIV_CAP_MICHAEL | BR_DESC_PRIV_CAP_TKIP))) { ret = -EOPNOTSUPP; @@ -439,7 +439,8 @@ static int p54_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; algo = P54_CRYPTO_TKIPMICHAEL; break; - case ALG_WEP: + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: if (!(priv->privacy_caps & BR_DESC_PRIV_CAP_WEP)) { ret = -EOPNOTSUPP; goto out_unlock; @@ -447,7 +448,7 @@ static int p54_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; algo = P54_CRYPTO_WEP; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: if (!(priv->privacy_caps & BR_DESC_PRIV_CAP_AESCCMP)) { ret = -EOPNOTSUPP; goto out_unlock; diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c index 427b46f558ed..e53f8cec7798 100644 --- a/drivers/net/wireless/p54/txrx.c +++ b/drivers/net/wireless/p54/txrx.c @@ -683,14 +683,15 @@ static void p54_tx_80211_header(struct p54_common *priv, struct sk_buff *skb, } } -static u8 p54_convert_algo(enum ieee80211_key_alg alg) +static u8 p54_convert_algo(u32 cipher) { - switch (alg) { - case ALG_WEP: + switch (cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: return P54_CRYPTO_WEP; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: return P54_CRYPTO_TKIPMICHAEL; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: return P54_CRYPTO_AESCCMP; default: return 0; @@ -731,7 +732,7 @@ int p54_tx_80211(struct ieee80211_hw *dev, struct sk_buff *skb) if (info->control.hw_key) { crypt_offset = ieee80211_get_hdrlen_from_skb(skb); - if (info->control.hw_key->alg == ALG_TKIP) { + if (info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { u8 *iv = (u8 *)(skb->data + crypt_offset); /* * The firmware excepts that the IV has to have @@ -827,10 +828,10 @@ int p54_tx_80211(struct ieee80211_hw *dev, struct sk_buff *skb) hdr->tries = ridx; txhdr->rts_rate_idx = 0; if (info->control.hw_key) { - txhdr->key_type = p54_convert_algo(info->control.hw_key->alg); + txhdr->key_type = p54_convert_algo(info->control.hw_key->cipher); txhdr->key_len = min((u8)16, info->control.hw_key->keylen); memcpy(txhdr->key, info->control.hw_key->key, txhdr->key_len); - if (info->control.hw_key->alg == ALG_TKIP) { + if (info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { /* reserve space for the MIC key */ len += 8; memcpy(skb_put(skb, 8), &(info->control.hw_key->key diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index cdaf93f48263..97cf72f8bc12 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -355,7 +355,9 @@ static int rt2500usb_config_key(struct rt2x00_dev *rt2x00dev, * it is known that not work at least on some hardware. * SW crypto will be used in that case. */ - if (key->alg == ALG_WEP && key->keyidx != 0) + if ((key->cipher == WLAN_CIPHER_SUITE_WEP40 || + key->cipher == WLAN_CIPHER_SUITE_WEP104) && + key->keyidx != 0) return -EOPNOTSUPP; /* diff --git a/drivers/net/wireless/rt2x00/rt2x00crypto.c b/drivers/net/wireless/rt2x00/rt2x00crypto.c index 583dacd8d241..5e9074bf2b8e 100644 --- a/drivers/net/wireless/rt2x00/rt2x00crypto.c +++ b/drivers/net/wireless/rt2x00/rt2x00crypto.c @@ -31,15 +31,14 @@ enum cipher rt2x00crypto_key_to_cipher(struct ieee80211_key_conf *key) { - switch (key->alg) { - case ALG_WEP: - if (key->keylen == WLAN_KEY_LEN_WEP40) - return CIPHER_WEP64; - else - return CIPHER_WEP128; - case ALG_TKIP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + return CIPHER_WEP64; + case WLAN_CIPHER_SUITE_WEP104: + return CIPHER_WEP128; + case WLAN_CIPHER_SUITE_TKIP: return CIPHER_TKIP; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: return CIPHER_AES; default: return CIPHER_NONE; @@ -95,7 +94,7 @@ unsigned int rt2x00crypto_tx_overhead(struct rt2x00_dev *rt2x00dev, overhead += key->iv_len; if (!(key->flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { - if (key->alg == ALG_TKIP) + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) overhead += 8; } diff --git a/drivers/net/wireless/wl12xx/wl1251_main.c b/drivers/net/wireless/wl12xx/wl1251_main.c index 861a5f33761e..6d31c855fcfe 100644 --- a/drivers/net/wireless/wl12xx/wl1251_main.c +++ b/drivers/net/wireless/wl12xx/wl1251_main.c @@ -725,8 +725,9 @@ static int wl1251_set_key_type(struct wl1251 *wl, struct ieee80211_key_conf *mac80211_key, const u8 *addr) { - switch (mac80211_key->alg) { - case ALG_WEP: + switch (mac80211_key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: if (is_broadcast_ether_addr(addr)) key->key_type = KEY_WEP_DEFAULT; else @@ -734,7 +735,7 @@ static int wl1251_set_key_type(struct wl1251 *wl, mac80211_key->hw_key_idx = mac80211_key->keyidx; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: if (is_broadcast_ether_addr(addr)) key->key_type = KEY_TKIP_MIC_GROUP; else @@ -742,7 +743,7 @@ static int wl1251_set_key_type(struct wl1251 *wl, mac80211_key->hw_key_idx = mac80211_key->keyidx; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: if (is_broadcast_ether_addr(addr)) key->key_type = KEY_AES_GROUP; else @@ -750,7 +751,7 @@ static int wl1251_set_key_type(struct wl1251 *wl, mac80211_key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; break; default: - wl1251_error("Unknown key algo 0x%x", mac80211_key->alg); + wl1251_error("Unknown key cipher 0x%x", mac80211_key->cipher); return -EOPNOTSUPP; } @@ -783,7 +784,7 @@ static int wl1251_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, wl1251_debug(DEBUG_CRYPT, "CMD: 0x%x", cmd); wl1251_dump(DEBUG_CRYPT, "ADDR: ", addr, ETH_ALEN); wl1251_debug(DEBUG_CRYPT, "Key: algo:0x%x, id:%d, len:%d flags 0x%x", - key->alg, key->keyidx, key->keylen, key->flags); + key->cipher, key->keyidx, key->keylen, key->flags); wl1251_dump(DEBUG_CRYPT, "KEY: ", key->key, key->keylen); if (is_zero_ether_addr(addr)) { diff --git a/drivers/net/wireless/wl12xx/wl1251_tx.c b/drivers/net/wireless/wl12xx/wl1251_tx.c index a38ec199187a..6634b3e27cfc 100644 --- a/drivers/net/wireless/wl12xx/wl1251_tx.c +++ b/drivers/net/wireless/wl12xx/wl1251_tx.c @@ -189,7 +189,7 @@ static int wl1251_tx_send_packet(struct wl1251 *wl, struct sk_buff *skb, tx_hdr = (struct tx_double_buffer_desc *) skb->data; if (control->control.hw_key && - control->control.hw_key->alg == ALG_TKIP) { + control->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { int hdrlen; __le16 fc; u16 length; @@ -399,7 +399,7 @@ static void wl1251_tx_packet_cb(struct wl1251 *wl, */ frame = skb_pull(skb, sizeof(struct tx_double_buffer_desc)); if (info->control.hw_key && - info->control.hw_key->alg == ALG_TKIP) { + info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { hdrlen = ieee80211_get_hdrlen_from_skb(skb); memmove(frame + WL1251_TKIP_IV_SPACE, frame, hdrlen); skb_pull(skb, WL1251_TKIP_IV_SPACE); diff --git a/drivers/net/wireless/wl12xx/wl1271_main.c b/drivers/net/wireless/wl12xx/wl1271_main.c index 9d68f0012f05..30194c0f36a9 100644 --- a/drivers/net/wireless/wl12xx/wl1271_main.c +++ b/drivers/net/wireless/wl12xx/wl1271_main.c @@ -1439,7 +1439,7 @@ static int wl1271_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, wl1271_debug(DEBUG_CRYPT, "CMD: 0x%x", cmd); wl1271_dump(DEBUG_CRYPT, "ADDR: ", addr, ETH_ALEN); wl1271_debug(DEBUG_CRYPT, "Key: algo:0x%x, id:%d, len:%d flags 0x%x", - key_conf->alg, key_conf->keyidx, + key_conf->cipher, key_conf->keyidx, key_conf->keylen, key_conf->flags); wl1271_dump(DEBUG_CRYPT, "KEY: ", key_conf->key, key_conf->keylen); @@ -1455,20 +1455,21 @@ static int wl1271_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (ret < 0) goto out_unlock; - switch (key_conf->alg) { - case ALG_WEP: + switch (key_conf->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: key_type = KEY_WEP; key_conf->hw_key_idx = key_conf->keyidx; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: key_type = KEY_TKIP; key_conf->hw_key_idx = key_conf->keyidx; tx_seq_32 = WL1271_TX_SECURITY_HI32(wl->tx_security_seq); tx_seq_16 = WL1271_TX_SECURITY_LO16(wl->tx_security_seq); break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: key_type = KEY_AES; key_conf->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; @@ -1476,7 +1477,7 @@ static int wl1271_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, tx_seq_16 = WL1271_TX_SECURITY_LO16(wl->tx_security_seq); break; default: - wl1271_error("Unknown key algo 0x%x", key_conf->alg); + wl1271_error("Unknown key algo 0x%x", key_conf->cipher); ret = -EOPNOTSUPP; goto out_sleep; diff --git a/drivers/net/wireless/wl12xx/wl1271_tx.c b/drivers/net/wireless/wl12xx/wl1271_tx.c index c592cc2e9fe8..dc0b46c93c4b 100644 --- a/drivers/net/wireless/wl12xx/wl1271_tx.c +++ b/drivers/net/wireless/wl12xx/wl1271_tx.c @@ -193,7 +193,7 @@ static int wl1271_tx_frame(struct wl1271 *wl, struct sk_buff *skb) info = IEEE80211_SKB_CB(skb); if (info->control.hw_key && - info->control.hw_key->alg == ALG_TKIP) + info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) extra = WL1271_TKIP_IV_SPACE; if (info->control.hw_key) { @@ -347,7 +347,7 @@ static void wl1271_tx_complete_packet(struct wl1271 *wl, /* remove TKIP header space if present */ if (info->control.hw_key && - info->control.hw_key->alg == ALG_TKIP) { + info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { int hdrlen = ieee80211_get_hdrlen_from_skb(skb); memmove(skb->data + WL1271_TKIP_IV_SPACE, skb->data, hdrlen); skb_pull(skb, WL1271_TKIP_IV_SPACE); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 871ed1de736a..914c747ac3a0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -788,20 +788,6 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) return false; } -/** - * enum ieee80211_key_alg - key algorithm - * @ALG_WEP: WEP40 or WEP104 - * @ALG_TKIP: TKIP - * @ALG_CCMP: CCMP (AES) - * @ALG_AES_CMAC: AES-128-CMAC - */ -enum ieee80211_key_alg { - ALG_WEP, - ALG_TKIP, - ALG_CCMP, - ALG_AES_CMAC, -}; - /** * enum ieee80211_key_flags - key flags * @@ -839,7 +825,7 @@ enum ieee80211_key_flags { * @hw_key_idx: To be set by the driver, this is the key index the driver * wants to be given when a frame is transmitted and needs to be * encrypted in hardware. - * @alg: The key algorithm. + * @cipher: The key's cipher suite selector. * @flags: key flags, see &enum ieee80211_key_flags. * @keyidx: the key index (0-3) * @keylen: key material length @@ -852,7 +838,7 @@ enum ieee80211_key_flags { * @iv_len: The IV length for this key type */ struct ieee80211_key_conf { - enum ieee80211_key_alg alg; + u32 cipher; u8 icv_len; u8 iv_len; u8 hw_key_idx; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 19c6146010b7..9a35d9e7efd7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -116,7 +116,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata; struct sta_info *sta = NULL; - enum ieee80211_key_alg alg; struct ieee80211_key *key; int err; @@ -125,31 +124,20 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); + /* reject WEP and TKIP keys if WEP failed to initialize */ switch (params->cipher) { case WLAN_CIPHER_SUITE_WEP40: - case WLAN_CIPHER_SUITE_WEP104: - alg = ALG_WEP; - break; case WLAN_CIPHER_SUITE_TKIP: - alg = ALG_TKIP; - break; - case WLAN_CIPHER_SUITE_CCMP: - alg = ALG_CCMP; - break; - case WLAN_CIPHER_SUITE_AES_CMAC: - alg = ALG_AES_CMAC; + case WLAN_CIPHER_SUITE_WEP104: + if (IS_ERR(sdata->local->wep_tx_tfm)) + return -EINVAL; break; default: - return -EINVAL; + break; } - /* reject WEP and TKIP keys if WEP failed to initialize */ - if ((alg == ALG_WEP || alg == ALG_TKIP) && - IS_ERR(sdata->local->wep_tx_tfm)) - return -EINVAL; - - key = ieee80211_key_alloc(alg, key_idx, params->key_len, params->key, - params->seq_len, params->seq); + key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len, + params->key, params->seq_len, params->seq); if (IS_ERR(key)) return PTR_ERR(key); @@ -247,10 +235,10 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, memset(¶ms, 0, sizeof(params)); - switch (key->conf.alg) { - case ALG_TKIP: - params.cipher = WLAN_CIPHER_SUITE_TKIP; + params.cipher = key->conf.cipher; + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_TKIP: iv32 = key->u.tkip.tx.iv32; iv16 = key->u.tkip.tx.iv16; @@ -268,8 +256,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq = seq; params.seq_len = 6; break; - case ALG_CCMP: - params.cipher = WLAN_CIPHER_SUITE_CCMP; + case WLAN_CIPHER_SUITE_CCMP: seq[0] = key->u.ccmp.tx_pn[5]; seq[1] = key->u.ccmp.tx_pn[4]; seq[2] = key->u.ccmp.tx_pn[3]; @@ -279,14 +266,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq = seq; params.seq_len = 6; break; - case ALG_WEP: - if (key->conf.keylen == 5) - params.cipher = WLAN_CIPHER_SUITE_WEP40; - else - params.cipher = WLAN_CIPHER_SUITE_WEP104; - break; - case ALG_AES_CMAC: - params.cipher = WLAN_CIPHER_SUITE_AES_CMAC; + case WLAN_CIPHER_SUITE_AES_CMAC: seq[0] = key->u.aes_cmac.tx_pn[5]; seq[1] = key->u.aes_cmac.tx_pn[4]; seq[2] = key->u.aes_cmac.tx_pn[3]; diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index fa5e76e658ef..1647f8dc5cda 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -64,26 +64,13 @@ static ssize_t key_algorithm_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char *alg; + char buf[15]; struct ieee80211_key *key = file->private_data; + u32 c = key->conf.cipher; - switch (key->conf.alg) { - case ALG_WEP: - alg = "WEP\n"; - break; - case ALG_TKIP: - alg = "TKIP\n"; - break; - case ALG_CCMP: - alg = "CCMP\n"; - break; - case ALG_AES_CMAC: - alg = "AES-128-CMAC\n"; - break; - default: - return 0; - } - return simple_read_from_buffer(userbuf, count, ppos, alg, strlen(alg)); + sprintf(buf, "%.2x-%.2x-%.2x:%d\n", + c >> 24, (c >> 16) & 0xff, (c >> 8) & 0xff, c & 0xff); + return simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf)); } KEY_OPS(algorithm); @@ -95,21 +82,22 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, int len; struct ieee80211_key *key = file->private_data; - switch (key->conf.alg) { - case ALG_WEP: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: len = scnprintf(buf, sizeof(buf), "\n"); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: len = scnprintf(buf, sizeof(buf), "%08x %04x\n", key->u.tkip.tx.iv32, key->u.tkip.tx.iv16); break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: tpn = key->u.ccmp.tx_pn; len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]); break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: tpn = key->u.aes_cmac.tx_pn; len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], @@ -130,11 +118,12 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, int i, len; const u8 *rpn; - switch (key->conf.alg) { - case ALG_WEP: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: len = scnprintf(buf, sizeof(buf), "\n"); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: for (i = 0; i < NUM_RX_DATA_QUEUES; i++) p += scnprintf(p, sizeof(buf)+buf-p, "%08x %04x\n", @@ -142,7 +131,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, key->u.tkip.rx[i].iv16); len = p - buf; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) { rpn = key->u.ccmp.rx_pn[i]; p += scnprintf(p, sizeof(buf)+buf-p, @@ -152,7 +141,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, } len = p - buf; break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: rpn = key->u.aes_cmac.rx_pn; p += scnprintf(p, sizeof(buf)+buf-p, "%02x%02x%02x%02x%02x%02x\n", @@ -174,11 +163,11 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf, char buf[20]; int len; - switch (key->conf.alg) { - case ALG_CCMP: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.replays); break; @@ -196,8 +185,8 @@ static ssize_t key_icverrors_read(struct file *file, char __user *userbuf, char buf[20]; int len; - switch (key->conf.alg) { - case ALG_AES_CMAC: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_AES_CMAC: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.icverrors); break; diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 5d5d2a974668..b5a95582d816 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -336,7 +336,7 @@ TRACE_EVENT(drv_set_key, LOCAL_ENTRY VIF_ENTRY STA_ENTRY - __field(enum ieee80211_key_alg, alg) + __field(u32, cipher) __field(u8, hw_key_idx) __field(u8, flags) __field(s8, keyidx) @@ -346,7 +346,7 @@ TRACE_EVENT(drv_set_key, LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; - __entry->alg = key->alg; + __entry->cipher = key->cipher; __entry->flags = key->flags; __entry->keyidx = key->keyidx; __entry->hw_key_idx = key->hw_key_idx; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index d6dbc8ea4ead..3203d1d3cd38 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -227,9 +227,7 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, } } -struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, - int idx, - size_t key_len, +struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, const u8 *key_data, size_t seq_len, const u8 *seq) { @@ -249,15 +247,16 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, key->conf.flags = 0; key->flags = 0; - key->conf.alg = alg; + key->conf.cipher = cipher; key->conf.keyidx = idx; key->conf.keylen = key_len; - switch (alg) { - case ALG_WEP: + switch (cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: key->conf.iv_len = WEP_IV_LEN; key->conf.icv_len = WEP_ICV_LEN; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: key->conf.iv_len = TKIP_IV_LEN; key->conf.icv_len = TKIP_ICV_LEN; if (seq) { @@ -269,7 +268,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, } } break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: key->conf.iv_len = CCMP_HDR_LEN; key->conf.icv_len = CCMP_MIC_LEN; if (seq) { @@ -279,7 +278,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, seq[CCMP_PN_LEN - j - 1]; } break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: key->conf.iv_len = 0; key->conf.icv_len = sizeof(struct ieee80211_mmie); if (seq) @@ -290,7 +289,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, memcpy(key->conf.key, key_data, key_len); INIT_LIST_HEAD(&key->list); - if (alg == ALG_CCMP) { + if (cipher == WLAN_CIPHER_SUITE_CCMP) { /* * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. @@ -303,7 +302,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, } } - if (alg == ALG_AES_CMAC) { + if (cipher == WLAN_CIPHER_SUITE_AES_CMAC) { /* * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. @@ -328,9 +327,9 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key) if (key->local) ieee80211_key_disable_hw_accel(key); - if (key->conf.alg == ALG_CCMP) + if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP) ieee80211_aes_key_free(key->u.ccmp.tfm); - if (key->conf.alg == ALG_AES_CMAC) + if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); if (key->local) ieee80211_debugfs_key_remove(key); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index b665bbb7a471..53b5ce12536f 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -123,9 +123,7 @@ struct ieee80211_key { struct ieee80211_key_conf conf; }; -struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, - int idx, - size_t key_len, +struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, const u8 *key_data, size_t seq_len, const u8 *seq); /* diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f24a0a1cff1a..ad2427021b26 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -961,7 +961,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * pairwise or station-to-station keys, but for WEP we allow * using a key index as well. */ - if (rx->key && rx->key->conf.alg != ALG_WEP && + if (rx->key && rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 && + rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 && !is_multicast_ether_addr(hdr->addr1)) rx->key = NULL; } @@ -977,8 +978,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; /* the hdr variable is invalid now! */ - switch (rx->key->conf.alg) { - case ALG_WEP: + switch (rx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: /* Check for weak IVs if possible */ if (rx->sta && ieee80211_is_data(fc) && (!(status->flag & RX_FLAG_IV_STRIPPED) || @@ -988,13 +990,13 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) result = ieee80211_crypto_wep_decrypt(rx); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: result = ieee80211_crypto_tkip_decrypt(rx); break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: result = ieee80211_crypto_ccmp_decrypt(rx); break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: result = ieee80211_crypto_aes_cmac_decrypt(rx); break; } @@ -1291,7 +1293,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) /* This is the first fragment of a new frame. */ entry = ieee80211_reassemble_add(rx->sdata, frag, seq, rx->queue, &(rx->skb)); - if (rx->key && rx->key->conf.alg == ALG_CCMP && + if (rx->key && rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP && ieee80211_has_protected(fc)) { int queue = ieee80211_is_mgmt(fc) ? NUM_RX_DATA_QUEUES : rx->queue; @@ -1320,7 +1322,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) int i; u8 pn[CCMP_PN_LEN], *rpn; int queue; - if (!rx->key || rx->key->conf.alg != ALG_CCMP) + if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP) return RX_DROP_UNUSABLE; memcpy(pn, entry->last_pn, CCMP_PN_LEN); for (i = CCMP_PN_LEN - 1; i >= 0; i--) { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index c54db966926b..bc4fefc91663 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -543,15 +543,16 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key->tx_rx_count++; /* TODO: add threshold stuff again */ - switch (tx->key->conf.alg) { - case ALG_WEP: + switch (tx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: if (ieee80211_is_auth(hdr->frame_control)) break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: if (!ieee80211_is_data_present(hdr->frame_control)) tx->key = NULL; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: if (!ieee80211_is_data_present(hdr->frame_control) && !ieee80211_use_mfp(hdr->frame_control, tx->sta, tx->skb)) @@ -561,7 +562,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) IEEE80211_KEY_FLAG_SW_MGMT) && ieee80211_is_mgmt(hdr->frame_control); break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: if (!ieee80211_is_mgmt(hdr->frame_control)) tx->key = NULL; break; @@ -949,14 +950,15 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) if (!tx->key) return TX_CONTINUE; - switch (tx->key->conf.alg) { - case ALG_WEP: + switch (tx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: return ieee80211_crypto_wep_encrypt(tx); - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: return ieee80211_crypto_tkip_encrypt(tx); - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: return ieee80211_crypto_ccmp_encrypt(tx); - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: return ieee80211_crypto_aes_cmac_encrypt(tx); } diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 9ebc8d8a1f5b..f27484c22b9f 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -240,7 +240,7 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local, keyidx = skb->data[hdrlen + 3] >> 6; - if (!key || keyidx != key->conf.keyidx || key->conf.alg != ALG_WEP) + if (!key || keyidx != key->conf.keyidx) return -1; klen = 3 + key->conf.keylen; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 8d59d27d887e..b08ad94b56da 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -36,8 +36,8 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) int tail; hdr = (struct ieee80211_hdr *)skb->data; - if (!tx->key || tx->key->conf.alg != ALG_TKIP || skb->len < 24 || - !ieee80211_is_data_present(hdr->frame_control)) + if (!tx->key || tx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP || + skb->len < 24 || !ieee80211_is_data_present(hdr->frame_control)) return TX_CONTINUE; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -94,7 +94,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) if (status->flag & RX_FLAG_MMIC_STRIPPED) return RX_CONTINUE; - if (!rx->key || rx->key->conf.alg != ALG_TKIP || + if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP || !ieee80211_has_protected(hdr->frame_control) || !ieee80211_is_data_present(hdr->frame_control)) return RX_CONTINUE; -- cgit v1.2.3 From bfb564e7391340638afe4ad67744a8f3858e7566 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Wed, 4 Aug 2010 06:15:52 +0000 Subject: core: Factor out flow calculation from get_rps_cpu Factor out flow calculation code from get_rps_cpu, since other functions can use the same code. Revisions: v2 (Ben): Separate flow calcuation out and use in select queue. v3 (Arnd): Don't re-implement MIN. v4 (Changli): skb->data points to ethernet header in macvtap, and make a fast path. Tested macvtap with this patch. v5 (Changli): - Cache skb->rxhash in skb_get_rxhash - macvtap may not have pow(2) queues, so change code for queue selection. (Arnd): - Use first available queue if all fails. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++ net/core/dev.c | 106 +++++++++++++++++++++++++++++-------------------- 2 files changed, 71 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 77eb60d2b496..d8050382b189 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -558,6 +558,15 @@ extern unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config, struct ts_state *state); +extern __u32 __skb_get_rxhash(struct sk_buff *skb); +static inline __u32 skb_get_rxhash(struct sk_buff *skb) +{ + if (!skb->rxhash) + skb->rxhash = __skb_get_rxhash(skb); + + return skb->rxhash; +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { diff --git a/net/core/dev.c b/net/core/dev.c index 1ae654391442..586a11cb4398 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2259,69 +2259,41 @@ static inline void ____napi_schedule(struct softnet_data *sd, __raise_softirq_irqoff(NET_RX_SOFTIRQ); } -#ifdef CONFIG_RPS - -/* One global table that all flow-based protocols share. */ -struct rps_sock_flow_table *rps_sock_flow_table __read_mostly; -EXPORT_SYMBOL(rps_sock_flow_table); - /* - * get_rps_cpu is called from netif_receive_skb and returns the target - * CPU from the RPS map of the receiving queue for a given skb. - * rcu_read_lock must be held on entry. + * __skb_get_rxhash: calculate a flow hash based on src/dst addresses + * and src/dst port numbers. Returns a non-zero hash number on success + * and 0 on failure. */ -static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, - struct rps_dev_flow **rflowp) +__u32 __skb_get_rxhash(struct sk_buff *skb) { + int nhoff, hash = 0; struct ipv6hdr *ip6; struct iphdr *ip; - struct netdev_rx_queue *rxqueue; - struct rps_map *map; - struct rps_dev_flow_table *flow_table; - struct rps_sock_flow_table *sock_flow_table; - int cpu = -1; u8 ip_proto; - u16 tcpu; u32 addr1, addr2, ihl; union { u32 v32; u16 v16[2]; } ports; - if (skb_rx_queue_recorded(skb)) { - u16 index = skb_get_rx_queue(skb); - if (unlikely(index >= dev->num_rx_queues)) { - WARN_ONCE(dev->num_rx_queues > 1, "%s received packet " - "on queue %u, but number of RX queues is %u\n", - dev->name, index, dev->num_rx_queues); - goto done; - } - rxqueue = dev->_rx + index; - } else - rxqueue = dev->_rx; - - if (!rxqueue->rps_map && !rxqueue->rps_flow_table) - goto done; - - if (skb->rxhash) - goto got_hash; /* Skip hash computation on packet header */ + nhoff = skb_network_offset(skb); switch (skb->protocol) { case __constant_htons(ETH_P_IP): - if (!pskb_may_pull(skb, sizeof(*ip))) + if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) goto done; - ip = (struct iphdr *) skb->data; + ip = (struct iphdr *) skb->data + nhoff; ip_proto = ip->protocol; addr1 = (__force u32) ip->saddr; addr2 = (__force u32) ip->daddr; ihl = ip->ihl; break; case __constant_htons(ETH_P_IPV6): - if (!pskb_may_pull(skb, sizeof(*ip6))) + if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) goto done; - ip6 = (struct ipv6hdr *) skb->data; + ip6 = (struct ipv6hdr *) skb->data + nhoff; ip_proto = ip6->nexthdr; addr1 = (__force u32) ip6->saddr.s6_addr32[3]; addr2 = (__force u32) ip6->daddr.s6_addr32[3]; @@ -2330,6 +2302,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, default: goto done; } + switch (ip_proto) { case IPPROTO_TCP: case IPPROTO_UDP: @@ -2338,8 +2311,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, case IPPROTO_AH: case IPPROTO_SCTP: case IPPROTO_UDPLITE: - if (pskb_may_pull(skb, (ihl * 4) + 4)) { - ports.v32 = * (__force u32 *) (skb->data + (ihl * 4)); + if (pskb_may_pull(skb, (ihl * 4) + 4 + nhoff)) { + ports.v32 = * (__force u32 *) (skb->data + nhoff + + (ihl * 4)); if (ports.v16[1] < ports.v16[0]) swap(ports.v16[0], ports.v16[1]); break; @@ -2352,11 +2326,55 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, /* get a consistent hash (same value on both flow directions) */ if (addr2 < addr1) swap(addr1, addr2); - skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd); - if (!skb->rxhash) - skb->rxhash = 1; -got_hash: + hash = jhash_3words(addr1, addr2, ports.v32, hashrnd); + if (!hash) + hash = 1; + +done: + return hash; +} +EXPORT_SYMBOL(__skb_get_rxhash); + +#ifdef CONFIG_RPS + +/* One global table that all flow-based protocols share. */ +struct rps_sock_flow_table *rps_sock_flow_table __read_mostly; +EXPORT_SYMBOL(rps_sock_flow_table); + +/* + * get_rps_cpu is called from netif_receive_skb and returns the target + * CPU from the RPS map of the receiving queue for a given skb. + * rcu_read_lock must be held on entry. + */ +static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, + struct rps_dev_flow **rflowp) +{ + struct netdev_rx_queue *rxqueue; + struct rps_map *map; + struct rps_dev_flow_table *flow_table; + struct rps_sock_flow_table *sock_flow_table; + int cpu = -1; + u16 tcpu; + + if (skb_rx_queue_recorded(skb)) { + u16 index = skb_get_rx_queue(skb); + if (unlikely(index >= dev->num_rx_queues)) { + WARN_ONCE(dev->num_rx_queues > 1, "%s received packet " + "on queue %u, but number of RX queues is %u\n", + dev->name, index, dev->num_rx_queues); + goto done; + } + rxqueue = dev->_rx + index; + } else + rxqueue = dev->_rx; + + if (!rxqueue->rps_map && !rxqueue->rps_flow_table) + goto done; + + if (!skb_get_rxhash(skb)) + goto done; + flow_table = rcu_dereference(rxqueue->rps_flow_table); sock_flow_table = rcu_dereference(rps_sock_flow_table); if (flow_table && sock_flow_table) { -- cgit v1.2.3 From 1565c7c1c4c8e931bdba66abc8aa6f141a406872 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Wed, 4 Aug 2010 06:15:59 +0000 Subject: macvtap: Implement multiqueue for macvtap driver Implement multiqueue facility for macvtap driver. The idea is that a macvtap device can be opened multiple times and the fd's can be used to register eg, as backend for vhost. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 99 ++++++++++++++++++++++++++++++++++++++-------- include/linux/if_macvlan.h | 9 ++++- 2 files changed, 90 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 3b1c54a9c6ef..42567279843e 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -84,26 +84,45 @@ static const struct proto_ops macvtap_socket_ops; static DEFINE_SPINLOCK(macvtap_lock); /* - * Choose the next free queue, for now there is only one + * get_slot: return a [unused/occupied] slot in vlan->taps[]: + * - if 'q' is NULL, return the first empty slot; + * - otherwise, return the slot this pointer occupies. */ +static int get_slot(struct macvlan_dev *vlan, struct macvtap_queue *q) +{ + int i; + + for (i = 0; i < MAX_MACVTAP_QUEUES; i++) { + if (rcu_dereference(vlan->taps[i]) == q) + return i; + } + + /* Should never happen */ + BUG_ON(1); +} + static int macvtap_set_queue(struct net_device *dev, struct file *file, struct macvtap_queue *q) { struct macvlan_dev *vlan = netdev_priv(dev); + int index; int err = -EBUSY; spin_lock(&macvtap_lock); - if (rcu_dereference(vlan->tap)) + if (vlan->numvtaps == MAX_MACVTAP_QUEUES) goto out; err = 0; + index = get_slot(vlan, NULL); rcu_assign_pointer(q->vlan, vlan); - rcu_assign_pointer(vlan->tap, q); + rcu_assign_pointer(vlan->taps[index], q); sock_hold(&q->sk); q->file = file; file->private_data = q; + vlan->numvtaps++; + out: spin_unlock(&macvtap_lock); return err; @@ -124,9 +143,12 @@ static void macvtap_put_queue(struct macvtap_queue *q) spin_lock(&macvtap_lock); vlan = rcu_dereference(q->vlan); if (vlan) { - rcu_assign_pointer(vlan->tap, NULL); + int index = get_slot(vlan, q); + + rcu_assign_pointer(vlan->taps[index], NULL); rcu_assign_pointer(q->vlan, NULL); sock_put(&q->sk); + --vlan->numvtaps; } spin_unlock(&macvtap_lock); @@ -136,39 +158,82 @@ static void macvtap_put_queue(struct macvtap_queue *q) } /* - * Since we only support one queue, just dereference the pointer. + * Select a queue based on the rxq of the device on which this packet + * arrived. If the incoming device is not mq, calculate a flow hash + * to select a queue. If all fails, find the first available queue. + * Cache vlan->numvtaps since it can become zero during the execution + * of this function. */ static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, struct sk_buff *skb) { struct macvlan_dev *vlan = netdev_priv(dev); + struct macvtap_queue *tap = NULL; + int numvtaps = vlan->numvtaps; + __u32 rxq; + + if (!numvtaps) + goto out; + + if (likely(skb_rx_queue_recorded(skb))) { + rxq = skb_get_rx_queue(skb); + + while (unlikely(rxq >= numvtaps)) + rxq -= numvtaps; + + tap = rcu_dereference(vlan->taps[rxq]); + if (tap) + goto out; + } + + /* Check if we can use flow to select a queue */ + rxq = skb_get_rxhash(skb); + if (rxq) { + tap = rcu_dereference(vlan->taps[rxq % numvtaps]); + if (tap) + goto out; + } - return rcu_dereference(vlan->tap); + /* Everything failed - find first available queue */ + for (rxq = 0; rxq < MAX_MACVTAP_QUEUES; rxq++) { + tap = rcu_dereference(vlan->taps[rxq]); + if (tap) + break; + } + +out: + return tap; } /* * The net_device is going away, give up the reference - * that it holds on the queue (all the queues one day) - * and safely set the pointer from the queues to NULL. + * that it holds on all queues and safely set the pointer + * from the queues to NULL. */ static void macvtap_del_queues(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); - struct macvtap_queue *q; + struct macvtap_queue *q, *qlist[MAX_MACVTAP_QUEUES]; + int i, j = 0; + /* macvtap_put_queue can free some slots, so go through all slots */ spin_lock(&macvtap_lock); - q = rcu_dereference(vlan->tap); - if (!q) { - spin_unlock(&macvtap_lock); - return; + for (i = 0; i < MAX_MACVTAP_QUEUES && vlan->numvtaps; i++) { + q = rcu_dereference(vlan->taps[i]); + if (q) { + qlist[j++] = q; + rcu_assign_pointer(vlan->taps[i], NULL); + rcu_assign_pointer(q->vlan, NULL); + vlan->numvtaps--; + } } - - rcu_assign_pointer(vlan->tap, NULL); - rcu_assign_pointer(q->vlan, NULL); + BUG_ON(vlan->numvtaps != 0); spin_unlock(&macvtap_lock); synchronize_rcu(); - sock_put(&q->sk); + + for (--j; j >= 0; j--) + sock_put(&qlist[j]->sk); } /* diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 35280b302290..8a2fd66a8b5f 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -40,6 +40,12 @@ struct macvlan_rx_stats { unsigned long rx_errors; }; +/* + * Maximum times a macvtap device can be opened. This can be used to + * configure the number of receive queue, e.g. for multiqueue virtio. + */ +#define MAX_MACVTAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16) + struct macvlan_dev { struct net_device *dev; struct list_head list; @@ -50,7 +56,8 @@ struct macvlan_dev { enum macvlan_mode mode; int (*receive)(struct sk_buff *skb); int (*forward)(struct net_device *dev, struct sk_buff *skb); - struct macvtap_queue *tap; + struct macvtap_queue *taps[MAX_MACVTAP_QUEUES]; + int numvtaps; }; static inline void macvlan_count_rx(const struct macvlan_dev *vlan, -- cgit v1.2.3 From f1b499f029c5dde85d46a8811353c62f29157541 Mon Sep 17 00:00:00 2001 From: John Kacur Date: Thu, 5 Aug 2010 17:10:53 +0200 Subject: lockdep: Remove __debug_show_held_locks There is no longer any functional difference between __debug_show_held_locks() and debug_show_held_locks(), so remove the former. Signed-off-by: John Kacur Cc: Peter Zijlstra LKML-Reference: <1281021054-4228-1-git-send-email-jkacur@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/debug_locks.h | 5 ----- kernel/hung_task.c | 2 +- kernel/lockdep.c | 8 +------- 3 files changed, 2 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 29b3ce3f2a1d..2833452ea01c 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -49,7 +49,6 @@ struct task_struct; #ifdef CONFIG_LOCKDEP extern void debug_show_all_locks(void); -extern void __debug_show_held_locks(struct task_struct *task); extern void debug_show_held_locks(struct task_struct *task); extern void debug_check_no_locks_freed(const void *from, unsigned long len); extern void debug_check_no_locks_held(struct task_struct *task); @@ -58,10 +57,6 @@ static inline void debug_show_all_locks(void) { } -static inline void __debug_show_held_locks(struct task_struct *task) -{ -} - static inline void debug_show_held_locks(struct task_struct *task) { } diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 0c642d51aac2..bca942379559 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -98,7 +98,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" " disables this message.\n"); sched_show_task(t); - __debug_show_held_locks(t); + debug_show_held_locks(t); touch_nmi_watchdog(); diff --git a/kernel/lockdep.c b/kernel/lockdep.c index f2852a510232..84baa71cfda5 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3775,7 +3775,7 @@ EXPORT_SYMBOL_GPL(debug_show_all_locks); * Careful: only use this function if you are sure that * the task cannot run in parallel! */ -void __debug_show_held_locks(struct task_struct *task) +void debug_show_held_locks(struct task_struct *task) { if (unlikely(!debug_locks)) { printk("INFO: lockdep is turned off.\n"); @@ -3783,12 +3783,6 @@ void __debug_show_held_locks(struct task_struct *task) } lockdep_print_held_locks(task); } -EXPORT_SYMBOL_GPL(__debug_show_held_locks); - -void debug_show_held_locks(struct task_struct *task) -{ - __debug_show_held_locks(task); -} EXPORT_SYMBOL_GPL(debug_show_held_locks); void lockdep_sys_exit(void) -- cgit v1.2.3 From 3d529946ce292336793b85198bd59afc75e16bd4 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 18 Aug 2010 09:48:31 +1000 Subject: Fix spelling mistake in jhash Fix a spelling mistake. Signed-off-by: Anton Blanchard Signed-off-by: Jiri Kosina --- include/linux/jhash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jhash.h b/include/linux/jhash.h index 2a2f99fbcb16..ced1159fa4f2 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -116,7 +116,7 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) /* A special ultra-optimized versions that knows they are hashing exactly * 3, 2 or 1 word(s). * - * NOTE: In partilar the "c += length; __jhash_mix(a,b,c);" normally + * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally * done at the end is not done here. */ static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) -- cgit v1.2.3 From 70791ce9ba68a5921c9905ef05d23f62a90bc10c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 29 Jun 2010 19:34:05 +0200 Subject: perf: Generalize callchain_store() callchain_store() is the same on every archs, inline it in perf_event.h and rename it to perf_callchain_store() to avoid any collision. This removes repetitive code. Signed-off-by: Frederic Weisbecker Acked-by: Paul Mackerras Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: David Miller Cc: Paul Mundt Cc: Borislav Petkov --- arch/arm/kernel/perf_event.c | 15 ++++---------- arch/powerpc/kernel/perf_callchain.c | 40 +++++++++++++----------------------- arch/sh/kernel/perf_callchain.c | 11 +++------- arch/sparc/kernel/perf_event.c | 26 +++++++++-------------- arch/x86/kernel/cpu/perf_event.c | 20 +++++++----------- include/linux/perf_event.h | 7 +++++++ 6 files changed, 45 insertions(+), 74 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index fdcb0be47df1..a07c3b1955f0 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -3001,13 +3001,6 @@ arch_initcall(init_hw_perf_events); /* * Callchain handling code. */ -static inline void -callchain_store(struct perf_callchain_entry *entry, - u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} /* * The registers we're interested in are at the end of the variable @@ -3039,7 +3032,7 @@ user_backtrace(struct frame_tail *tail, if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail))) return NULL; - callchain_store(entry, buftail.lr); + perf_callchain_store(entry, buftail.lr); /* * Frame pointers should strictly progress back up the stack @@ -3057,7 +3050,7 @@ perf_callchain_user(struct pt_regs *regs, { struct frame_tail *tail; - callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, PERF_CONTEXT_USER); if (!user_mode(regs)) regs = task_pt_regs(current); @@ -3078,7 +3071,7 @@ callchain_trace(struct stackframe *fr, void *data) { struct perf_callchain_entry *entry = data; - callchain_store(entry, fr->pc); + perf_callchain_store(entry, fr->pc); return 0; } @@ -3088,7 +3081,7 @@ perf_callchain_kernel(struct pt_regs *regs, { struct stackframe fr; - callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); fr.fp = regs->ARM_fp; fr.sp = regs->ARM_sp; fr.lr = regs->ARM_lr; diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index 95ad9dad298e..a286c2e5a3ea 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -23,18 +23,6 @@ #include "ppc32.h" #endif -/* - * Store another value in a callchain_entry. - */ -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - unsigned int nr = entry->nr; - - if (nr < PERF_MAX_STACK_DEPTH) { - entry->ip[nr] = ip; - entry->nr = nr + 1; - } -} /* * Is sp valid as the address of the next kernel stack frame after prev_sp? @@ -69,8 +57,8 @@ static void perf_callchain_kernel(struct pt_regs *regs, lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->nip); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, regs->nip); if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) return; @@ -89,7 +77,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; level = 0; - callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); } else { if (level == 0) @@ -111,7 +99,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, ++level; } - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); if (!valid_next_sp(next_sp, sp)) return; sp = next_sp; @@ -246,8 +234,8 @@ static void perf_callchain_user_64(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); for (;;) { fp = (unsigned long __user *) sp; @@ -276,14 +264,14 @@ static void perf_callchain_user_64(struct pt_regs *regs, read_user_stack_64(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } @@ -447,8 +435,8 @@ static void perf_callchain_user_32(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); while (entry->nr < PERF_MAX_STACK_DEPTH) { fp = (unsigned int __user *) (unsigned long) sp; @@ -470,14 +458,14 @@ static void perf_callchain_user_32(struct pt_regs *regs, read_user_stack_32(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c index 1d6dbce7a3bc..00143f3dd196 100644 --- a/arch/sh/kernel/perf_callchain.c +++ b/arch/sh/kernel/perf_callchain.c @@ -14,11 +14,6 @@ #include #include -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} static void callchain_warning(void *data, char *msg) { @@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable) struct perf_callchain_entry *entry = data; if (reliable) - callchain_store(entry, addr); + perf_callchain_store(entry, addr); } static const struct stacktrace_ops callchain_ops = { @@ -52,8 +47,8 @@ static const struct stacktrace_ops callchain_ops = { static void perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) { - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->pc); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, regs->pc); unwind_stack(NULL, regs, NULL, &callchain_ops, entry); } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 357ced3c33ff..2a95a9079862 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1283,12 +1283,6 @@ void __init init_hw_perf_events(void) register_die_notifier(&perf_event_nmi_notifier); } -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} - static void perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) { @@ -1297,8 +1291,8 @@ static void perf_callchain_kernel(struct pt_regs *regs, int graph = 0; #endif - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, regs->tpc); ksp = regs->u_regs[UREG_I6]; fp = ksp + STACK_BIAS; @@ -1322,13 +1316,13 @@ static void perf_callchain_kernel(struct pt_regs *regs, pc = sf->callers_pc; fp = (unsigned long)sf->fp + STACK_BIAS; } - callchain_store(entry, pc); + perf_callchain_store(entry, pc); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if ((pc + 8UL) == (unsigned long) &return_to_handler) { int index = current->curr_ret_stack; if (current->ret_stack && index >= graph) { pc = current->ret_stack[index - graph].ret; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); graph++; } } @@ -1341,8 +1335,8 @@ static void perf_callchain_user_64(struct pt_regs *regs, { unsigned long ufp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, regs->tpc); ufp = regs->u_regs[UREG_I6] + STACK_BIAS; do { @@ -1355,7 +1349,7 @@ static void perf_callchain_user_64(struct pt_regs *regs, pc = sf.callers_pc; ufp = (unsigned long)sf.fp + STACK_BIAS; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); } while (entry->nr < PERF_MAX_STACK_DEPTH); } @@ -1364,8 +1358,8 @@ static void perf_callchain_user_32(struct pt_regs *regs, { unsigned long ufp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, regs->tpc); ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; do { @@ -1378,7 +1372,7 @@ static void perf_callchain_user_32(struct pt_regs *regs, pc = sf.callers_pc; ufp = (unsigned long)sf.fp; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); } while (entry->nr < PERF_MAX_STACK_DEPTH); } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4a4d191f9492..8af28caeafc1 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1571,12 +1571,6 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) * callchain support */ -static inline -void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); @@ -1602,7 +1596,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - callchain_store(entry, addr); + perf_callchain_store(entry, addr); } static const struct stacktrace_ops backtrace_ops = { @@ -1616,8 +1610,8 @@ static const struct stacktrace_ops backtrace_ops = { static void perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) { - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->ip); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, regs->ip); dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); } @@ -1646,7 +1640,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) if (fp < compat_ptr(regs->sp)) break; - callchain_store(entry, frame.return_address); + perf_callchain_store(entry, frame.return_address); fp = compat_ptr(frame.next_frame); } return 1; @@ -1670,8 +1664,8 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) fp = (void __user *)regs->bp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, regs->ip); if (perf_callchain_user32(regs, entry)) return; @@ -1688,7 +1682,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) if ((unsigned long)fp < regs->sp) break; - callchain_store(entry, frame.return_address); + perf_callchain_store(entry, frame.return_address); fp = frame.next_frame; } } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 937495c25073..358880404b42 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -978,6 +978,13 @@ extern void perf_event_fork(struct task_struct *tsk); extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); +static inline void +perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + if (entry->nr < PERF_MAX_STACK_DEPTH) + entry->ip[entry->nr++] = ip; +} + extern int sysctl_perf_event_paranoid; extern int sysctl_perf_event_mlock; extern int sysctl_perf_event_sample_rate; -- cgit v1.2.3 From 56962b4449af34070bb1994621ef4f0265eed4d8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Jun 2010 23:03:51 +0200 Subject: perf: Generalize some arch callchain code - Most archs use one callchain buffer per cpu, except x86 that needs to deal with NMIs. Provide a default perf_callchain_buffer() implementation that x86 overrides. - Centralize all the kernel/user regs handling and invoke new arch handlers from there: perf_callchain_user() / perf_callchain_kernel() That avoid all the user_mode(), current->mm checks and so... - Invert some parameters in perf_callchain_*() helpers: entry to the left, regs to the right, following the traditional (dst, src). Signed-off-by: Frederic Weisbecker Acked-by: Paul Mackerras Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: David Miller Cc: Paul Mundt Cc: Borislav Petkov --- arch/arm/kernel/perf_event.c | 43 +++---------------------------- arch/powerpc/kernel/perf_callchain.c | 49 +++++++++++------------------------- arch/sh/kernel/perf_callchain.c | 37 ++------------------------- arch/sparc/kernel/perf_event.c | 46 +++++++++++---------------------- arch/x86/kernel/cpu/perf_event.c | 45 ++++++--------------------------- include/linux/perf_event.h | 10 +++++++- kernel/perf_event.c | 40 +++++++++++++++++++++++++++-- 7 files changed, 90 insertions(+), 180 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index a07c3b1955f0..0e3bbdb15927 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -3044,17 +3044,13 @@ user_backtrace(struct frame_tail *tail, return buftail.fp - 1; } -static void -perf_callchain_user(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct frame_tail *tail; perf_callchain_store(entry, PERF_CONTEXT_USER); - if (!user_mode(regs)) - regs = task_pt_regs(current); - tail = (struct frame_tail *)regs->ARM_fp - 1; while (tail && !((unsigned long)tail & 0x3)) @@ -3075,9 +3071,8 @@ callchain_trace(struct stackframe *fr, return 0; } -static void -perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stackframe fr; @@ -3088,33 +3083,3 @@ perf_callchain_kernel(struct pt_regs *regs, fr.pc = regs->ARM_pc; walk_stackframe(&fr, callchain_trace, entry); } - -static void -perf_do_callchain(struct pt_regs *regs, - struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); - -struct perf_callchain_entry * -perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - perf_do_callchain(regs, entry); - return entry; -} diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index a286c2e5a3ea..f7a85ede8407 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -46,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) return 0; } -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -221,8 +221,8 @@ static int sane_signal_64_frame(unsigned long sp) puc == (unsigned long) &sf->uc; } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -303,8 +303,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) return __get_user_inatomic(*ret, ptr); } -static inline void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static inline void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { } @@ -423,8 +423,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp, return mctx->mc_gregs; } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned int sp, next_sp; unsigned int next_ip; @@ -471,32 +471,11 @@ static void perf_callchain_user_32(struct pt_regs *regs, } } -/* - * Since we can't get PMU interrupts inside a PMU interrupt handler, - * we don't need separate irq and nmi entries here. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain); - - entry->nr = 0; - - if (!user_mode(regs)) { - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - if (current_is_64bit()) - perf_callchain_user_64(regs, entry); - else - perf_callchain_user_32(regs, entry); - } - - return entry; + if (current_is_64bit()) + perf_callchain_user_64(entry, regs); + else + perf_callchain_user_32(entry, regs); } diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c index 00143f3dd196..ef076a91292a 100644 --- a/arch/sh/kernel/perf_callchain.c +++ b/arch/sh/kernel/perf_callchain.c @@ -44,44 +44,11 @@ static const struct stacktrace_ops callchain_ops = { .address = callchain_address, }; -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { perf_callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_store(entry, regs->pc); unwind_stack(NULL, regs, NULL, &callchain_ops, entry); } - -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - /* - * Only the kernel side is implemented for now. - */ - if (!is_user) - perf_callchain_kernel(regs, entry); -} - -/* - * No need for separate IRQ and NMI entries. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); - - entry->nr = 0; - - perf_do_callchain(regs, entry); - - return entry; -} diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 2a95a9079862..460162d74aba 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1283,14 +1283,16 @@ void __init init_hw_perf_events(void) register_die_notifier(&perf_event_nmi_notifier); } -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ksp, fp; #ifdef CONFIG_FUNCTION_GRAPH_TRACER int graph = 0; #endif + stack_trace_flush(); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_store(entry, regs->tpc); @@ -1330,8 +1332,8 @@ static void perf_callchain_kernel(struct pt_regs *regs, } while (entry->nr < PERF_MAX_STACK_DEPTH); } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ufp; @@ -1353,8 +1355,8 @@ static void perf_callchain_user_64(struct pt_regs *regs, } while (entry->nr < PERF_MAX_STACK_DEPTH); } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ufp; @@ -1376,30 +1378,12 @@ static void perf_callchain_user_32(struct pt_regs *regs, } while (entry->nr < PERF_MAX_STACK_DEPTH); } -/* Like powerpc we can't get PMU interrupts within the PMU handler, - * so no need for separate NMI and IRQ chains as on x86. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); - - entry->nr = 0; - if (!user_mode(regs)) { - stack_trace_flush(); - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - if (regs) { - flushw_user(); - if (test_thread_flag(TIF_32BIT)) - perf_callchain_user_32(regs, entry); - else - perf_callchain_user_64(regs, entry); - } - return entry; + flushw_user(); + if (test_thread_flag(TIF_32BIT)) + perf_callchain_user_32(entry, regs); + else + perf_callchain_user_64(entry, regs); } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8af28caeafc1..39f8421b86e6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1571,9 +1571,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) * callchain support */ - -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); +static DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry_nmi); static void @@ -1607,8 +1605,8 @@ static const struct stacktrace_ops backtrace_ops = { .walk_stack = print_context_stack_bp, }; -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { perf_callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_store(entry, regs->ip); @@ -1653,14 +1651,12 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) } #endif -static void -perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stack_frame frame; const void __user *fp; - if (!user_mode(regs)) - regs = task_pt_regs(current); fp = (void __user *)regs->bp; @@ -1687,42 +1683,17 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) } } -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +struct perf_callchain_entry *perf_callchain_buffer(void) { - struct perf_callchain_entry *entry; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { /* TODO: We don't support guest os callchain now */ return NULL; } if (in_nmi()) - entry = &__get_cpu_var(pmc_nmi_entry); - else - entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - - perf_do_callchain(regs, entry); + return &__get_cpu_var(perf_callchain_entry_nmi); - return entry; + return &__get_cpu_var(perf_callchain_entry); } unsigned long perf_instruction_pointer(struct pt_regs *regs) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 358880404b42..4db61dded388 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -976,7 +976,15 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_comm(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); -extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); +/* Callchains */ +DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); + +extern void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs); +extern struct perf_callchain_entry *perf_callchain_buffer(void); + static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index c772a3d4000d..02efde6c8798 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -2937,13 +2937,49 @@ void perf_event_do_pending(void) __perf_pending_run(); } +DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); + /* * Callchain support -- arch specific */ -__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +__weak struct perf_callchain_entry *perf_callchain_buffer(void) { - return NULL; + return &__get_cpu_var(perf_callchain_entry); +} + +__weak void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +__weak void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + struct perf_callchain_entry *entry; + + entry = perf_callchain_buffer(); + if (!entry) + return NULL; + + entry->nr = 0; + + if (!user_mode(regs)) { + perf_callchain_kernel(entry, regs); + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) + perf_callchain_user(entry, regs); + + return entry; } -- cgit v1.2.3 From 927c7a9e92c4f69097a6e9e086d11fc2f8a5b40b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 1 Jul 2010 16:20:36 +0200 Subject: perf: Fix race in callchains Now that software events don't have interrupt disabled anymore in the event path, callchains can nest on any context. So seperating nmi and others contexts in two buffers has become racy. Fix this by providing one buffer per nesting level. Given the size of the callchain entries (2040 bytes * 4), we now need to allocate them dynamically. v2: Fixed put_callchain_entry call after recursion. Fix the type of the recursion, it must be an array. v3: Use a manual pr cpu allocation (temporary solution until NMIs can safely access vmalloc'ed memory). Do a better separation between callchain reference tracking and allocation. Make the "put" path lockless for non-release cases. v4: Protect the callchain buffers with rcu. v5: Do the cpu buffers allocations node affine. Signed-off-by: Frederic Weisbecker Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian Cc: Paul Mundt Cc: David Miller Cc: Borislav Petkov --- arch/x86/kernel/cpu/perf_event.c | 22 ++- include/linux/perf_event.h | 1 - kernel/perf_event.c | 298 ++++++++++++++++++++++++++++++--------- 3 files changed, 238 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index a3c922288cc0..8e91cf34a9c8 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1608,6 +1608,11 @@ static const struct stacktrace_ops backtrace_ops = { void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return NULL; + } + perf_callchain_store(entry, regs->ip); dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); @@ -1656,6 +1661,10 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) struct stack_frame frame; const void __user *fp; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return NULL; + } fp = (void __user *)regs->bp; @@ -1681,19 +1690,6 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) } } -struct perf_callchain_entry *perf_callchain_buffer(void) -{ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* TODO: We don't support guest os callchain now */ - return NULL; - } - - if (in_nmi()) - return &__get_cpu_var(perf_callchain_entry_nmi); - - return &__get_cpu_var(perf_callchain_entry); -} - unsigned long perf_instruction_pointer(struct pt_regs *regs) { unsigned long ip; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4db61dded388..d7e8ea690864 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -983,7 +983,6 @@ extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); -extern struct perf_callchain_entry *perf_callchain_buffer(void); static inline void diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 615d024894cf..75ab8a2df6b2 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1763,6 +1763,216 @@ static u64 perf_event_read(struct perf_event *event) return perf_event_count(event); } +/* + * Callchain support + */ + +struct callchain_cpus_entries { + struct rcu_head rcu_head; + struct perf_callchain_entry *cpu_entries[0]; +}; + +static DEFINE_PER_CPU(int, callchain_recursion[4]); +static atomic_t nr_callchain_events; +static DEFINE_MUTEX(callchain_mutex); +struct callchain_cpus_entries *callchain_cpus_entries; + + +__weak void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +__weak void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +static void release_callchain_buffers_rcu(struct rcu_head *head) +{ + struct callchain_cpus_entries *entries; + int cpu; + + entries = container_of(head, struct callchain_cpus_entries, rcu_head); + + for_each_possible_cpu(cpu) + kfree(entries->cpu_entries[cpu]); + + kfree(entries); +} + +static void release_callchain_buffers(void) +{ + struct callchain_cpus_entries *entries; + + entries = callchain_cpus_entries; + rcu_assign_pointer(callchain_cpus_entries, NULL); + call_rcu(&entries->rcu_head, release_callchain_buffers_rcu); +} + +static int alloc_callchain_buffers(void) +{ + int cpu; + int size; + struct callchain_cpus_entries *entries; + + /* + * We can't use the percpu allocation API for data that can be + * accessed from NMI. Use a temporary manual per cpu allocation + * until that gets sorted out. + */ + size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * + num_possible_cpus(); + + entries = kzalloc(size, GFP_KERNEL); + if (!entries) + return -ENOMEM; + + size = sizeof(struct perf_callchain_entry) * 4; + + for_each_possible_cpu(cpu) { + entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, + cpu_to_node(cpu)); + if (!entries->cpu_entries[cpu]) + goto fail; + } + + rcu_assign_pointer(callchain_cpus_entries, entries); + + return 0; + +fail: + for_each_possible_cpu(cpu) + kfree(entries->cpu_entries[cpu]); + kfree(entries); + + return -ENOMEM; +} + +static int get_callchain_buffers(void) +{ + int err = 0; + int count; + + mutex_lock(&callchain_mutex); + + count = atomic_inc_return(&nr_callchain_events); + if (WARN_ON_ONCE(count < 1)) { + err = -EINVAL; + goto exit; + } + + if (count > 1) { + /* If the allocation failed, give up */ + if (!callchain_cpus_entries) + err = -ENOMEM; + goto exit; + } + + err = alloc_callchain_buffers(); + if (err) + release_callchain_buffers(); +exit: + mutex_unlock(&callchain_mutex); + + return err; +} + +static void put_callchain_buffers(void) +{ + if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) { + release_callchain_buffers(); + mutex_unlock(&callchain_mutex); + } +} + +static int get_recursion_context(int *recursion) +{ + int rctx; + + if (in_nmi()) + rctx = 3; + else if (in_irq()) + rctx = 2; + else if (in_softirq()) + rctx = 1; + else + rctx = 0; + + if (recursion[rctx]) + return -1; + + recursion[rctx]++; + barrier(); + + return rctx; +} + +static inline void put_recursion_context(int *recursion, int rctx) +{ + barrier(); + recursion[rctx]--; +} + +static struct perf_callchain_entry *get_callchain_entry(int *rctx) +{ + int cpu; + struct callchain_cpus_entries *entries; + + *rctx = get_recursion_context(__get_cpu_var(callchain_recursion)); + if (*rctx == -1) + return NULL; + + entries = rcu_dereference(callchain_cpus_entries); + if (!entries) + return NULL; + + cpu = smp_processor_id(); + + return &entries->cpu_entries[cpu][*rctx]; +} + +static void +put_callchain_entry(int rctx) +{ + put_recursion_context(__get_cpu_var(callchain_recursion), rctx); +} + +static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + int rctx; + struct perf_callchain_entry *entry; + + + entry = get_callchain_entry(&rctx); + if (rctx == -1) + return NULL; + + if (!entry) + goto exit_put; + + entry->nr = 0; + + if (!user_mode(regs)) { + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_kernel(entry, regs); + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) { + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_user(entry, regs); + } + +exit_put: + put_callchain_entry(rctx); + + return entry; +} + /* * Initialize the perf_event context in a task_struct: */ @@ -1895,6 +2105,8 @@ static void free_event(struct perf_event *event) atomic_dec(&nr_comm_events); if (event->attr.task) atomic_dec(&nr_task_events); + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + put_callchain_buffers(); } if (event->buffer) { @@ -2937,55 +3149,6 @@ void perf_event_do_pending(void) __perf_pending_run(); } -DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); - -/* - * Callchain support -- arch specific - */ - -__weak struct perf_callchain_entry *perf_callchain_buffer(void) -{ - return &__get_cpu_var(perf_callchain_entry); -} - -__weak void perf_callchain_kernel(struct perf_callchain_entry *entry, - struct pt_regs *regs) -{ -} - -__weak void perf_callchain_user(struct perf_callchain_entry *entry, - struct pt_regs *regs) -{ -} - -static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry; - - entry = perf_callchain_buffer(); - if (!entry) - return NULL; - - entry->nr = 0; - - if (!user_mode(regs)) { - perf_callchain_store(entry, PERF_CONTEXT_KERNEL); - perf_callchain_kernel(entry, regs); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - perf_callchain_store(entry, PERF_CONTEXT_USER); - perf_callchain_user(entry, regs); - } - - return entry; -} - - /* * We assume there is only KVM supporting the callbacks. * Later on, we might change it to a list if there is @@ -3480,14 +3643,20 @@ static void perf_event_output(struct perf_event *event, int nmi, struct perf_output_handle handle; struct perf_event_header header; + /* protect the callchain buffers */ + rcu_read_lock(); + perf_prepare_sample(&header, data, event, regs); if (perf_output_begin(&handle, event, header.size, nmi, 1)) - return; + goto exit; perf_output_sample(&handle, &header, data, event); perf_output_end(&handle); + +exit: + rcu_read_unlock(); } /* @@ -4243,32 +4412,16 @@ end: int perf_swevent_get_recursion_context(void) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - int rctx; - if (in_nmi()) - rctx = 3; - else if (in_irq()) - rctx = 2; - else if (in_softirq()) - rctx = 1; - else - rctx = 0; - - if (cpuctx->recursion[rctx]) - return -1; - - cpuctx->recursion[rctx]++; - barrier(); - - return rctx; + return get_recursion_context(cpuctx->recursion); } EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); void inline perf_swevent_put_recursion_context(int rctx) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - barrier(); - cpuctx->recursion[rctx]--; + + put_recursion_context(cpuctx->recursion, rctx); } void __perf_sw_event(u32 event_id, u64 nr, int nmi, @@ -4968,6 +5121,13 @@ done: atomic_inc(&nr_comm_events); if (event->attr.task) atomic_inc(&nr_task_events); + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { + err = get_callchain_buffers(); + if (err) { + free_event(event); + return ERR_PTR(err); + } + } } return event; -- cgit v1.2.3 From 7ae07ea3a48d30689ee037cb136bc21f0b37d8ae Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 14 Aug 2010 20:45:13 +0200 Subject: perf: Humanize the number of contexts Instead of hardcoding the number of contexts for the recursions barriers, define a cpp constant to make the code more self-explanatory. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian --- include/linux/perf_event.h | 14 ++++++++------ kernel/perf_event.c | 4 ++-- kernel/trace/trace_event_perf.c | 8 ++++---- 3 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d7e8ea690864..ae6fa6050925 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -808,6 +808,12 @@ struct perf_event_context { struct rcu_head rcu_head; }; +/* + * Number of contexts where an event can trigger: + * task, softirq, hardirq, nmi. + */ +#define PERF_NR_CONTEXTS 4 + /** * struct perf_event_cpu_context - per cpu event context structure */ @@ -821,12 +827,8 @@ struct perf_cpu_context { struct mutex hlist_mutex; int hlist_refcount; - /* - * Recursion avoidance: - * - * task, softirq, irq, nmi context - */ - int recursion[4]; + /* Recursion avoidance in each contexts */ + int recursion[PERF_NR_CONTEXTS]; }; struct perf_output_handle { diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 75ab8a2df6b2..f416aef242c3 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1772,7 +1772,7 @@ struct callchain_cpus_entries { struct perf_callchain_entry *cpu_entries[0]; }; -static DEFINE_PER_CPU(int, callchain_recursion[4]); +static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); static atomic_t nr_callchain_events; static DEFINE_MUTEX(callchain_mutex); struct callchain_cpus_entries *callchain_cpus_entries; @@ -1828,7 +1828,7 @@ static int alloc_callchain_buffers(void) if (!entries) return -ENOMEM; - size = sizeof(struct perf_callchain_entry) * 4; + size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS; for_each_possible_cpu(cpu) { entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 000e6e85b445..db2eae2efcf2 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -9,7 +9,7 @@ #include #include "trace.h" -static char *perf_trace_buf[4]; +static char *perf_trace_buf[PERF_NR_CONTEXTS]; /* * Force it to be aligned to unsigned long to avoid misaligned accesses @@ -45,7 +45,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, char *buf; int i; - for (i = 0; i < 4; i++) { + for (i = 0; i < PERF_NR_CONTEXTS; i++) { buf = (char *)alloc_percpu(perf_trace_t); if (!buf) goto fail; @@ -65,7 +65,7 @@ fail: if (!total_ref_count) { int i; - for (i = 0; i < 4; i++) { + for (i = 0; i < PERF_NR_CONTEXTS; i++) { free_percpu(perf_trace_buf[i]); perf_trace_buf[i] = NULL; } @@ -140,7 +140,7 @@ void perf_trace_destroy(struct perf_event *p_event) tp_event->perf_events = NULL; if (!--total_ref_count) { - for (i = 0; i < 4; i++) { + for (i = 0; i < PERF_NR_CONTEXTS; i++) { free_percpu(perf_trace_buf[i]); perf_trace_buf[i] = NULL; } -- cgit v1.2.3 From 6016ee13db518ab1cd0cbf43fc2ad5712021e338 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Aug 2010 12:47:59 +0900 Subject: perf, tracing: add missing __percpu markups ftrace_event_call->perf_events, perf_trace_buf, fgraph_data->cpu_data and some local variables are percpu pointers missing __percpu markups. Add them. Signed-off-by: Namhyung Kim Acked-by: Tejun Heo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian LKML-Reference: <1281498479-28551-1-git-send-email-namhyung@gmail.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 4 ++-- kernel/trace/trace_event_perf.c | 15 ++++++++------- kernel/trace/trace_functions_graph.c | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 02b8b24f8f51..5f8ad7bec636 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -191,8 +191,8 @@ struct ftrace_event_call { unsigned int flags; #ifdef CONFIG_PERF_EVENTS - int perf_refcount; - struct hlist_head *perf_events; + int perf_refcount; + struct hlist_head __percpu *perf_events; #endif }; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index db2eae2efcf2..92f5477a006a 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -9,7 +9,7 @@ #include #include "trace.h" -static char *perf_trace_buf[PERF_NR_CONTEXTS]; +static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; /* * Force it to be aligned to unsigned long to avoid misaligned accesses @@ -24,7 +24,7 @@ static int total_ref_count; static int perf_trace_event_init(struct ftrace_event_call *tp_event, struct perf_event *p_event) { - struct hlist_head *list; + struct hlist_head __percpu *list; int ret = -ENOMEM; int cpu; @@ -42,11 +42,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, tp_event->perf_events = list; if (!total_ref_count) { - char *buf; + char __percpu *buf; int i; for (i = 0; i < PERF_NR_CONTEXTS; i++) { - buf = (char *)alloc_percpu(perf_trace_t); + buf = (char __percpu *)alloc_percpu(perf_trace_t); if (!buf) goto fail; @@ -102,13 +102,14 @@ int perf_trace_init(struct perf_event *p_event) int perf_trace_enable(struct perf_event *p_event) { struct ftrace_event_call *tp_event = p_event->tp_event; + struct hlist_head __percpu *pcpu_list; struct hlist_head *list; - list = tp_event->perf_events; - if (WARN_ON_ONCE(!list)) + pcpu_list = tp_event->perf_events; + if (WARN_ON_ONCE(!pcpu_list)) return -EINVAL; - list = this_cpu_ptr(list); + list = this_cpu_ptr(pcpu_list); hlist_add_head_rcu(&p_event->hlist_entry, list); return 0; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 6bff23625781..fcb5a542cd21 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -23,7 +23,7 @@ struct fgraph_cpu_data { }; struct fgraph_data { - struct fgraph_cpu_data *cpu_data; + struct fgraph_cpu_data __percpu *cpu_data; /* Place to preserve last processed entry. */ struct ftrace_graph_ent_entry ent; -- cgit v1.2.3 From 2244d07bfa2097cb00600da91c715a8aa547917e Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 17 Aug 2010 08:59:14 +0000 Subject: net: simplify flags for tx timestamping This patch removes the abstraction introduced by the union skb_shared_tx in the shared skb data. The access of the different union elements at several places led to some confusion about accessing the shared tx_flags e.g. in skb_orphan_try(). http://marc.info/?l=linux-netdev&m=128084897415886&w=2 Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- Documentation/networking/timestamping.txt | 22 +++++++++------- drivers/net/bfin_mac.c | 10 +++---- drivers/net/gianfar.c | 15 +++++------ drivers/net/igb/igb.h | 2 +- drivers/net/igb/igb_main.c | 11 ++++---- include/linux/skbuff.h | 44 +++++++++++-------------------- include/net/ip.h | 2 +- include/net/sock.h | 8 ++---- net/can/raw.c | 4 +-- net/core/dev.c | 6 ++--- net/core/skbuff.c | 2 +- net/ipv4/icmp.c | 4 +-- net/ipv4/ip_output.c | 6 ++--- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 4 +-- net/packet/af_packet.c | 4 +-- net/socket.c | 9 +++---- 17 files changed, 68 insertions(+), 87 deletions(-) (limited to 'include') diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index e8c8f4f06c67..98097d8cb910 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -172,15 +172,19 @@ struct skb_shared_hwtstamps { }; Time stamps for outgoing packets are to be generated as follows: -- In hard_start_xmit(), check if skb_tx(skb)->hardware is set no-zero. - If yes, then the driver is expected to do hardware time stamping. +- In hard_start_xmit(), check if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) + is set no-zero. If yes, then the driver is expected to do hardware time + stamping. - If this is possible for the skb and requested, then declare - that the driver is doing the time stamping by setting the field - skb_tx(skb)->in_progress non-zero. You might want to keep a pointer - to the associated skb for the next step and not free the skb. A driver - not supporting hardware time stamping doesn't do that. A driver must - never touch sk_buff::tstamp! It is used to store software generated - time stamps by the network subsystem. + that the driver is doing the time stamping by setting the flag + SKBTX_IN_PROGRESS in skb_shinfo(skb)->tx_flags , e.g. with + + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + + You might want to keep a pointer to the associated skb for the next step + and not free the skb. A driver not supporting hardware time stamping doesn't + do that. A driver must never touch sk_buff::tstamp! It is used to store + software generated time stamps by the network subsystem. - As soon as the driver has sent the packet and/or obtained a hardware time stamp for it, it passes the time stamp back by calling skb_hwtstamp_tx() with the original skb, the raw @@ -191,6 +195,6 @@ Time stamps for outgoing packets are to be generated as follows: this would occur at a later time in the processing pipeline than other software time stamping and therefore could lead to unexpected deltas between time stamps. -- If the driver did not call set skb_tx(skb)->in_progress, then +- If the driver did not set the SKBTX_IN_PROGRESS flag (see above), then dev_hard_start_xmit() checks whether software time stamping is wanted as fallback and potentially generates the time stamp. diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c index 012613fde3f4..7a0e4156fade 100644 --- a/drivers/net/bfin_mac.c +++ b/drivers/net/bfin_mac.c @@ -803,15 +803,14 @@ static void bfin_dump_hwtamp(char *s, ktime_t *hw, ktime_t *ts, struct timecompa static void bfin_tx_hwtstamp(struct net_device *netdev, struct sk_buff *skb) { struct bfin_mac_local *lp = netdev_priv(netdev); - union skb_shared_tx *shtx = skb_tx(skb); - if (shtx->hardware) { + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { int timeout_cnt = MAX_TIMEOUT_CNT; /* When doing time stamping, keep the connection to the socket * a while longer */ - shtx->in_progress = 1; + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; /* * The timestamping is done at the EMAC module's MII/RMII interface @@ -991,7 +990,6 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb, struct bfin_mac_local *lp = netdev_priv(dev); u16 *data; u32 data_align = (unsigned long)(skb->data) & 0x3; - union skb_shared_tx *shtx = skb_tx(skb); current_tx_ptr->skb = skb; @@ -1005,7 +1003,7 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb, * of this field are the length of the packet payload in bytes and the higher * 4 bits are the timestamping enable field. */ - if (shtx->hardware) + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) *data |= 0x1000; current_tx_ptr->desc_a.start_addr = (u32)data; @@ -1015,7 +1013,7 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb, } else { *((u16 *)(current_tx_ptr->packet)) = (u16)(skb->len); /* enable timestamping for the sent packet */ - if (shtx->hardware) + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) *((u16 *)(current_tx_ptr->packet)) |= 0x1000; memcpy((u8 *)(current_tx_ptr->packet + 2), skb->data, skb->len); diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 3d9f958ebd2c..e6048d6ab0ea 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -2048,7 +2048,6 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) u32 bufaddr; unsigned long flags; unsigned int nr_frags, nr_txbds, length; - union skb_shared_tx *shtx; /* * TOE=1 frames larger than 2500 bytes may see excess delays @@ -2069,10 +2068,10 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) txq = netdev_get_tx_queue(dev, rq); base = tx_queue->tx_bd_base; regs = tx_queue->grp->regs; - shtx = skb_tx(skb); /* check if time stamp should be generated */ - if (unlikely(shtx->hardware && priv->hwts_tx_en)) + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + priv->hwts_tx_en)) do_tstamp = 1; /* make space for additional header when fcb is needed */ @@ -2174,7 +2173,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Setup tx hardware time stamping if requested */ if (unlikely(do_tstamp)) { - shtx->in_progress = 1; + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; if (fcb == NULL) fcb = gfar_add_fcb(skb); fcb->ptp = 1; @@ -2446,7 +2445,6 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) int howmany = 0; u32 lstatus; size_t buflen; - union skb_shared_tx *shtx; rx_queue = priv->rx_queue[tx_queue->qindex]; bdp = tx_queue->dirty_tx; @@ -2461,8 +2459,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) * When time stamping, one additional TxBD must be freed. * Also, we need to dma_unmap_single() the TxPAL. */ - shtx = skb_tx(skb); - if (unlikely(shtx->in_progress)) + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) nr_txbds = frags + 2; else nr_txbds = frags + 1; @@ -2476,7 +2473,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) (lstatus & BD_LENGTH_MASK)) break; - if (unlikely(shtx->in_progress)) { + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { next = next_txbd(bdp, base, tx_ring_size); buflen = next->length + GMAC_FCB_LEN; } else @@ -2485,7 +2482,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) dma_unmap_single(&priv->ofdev->dev, bdp->bufPtr, buflen, DMA_TO_DEVICE); - if (unlikely(shtx->in_progress)) { + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { struct skb_shared_hwtstamps shhwtstamps; u64 *ns = (u64*) (((u32)skb->data + 0x10) & ~0x7); memset(&shhwtstamps, 0, sizeof(shhwtstamps)); diff --git a/drivers/net/igb/igb.h b/drivers/net/igb/igb.h index 6e63d9a7fc75..44e0ff1494e0 100644 --- a/drivers/net/igb/igb.h +++ b/drivers/net/igb/igb.h @@ -143,7 +143,7 @@ struct igb_buffer { u16 next_to_watch; unsigned int bytecount; u16 gso_segs; - union skb_shared_tx shtx; + u8 tx_flags; u8 mapped_as_page; }; /* RX */ diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index 9b4e5895f5f9..985e37cf17b6 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -3954,7 +3954,7 @@ static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb, } tx_ring->buffer_info[i].skb = skb; - tx_ring->buffer_info[i].shtx = skb_shinfo(skb)->tx_flags; + tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags; /* multiply data chunks by size of headers */ tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len; tx_ring->buffer_info[i].gso_segs = gso_segs; @@ -4088,7 +4088,6 @@ netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb, u32 tx_flags = 0; u16 first; u8 hdr_len = 0; - union skb_shared_tx *shtx = skb_tx(skb); /* need: 1 descriptor per page, * + 2 desc gap to keep tail from touching head, @@ -4100,8 +4099,8 @@ netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb, return NETDEV_TX_BUSY; } - if (unlikely(shtx->hardware)) { - shtx->in_progress = 1; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGB_TX_FLAGS_TSTAMP; } @@ -5319,7 +5318,7 @@ static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *bu u64 regval; /* if skb does not support hw timestamp or TX stamp not valid exit */ - if (likely(!buffer_info->shtx.hardware) || + if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) || !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID)) return; @@ -5500,7 +5499,7 @@ static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr, * values must belong to this one here and therefore we don't need to * compare any of the additional attributes stored for it. * - * If nothing went wrong, then it should have a skb_shared_tx that we + * If nothing went wrong, then it should have a shared tx_flags that we * can turn into a skb_shared_hwtstamps. */ if (staterr & E1000_RXDADV_STAT_TSIP) { diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d8050382b189..f067c95cf18a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -163,26 +163,19 @@ struct skb_shared_hwtstamps { ktime_t syststamp; }; -/** - * struct skb_shared_tx - instructions for time stamping of outgoing packets - * @hardware: generate hardware time stamp - * @software: generate software time stamp - * @in_progress: device driver is going to provide - * hardware time stamp - * @prevent_sk_orphan: make sk reference available on driver level - * @flags: all shared_tx flags - * - * These flags are attached to packets as part of the - * &skb_shared_info. Use skb_tx() to get a pointer. - */ -union skb_shared_tx { - struct { - __u8 hardware:1, - software:1, - in_progress:1, - prevent_sk_orphan:1; - }; - __u8 flags; +/* Definitions for tx_flags in struct skb_shared_info */ +enum { + /* generate hardware time stamp */ + SKBTX_HW_TSTAMP = 1 << 0, + + /* generate software time stamp */ + SKBTX_SW_TSTAMP = 1 << 1, + + /* device driver is going to provide hardware time stamp */ + SKBTX_IN_PROGRESS = 1 << 2, + + /* ensure the originating sk reference is available on driver level */ + SKBTX_DRV_NEEDS_SK_REF = 1 << 3, }; /* This data is invariant across clones and lives at @@ -195,7 +188,7 @@ struct skb_shared_info { unsigned short gso_segs; unsigned short gso_type; __be32 ip6_frag_id; - union skb_shared_tx tx_flags; + __u8 tx_flags; struct sk_buff *frag_list; struct skb_shared_hwtstamps hwtstamps; @@ -587,11 +580,6 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) return &skb_shinfo(skb)->hwtstamps; } -static inline union skb_shared_tx *skb_tx(struct sk_buff *skb) -{ - return &skb_shinfo(skb)->tx_flags; -} - /** * skb_queue_empty - check if a queue is empty * @list: queue head @@ -1996,8 +1984,8 @@ extern void skb_tstamp_tx(struct sk_buff *orig_skb, static inline void sw_tx_timestamp(struct sk_buff *skb) { - union skb_shared_tx *shtx = skb_tx(skb); - if (shtx->software && !shtx->in_progress) + if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP && + !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) skb_tstamp_tx(skb, NULL); } diff --git a/include/net/ip.h b/include/net/ip.h index 890f9725d681..7691aca133db 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -53,7 +53,7 @@ struct ipcm_cookie { __be32 addr; int oif; struct ip_options *opt; - union skb_shared_tx shtx; + __u8 tx_flags; }; #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) diff --git a/include/net/sock.h b/include/net/sock.h index ac53bfbdfe16..100e43bf95fb 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1669,17 +1669,13 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, /** * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped - * @msg: outgoing packet * @sk: socket sending this packet - * @shtx: filled with instructions for time stamping + * @tx_flags: filled with instructions for time stamping * * Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if * parameters are invalid. */ -extern int sock_tx_timestamp(struct msghdr *msg, - struct sock *sk, - union skb_shared_tx *shtx); - +extern int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); /** * sk_eat_skb - Release a skb if it is no longer needed diff --git a/net/can/raw.c b/net/can/raw.c index a10e3338f084..7d77e67e57af 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -647,12 +647,12 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) goto free_skb; - err = sock_tx_timestamp(msg, sk, skb_tx(skb)); + err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (err < 0) goto free_skb; /* to be able to check the received tx sock reference in raw_rcv() */ - skb_tx(skb)->prevent_sk_orphan = 1; + skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF; skb->dev = dev; skb->sk = sk; diff --git a/net/core/dev.c b/net/core/dev.c index 586a11cb4398..c1dc8a95f6ff 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1902,14 +1902,14 @@ static int dev_gso_segment(struct sk_buff *skb) /* * Try to orphan skb early, right before transmission by the device. - * We cannot orphan skb if tx timestamp is requested, since - * drivers need to call skb_tstamp_tx() to send the timestamp. + * We cannot orphan skb if tx timestamp is requested or the sk-reference + * is needed on driver level for other reasons, e.g. see net/can/raw.c */ static inline void skb_orphan_try(struct sk_buff *skb) { struct sock *sk = skb->sk; - if (sk && !skb_tx(skb)->flags) { + if (sk && !skb_shinfo(skb)->tx_flags) { /* skb_tx_hash() wont be able to get sk. * We copy sk_hash into skb->rxhash */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3a2513f0d0c3..99ef721f773d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3016,7 +3016,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, } else { /* * no hardware time stamps available, - * so keep the skb_shared_tx and only + * so keep the shared tx_flags and only * store software time stamp */ skb->tstamp = ktime_get_real(); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a0d847c7cba5..96bc7f9475a3 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -379,7 +379,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) inet->tos = ip_hdr(skb)->tos; daddr = ipc.addr = rt->rt_src; ipc.opt = NULL; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; if (icmp_param->replyopts.optlen) { ipc.opt = &icmp_param->replyopts; if (ipc.opt->srr) @@ -538,7 +538,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) inet_sk(sk)->tos = tos; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; { struct flowi fl = { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 04b69896df5f..e807492f1777 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -953,7 +953,7 @@ alloc_new_skb: else /* only the initial fragment is time stamped */ - ipc->shtx.flags = 0; + ipc->tx_flags = 0; } if (skb == NULL) goto error; @@ -964,7 +964,7 @@ alloc_new_skb: skb->ip_summed = csummode; skb->csum = 0; skb_reserve(skb, hh_len); - *skb_tx(skb) = ipc->shtx; + skb_shinfo(skb)->tx_flags = ipc->tx_flags; /* * Find where to start putting bytes. @@ -1384,7 +1384,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar daddr = ipc.addr = rt->rt_src; ipc.opt = NULL; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; if (replyopts.opt.optlen) { ipc.opt = &replyopts.opt; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 009a7b2aa1ef..1f85ef289895 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -505,7 +505,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->inet_saddr; ipc.opt = NULL; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32e0bef60d0a..86e757e162ee 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -797,7 +797,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return -EOPNOTSUPP; ipc.opt = NULL; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; if (up->pending) { /* @@ -845,7 +845,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; - err = sock_tx_timestamp(msg, sk, &ipc.shtx); + err = sock_tx_timestamp(sk, &ipc.tx_flags); if (err) return err; if (msg->msg_controllen) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9a17f28b1253..3616f27b9d46 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -488,7 +488,7 @@ retry: skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - err = sock_tx_timestamp(msg, sk, skb_tx(skb)); + err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (err < 0) goto out_unlock; @@ -1209,7 +1209,7 @@ static int packet_snd(struct socket *sock, err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); if (err) goto out_free; - err = sock_tx_timestamp(msg, sk, skb_tx(skb)); + err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (err < 0) goto out_free; diff --git a/net/socket.c b/net/socket.c index 2270b941bcc7..7848d12f5e4d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -535,14 +535,13 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, - union skb_shared_tx *shtx) +int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) { - shtx->flags = 0; + *tx_flags = 0; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) - shtx->hardware = 1; + *tx_flags |= SKBTX_HW_TSTAMP; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) - shtx->software = 1; + *tx_flags |= SKBTX_SW_TSTAMP; return 0; } EXPORT_SYMBOL(sock_tx_timestamp); -- cgit v1.2.3 From e760702ed8333588f9f21e7bf6597873993006f1 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 17 Aug 2010 19:03:44 +0000 Subject: net: introduce proto_ports_offset() Introduce proto_ports_offset() for getting the position of the ports or SPI in the message of a protocol. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/in.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/in.h b/include/linux/in.h index 41d88a4689af..beeb6dee2b49 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -250,6 +250,25 @@ struct sockaddr_in { #ifdef __KERNEL__ +#include + +static inline int proto_ports_offset(int proto) +{ + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: /* SPI */ + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + return 0; + case IPPROTO_AH: /* SPI */ + return 4; + default: + return -EINVAL; + } +} + static inline bool ipv4_is_loopback(__be32 addr) { return (addr & htonl(0xff000000)) == htonl(0x7f000000); -- cgit v1.2.3 From d34a16661ed0fed433c9469d7cfa3ca4d30ca42e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 14 Jun 2010 17:06:21 -0700 Subject: net: convert to rcu_dereference_index_check() The task_cls_classid() function applies rcu_dereference() to integers, which does not work with the shiny new sparse-based checking in rcu_dereference(). This commit therefore moves to the new RCU API rcu_dereference_index_check(). Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Acked-by: David S. Miller Acked-by: Herbert Xu --- include/net/cls_cgroup.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 726cc3536409..dd1fdb8293f5 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -45,7 +45,8 @@ static inline u32 task_cls_classid(struct task_struct *p) return 0; rcu_read_lock(); - id = rcu_dereference(net_cls_subsys_id); + id = rcu_dereference_index_check(net_cls_subsys_id, + rcu_read_lock_held()); if (id >= 0) classid = container_of(task_subsys_state(p, id), struct cgroup_cls_state, css)->classid; -- cgit v1.2.3 From ca5ecddfa8fcbd948c95530e7e817cee9fb43a3d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 28 Apr 2010 14:39:09 -0700 Subject: rcu: define __rcu address space modifier for sparse This commit provides definitions for the __rcu annotation defined earlier. This annotation permits sparse to check for correct use of RCU-protected pointers. If a pointer that is annotated with __rcu is accessed directly (as opposed to via rcu_dereference(), rcu_assign_pointer(), or one of their variants), sparse can be made to complain. To enable such complaints, use the new default-disabled CONFIG_SPARSE_RCU_POINTER kernel configuration option. Please note that these sparse complaints are intended to be a debugging aid, -not- a code-style-enforcement mechanism. There are special rcu_dereference_protected() and rcu_access_pointer() accessors for use when RCU read-side protection is not required, for example, when no other CPU has access to the data structure in question or while the current CPU hold the update-side lock. This patch also updates a number of docbook comments that were showing their age. Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Christopher Li Reviewed-by: Josh Triplett --- include/linux/compiler.h | 4 + include/linux/rcupdate.h | 352 ++++++++++++++++++++++++++++------------------- include/linux/srcu.h | 27 +++- kernel/rcupdate.c | 6 +- lib/Kconfig.debug | 13 ++ 5 files changed, 257 insertions(+), 145 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c1a62c56a660..320d6c94ff84 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -16,7 +16,11 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) +#ifdef CONFIG_SPARSE_RCU_POINTER +# define __rcu __attribute__((noderef, address_space(4))) +#else # define __rcu +#endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); #else diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9fbc54a2585d..b973dea2d6b0 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -41,6 +41,7 @@ #include #include #include +#include #ifdef CONFIG_RCU_TORTURE_TEST extern int rcutorture_runnable; /* for sysctl */ @@ -120,14 +121,15 @@ extern struct lockdep_map rcu_sched_lock_map; extern int debug_lockdep_rcu_enabled(void); /** - * rcu_read_lock_held - might we be in RCU read-side critical section? + * rcu_read_lock_held() - might we be in RCU read-side critical section? * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, * this assumes we are in an RCU read-side critical section unless it can - * prove otherwise. + * prove otherwise. This is useful for debug checks in functions that + * require that they be called within an RCU read-side critical section. * - * Check debug_lockdep_rcu_enabled() to prevent false positives during boot + * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. */ static inline int rcu_read_lock_held(void) @@ -144,14 +146,16 @@ static inline int rcu_read_lock_held(void) extern int rcu_read_lock_bh_held(void); /** - * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section? + * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an * RCU-sched read-side critical section. In absence of * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side * critical section unless it can prove otherwise. Note that disabling * of preemption (including disabling irqs) counts as an RCU-sched - * read-side critical section. + * read-side critical section. This is useful for debug checks in functions + * that required that they be called within an RCU-sched read-side + * critical section. * * Check debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. @@ -220,41 +224,155 @@ extern int rcu_my_thread_group_empty(void); } \ } while (0) +#else /* #ifdef CONFIG_PROVE_RCU */ + +#define __do_rcu_dereference_check(c) do { } while (0) + +#endif /* #else #ifdef CONFIG_PROVE_RCU */ + +/* + * Helper functions for rcu_dereference_check(), rcu_dereference_protected() + * and rcu_assign_pointer(). Some of these could be folded into their + * callers, but they are left separate in order to ease introduction of + * multiple flavors of pointers to match the multiple flavors of RCU + * (e.g., __rcu_bh, * __rcu_sched, and __srcu), should this make sense in + * the future. + */ +#define __rcu_access_pointer(p, space) \ + ({ \ + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ + (void) (((typeof (*p) space *)p) == p); \ + ((typeof(*p) __force __kernel *)(_________p1)); \ + }) +#define __rcu_dereference_check(p, c, space) \ + ({ \ + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ + __do_rcu_dereference_check(c); \ + (void) (((typeof (*p) space *)p) == p); \ + smp_read_barrier_depends(); \ + ((typeof(*p) __force __kernel *)(_________p1)); \ + }) +#define __rcu_dereference_protected(p, c, space) \ + ({ \ + __do_rcu_dereference_check(c); \ + (void) (((typeof (*p) space *)p) == p); \ + ((typeof(*p) __force __kernel *)(p)); \ + }) + +#define __rcu_dereference_index_check(p, c) \ + ({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + __do_rcu_dereference_check(c); \ + smp_read_barrier_depends(); \ + (_________p1); \ + }) +#define __rcu_assign_pointer(p, v, space) \ + ({ \ + if (!__builtin_constant_p(v) || \ + ((v) != NULL)) \ + smp_wmb(); \ + (p) = (typeof(*v) __force space *)(v); \ + }) + + +/** + * rcu_access_pointer() - fetch RCU pointer with no dereferencing + * @p: The pointer to read + * + * Return the value of the specified RCU-protected pointer, but omit the + * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful + * when the value of this pointer is accessed, but the pointer is not + * dereferenced, for example, when testing an RCU-protected pointer against + * NULL. Although rcu_access_pointer() may also be used in cases where + * update-side locks prevent the value of the pointer from changing, you + * should instead use rcu_dereference_protected() for this use case. + */ +#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) + /** - * rcu_dereference_check - rcu_dereference with debug checking + * rcu_dereference_check() - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * Do an rcu_dereference(), but check that the conditions under which the - * dereference will take place are correct. Typically the conditions indicate - * the various locking conditions that should be held at that point. The check - * should return true if the conditions are satisfied. + * dereference will take place are correct. Typically the conditions + * indicate the various locking conditions that should be held at that + * point. The check should return true if the conditions are satisfied. + * An implicit check for being in an RCU read-side critical section + * (rcu_read_lock()) is included. * * For example: * - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || - * lockdep_is_held(&foo->lock)); + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock)); * * could be used to indicate to lockdep that foo->bar may only be dereferenced - * if either the RCU read lock is held, or that the lock required to replace + * if either rcu_read_lock() is held, or that the lock required to replace * the bar struct at foo->bar is held. * * Note that the list of conditions may also include indications of when a lock * need not be held, for example during initialisation or destruction of the * target struct: * - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || - * lockdep_is_held(&foo->lock) || + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) || * atomic_read(&foo->usage) == 0); + * + * Inserts memory barriers on architectures that require them + * (currently only the Alpha), prevents the compiler from refetching + * (and from merging fetches), and, more importantly, documents exactly + * which pointers are protected by RCU and checks that the pointer is + * annotated as __rcu. */ #define rcu_dereference_check(p, c) \ - ({ \ - __do_rcu_dereference_check(c); \ - rcu_dereference_raw(p); \ - }) + __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu) + +/** + * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-bh counterpart to rcu_dereference_check(). + */ +#define rcu_dereference_bh_check(p, c) \ + __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu) /** - * rcu_dereference_protected - fetch RCU pointer when updates prevented + * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-sched counterpart to rcu_dereference_check(). + */ +#define rcu_dereference_sched_check(p, c) \ + __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \ + __rcu) + +#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ + +/** + * rcu_dereference_index_check() - rcu_dereference for indices with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * Similar to rcu_dereference_check(), but omits the sparse checking. + * This allows rcu_dereference_index_check() to be used on integers, + * which can then be used as array indices. Attempting to use + * rcu_dereference_check() on an integer will give compiler warnings + * because the sparse address-space mechanism relies on dereferencing + * the RCU-protected pointer. Dereferencing integers is not something + * that even gcc will put up with. + * + * Note that this function does not implicitly check for RCU read-side + * critical sections. If this function gains lots of uses, it might + * make sense to provide versions for each flavor of RCU, but it does + * not make sense as of early 2010. + */ +#define rcu_dereference_index_check(p, c) \ + __rcu_dereference_index_check((p), (c)) + +/** + * rcu_dereference_protected() - fetch RCU pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place * * Return the value of the specified RCU-protected pointer, but omit * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This @@ -263,35 +381,61 @@ extern int rcu_my_thread_group_empty(void); * prevent the compiler from repeating this reference or combining it * with other references, so it should not be used without protection * of appropriate locks. + * + * This function is only for update-side use. Using this function + * when protected only by rcu_read_lock() will result in infrequent + * but very ugly failures. */ #define rcu_dereference_protected(p, c) \ - ({ \ - __do_rcu_dereference_check(c); \ - (p); \ - }) + __rcu_dereference_protected((p), (c), __rcu) -#else /* #ifdef CONFIG_PROVE_RCU */ +/** + * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-bh counterpart to rcu_dereference_protected(). + */ +#define rcu_dereference_bh_protected(p, c) \ + __rcu_dereference_protected((p), (c), __rcu) -#define rcu_dereference_check(p, c) rcu_dereference_raw(p) -#define rcu_dereference_protected(p, c) (p) +/** + * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-sched counterpart to rcu_dereference_protected(). + */ +#define rcu_dereference_sched_protected(p, c) \ + __rcu_dereference_protected((p), (c), __rcu) -#endif /* #else #ifdef CONFIG_PROVE_RCU */ /** - * rcu_access_pointer - fetch RCU pointer with no dereferencing + * rcu_dereference() - fetch RCU-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing * - * Return the value of the specified RCU-protected pointer, but omit the - * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful - * when the value of this pointer is accessed, but the pointer is not - * dereferenced, for example, when testing an RCU-protected pointer against - * NULL. This may also be used in cases where update-side locks prevent - * the value of the pointer from changing, but rcu_dereference_protected() - * is a lighter-weight primitive for this use case. + * This is a simple wrapper around rcu_dereference_check(). */ -#define rcu_access_pointer(p) ACCESS_ONCE(p) +#define rcu_dereference(p) rcu_dereference_check(p, 0) /** - * rcu_read_lock - mark the beginning of an RCU read-side critical section. + * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. + */ +#define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0) + +/** + * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. + */ +#define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) + +/** + * rcu_read_lock() - mark the beginning of an RCU read-side critical section * * When synchronize_rcu() is invoked on one CPU while other CPUs * are within RCU read-side critical sections, then the @@ -337,7 +481,7 @@ static inline void rcu_read_lock(void) */ /** - * rcu_read_unlock - marks the end of an RCU read-side critical section. + * rcu_read_unlock() - marks the end of an RCU read-side critical section. * * See rcu_read_lock() for more information. */ @@ -349,15 +493,16 @@ static inline void rcu_read_unlock(void) } /** - * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section + * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section * * This is equivalent of rcu_read_lock(), but to be used when updates - * are being done using call_rcu_bh(). Since call_rcu_bh() callbacks - * consider completion of a softirq handler to be a quiescent state, - * a process in RCU read-side critical section must be protected by - * disabling softirqs. Read-side critical sections in interrupt context - * can use just rcu_read_lock(). - * + * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since + * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a + * softirq handler to be a quiescent state, a process in RCU read-side + * critical section must be protected by disabling softirqs. Read-side + * critical sections in interrupt context can use just rcu_read_lock(), + * though this should at least be commented to avoid confusing people + * reading the code. */ static inline void rcu_read_lock_bh(void) { @@ -379,13 +524,12 @@ static inline void rcu_read_unlock_bh(void) } /** - * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section + * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section * - * Should be used with either - * - synchronize_sched() - * or - * - call_rcu_sched() and rcu_barrier_sched() - * on the write-side to insure proper synchronization. + * This is equivalent of rcu_read_lock(), but to be used when updates + * are being done using call_rcu_sched() or synchronize_rcu_sched(). + * Read-side critical sections can also be introduced by anything that + * disables preemption, including local_irq_disable() and friends. */ static inline void rcu_read_lock_sched(void) { @@ -420,54 +564,14 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) preempt_enable_notrace(); } - /** - * rcu_dereference_raw - fetch an RCU-protected pointer + * rcu_assign_pointer() - assign to RCU-protected pointer + * @p: pointer to assign to + * @v: value to assign (publish) * - * The caller must be within some flavor of RCU read-side critical - * section, or must be otherwise preventing the pointer from changing, - * for example, by holding an appropriate lock. This pointer may later - * be safely dereferenced. It is the caller's responsibility to have - * done the right thing, as this primitive does no checking of any kind. - * - * Inserts memory barriers on architectures that require them - * (currently only the Alpha), and, more importantly, documents - * exactly which pointers are protected by RCU. - */ -#define rcu_dereference_raw(p) ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) - -/** - * rcu_dereference - fetch an RCU-protected pointer, checking for RCU - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference(p) \ - rcu_dereference_check(p, rcu_read_lock_held()) - -/** - * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference_bh(p) \ - rcu_dereference_check(p, rcu_read_lock_bh_held()) - -/** - * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference_sched(p) \ - rcu_dereference_check(p, rcu_read_lock_sched_held()) - -/** - * rcu_assign_pointer - assign (publicize) a pointer to a newly - * initialized structure that will be dereferenced by RCU read-side - * critical sections. Returns the value assigned. + * Assigns the specified value to the specified RCU-protected + * pointer, ensuring that any concurrent RCU readers will see + * any prior initialization. Returns the value assigned. * * Inserts memory barriers on architectures that require them * (pretty much all of them other than x86), and also prevents @@ -476,14 +580,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * call documents which pointers will be dereferenced by RCU read-side * code. */ - #define rcu_assign_pointer(p, v) \ - ({ \ - if (!__builtin_constant_p(v) || \ - ((v) != NULL)) \ - smp_wmb(); \ - (p) = (v); \ - }) + __rcu_assign_pointer((p), (v), __rcu) + +/** + * RCU_INIT_POINTER() - initialize an RCU protected pointer + * + * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep + * splats. + */ +#define RCU_INIT_POINTER(p, v) \ + p = (typeof(*v) __force __rcu *)(v) /* Infrastructure to implement the synchronize_() primitives. */ @@ -495,7 +602,7 @@ struct rcu_synchronize { extern void wakeme_after_rcu(struct rcu_head *head); /** - * call_rcu - Queue an RCU callback for invocation after a grace period. + * call_rcu() - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. * @func: actual update function to be invoked after the grace period * @@ -509,7 +616,7 @@ extern void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)); /** - * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. + * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. * @func: actual update function to be invoked after the grace period * @@ -566,37 +673,4 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -#ifndef CONFIG_PROVE_RCU -#define __do_rcu_dereference_check(c) do { } while (0) -#endif /* #ifdef CONFIG_PROVE_RCU */ - -#define __rcu_dereference_index_check(p, c) \ - ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - __do_rcu_dereference_check(c); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) - -/** - * rcu_dereference_index_check() - rcu_dereference for indices with debug checking - * @p: The pointer to read, prior to dereferencing - * @c: The conditions under which the dereference will take place - * - * Similar to rcu_dereference_check(), but omits the sparse checking. - * This allows rcu_dereference_index_check() to be used on integers, - * which can then be used as array indices. Attempting to use - * rcu_dereference_check() on an integer will give compiler warnings - * because the sparse address-space mechanism relies on dereferencing - * the RCU-protected pointer. Dereferencing integers is not something - * that even gcc will put up with. - * - * Note that this function does not implicitly check for RCU read-side - * critical sections. If this function gains lots of uses, it might - * make sense to provide versions for each flavor of RCU, but it does - * not make sense as of early 2010. - */ -#define rcu_dereference_index_check(p, c) \ - __rcu_dereference_index_check((p), (c)) - #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4d5d2f546dbf..6f456a720ff0 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -108,12 +108,31 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ /** - * srcu_dereference - fetch SRCU-protected pointer with checking + * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing + * @p: the pointer to fetch and protect for later dereferencing + * @sp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. + * @c: condition to check for update-side use * - * Makes rcu_dereference_check() do the dirty work. + * If PROVE_RCU is enabled, invoking this outside of an RCU read-side + * critical section will result in an RCU-lockdep splat, unless @c evaluates + * to 1. The @c argument will normally be a logical expression containing + * lockdep_is_held() calls. */ -#define srcu_dereference(p, sp) \ - rcu_dereference_check(p, srcu_read_lock_held(sp)) +#define srcu_dereference_check(p, sp, c) \ + __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu) + +/** + * srcu_dereference - fetch SRCU-protected pointer for later dereferencing + * @p: the pointer to fetch and protect for later dereferencing + * @sp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. + * + * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU + * is enabled, invoking this outside of an RCU read-side critical + * section will result in an RCU-lockdep splat. + */ +#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0) /** * srcu_read_lock - register a new reader for an SRCU-protected structure. diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 4d169835fb36..6c79e851521c 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -73,12 +73,14 @@ int debug_lockdep_rcu_enabled(void) EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); /** - * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? + * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? * * Check for bottom half being disabled, which covers both the * CONFIG_PROVE_RCU and not cases. Note that if someone uses * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) - * will show the situation. + * will show the situation. This is useful for debug checks in functions + * that require that they be called within an RCU read-side critical + * section. * * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1b4afd2e6ca0..12465f2ef766 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -539,6 +539,19 @@ config PROVE_RCU_REPEATEDLY disabling, allowing multiple RCU-lockdep warnings to be printed on a single reboot. +config SPARSE_RCU_POINTER + bool "RCU debugging: sparse-based checks for pointer usage" + default n + help + This feature enables the __rcu sparse annotation for + RCU-protected pointers. This annotation will cause sparse + to flag any non-RCU used of annotated pointers. This can be + helpful when debugging RCU usage. Please note that this feature + is not intended to enforce code cleanliness; it is instead merely + a debugging aid. + + Say Y to make sparse flag questionable use of RCU-protected pointers + Say N if you are unsure. config LOCKDEP -- cgit v1.2.3 From 67bdbffd696f29a0b68aa8daa285783a06651583 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 25 Feb 2010 16:55:13 +0100 Subject: rculist: avoid __rcu annotations This avoids warnings from missing __rcu annotations in the rculist implementation, making it possible to use the same lists in both RCU and non-RCU cases. We can add rculist annotations later, together with lockdep support for rculist, which is missing as well, but that may involve changing all the users. Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Pavel Emelyanov Cc: Sukadev Bhattiprolu Reviewed-by: Josh Triplett --- include/linux/rculist.h | 53 +++++++++++++++++++++++++++---------------- include/linux/rculist_nulls.h | 16 +++++++++---- kernel/pid.c | 2 +- 3 files changed, 46 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 4ec3b38ce9c5..c10b1050dbe6 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -9,6 +9,12 @@ #include #include +/* + * return the ->next pointer of a list_head in an rcu safe + * way, we must not access it directly + */ +#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) + /* * Insert a new entry between two known consecutive entries. * @@ -20,7 +26,7 @@ static inline void __list_add_rcu(struct list_head *new, { new->next = next; new->prev = prev; - rcu_assign_pointer(prev->next, new); + rcu_assign_pointer(list_next_rcu(prev), new); next->prev = new; } @@ -138,7 +144,7 @@ static inline void list_replace_rcu(struct list_head *old, { new->next = old->next; new->prev = old->prev; - rcu_assign_pointer(new->prev->next, new); + rcu_assign_pointer(list_next_rcu(new->prev), new); new->next->prev = new; old->prev = LIST_POISON2; } @@ -193,7 +199,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ last->next = at; - rcu_assign_pointer(head->next, first); + rcu_assign_pointer(list_next_rcu(head), first); first->prev = head; at->prev = last; } @@ -208,7 +214,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_entry_rcu(ptr, type, member) \ - container_of(rcu_dereference_raw(ptr), type, member) + ({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \ + container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ + }) /** * list_first_entry_rcu - get the first element from a list @@ -225,9 +233,9 @@ static inline void list_splice_init_rcu(struct list_head *list, list_entry_rcu((ptr)->next, type, member) #define __list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference_raw((head)->next); \ + for (pos = rcu_dereference_raw(list_next_rcu(head)); \ pos != (head); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(list_next_rcu((pos))) /** * list_for_each_entry_rcu - iterate over rcu list of given type @@ -257,9 +265,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_continue_rcu(pos, head) \ - for ((pos) = rcu_dereference_raw((pos)->next); \ + for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \ prefetch((pos)->next), (pos) != (head); \ - (pos) = rcu_dereference_raw((pos)->next)) + (pos) = rcu_dereference_raw(list_next_rcu(pos))) /** * list_for_each_entry_continue_rcu - continue iteration over list of given type @@ -314,12 +322,19 @@ static inline void hlist_replace_rcu(struct hlist_node *old, new->next = next; new->pprev = old->pprev; - rcu_assign_pointer(*new->pprev, new); + rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new); if (next) new->next->pprev = &new->next; old->pprev = LIST_POISON2; } +/* + * return the first or the next element in an RCU protected hlist + */ +#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first))) +#define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next))) +#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev))) + /** * hlist_add_head_rcu * @n: the element to add to the hash list. @@ -346,7 +361,7 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, n->next = first; n->pprev = &h->first; - rcu_assign_pointer(h->first, n); + rcu_assign_pointer(hlist_first_rcu(h), n); if (first) first->pprev = &n->next; } @@ -374,7 +389,7 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, { n->pprev = next->pprev; n->next = next; - rcu_assign_pointer(*(n->pprev), n); + rcu_assign_pointer(hlist_pprev_rcu(n), n); next->pprev = &n->next; } @@ -401,15 +416,15 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, { n->next = prev->next; n->pprev = &prev->next; - rcu_assign_pointer(prev->next, n); + rcu_assign_pointer(hlist_next_rcu(prev), n); if (n->next) n->next->pprev = &n->next; } -#define __hlist_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->first); \ - pos && ({ prefetch(pos->next); 1; }); \ - pos = rcu_dereference(pos->next)) +#define __hlist_for_each_rcu(pos, head) \ + for (pos = rcu_dereference(hlist_first_rcu(head)); \ + pos && ({ prefetch(pos->next); 1; }); \ + pos = rcu_dereference(hlist_next_rcu(pos))) /** * hlist_for_each_entry_rcu - iterate over rcu list of given type @@ -422,11 +437,11 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw((head)->first); \ +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ pos && ({ prefetch(pos->next); 1; }) && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_next_rcu(pos))) /** * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index b70ffe53cb9f..2ae13714828b 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -37,6 +37,12 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) } } +#define hlist_nulls_first_rcu(head) \ + (*((struct hlist_nulls_node __rcu __force **)&(head)->first)) + +#define hlist_nulls_next_rcu(node) \ + (*((struct hlist_nulls_node __rcu __force **)&(node)->next)) + /** * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. @@ -88,7 +94,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, n->next = first; n->pprev = &h->first; - rcu_assign_pointer(h->first, n); + rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) first->pprev = &n->next; } @@ -100,11 +106,11 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, * @member: the name of the hlist_nulls_node within the struct. * */ -#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw((head)->first); \ - (!is_a_nulls(pos)) && \ +#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ + (!is_a_nulls(pos)) && \ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) #endif #endif diff --git a/kernel/pid.c b/kernel/pid.c index d55c6fb8d087..0f90c2f713f1 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -401,7 +401,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type) struct task_struct *result = NULL; if (pid) { struct hlist_node *first; - first = rcu_dereference_check(pid->tasks[type].first, + first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), rcu_read_lock_held() || lockdep_tasklist_lock_is_held()); if (first) -- cgit v1.2.3 From 2c392b8c3450ceb69ba1b93cb0cddb3998fb8cdc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 24 Feb 2010 19:41:39 +0100 Subject: cgroups: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Acked-by: Paul Menage Cc: Li Zefan Reviewed-by: Josh Triplett --- include/linux/cgroup.h | 4 ++-- include/linux/sched.h | 2 +- kernel/cgroup.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e41c6e..3cb7d04308cd 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -75,7 +75,7 @@ struct cgroup_subsys_state { unsigned long flags; /* ID for this css, if possible */ - struct css_id *id; + struct css_id __rcu *id; }; /* bits in struct cgroup_subsys_state flags field */ @@ -205,7 +205,7 @@ struct cgroup { struct list_head children; /* my children */ struct cgroup *parent; /* my parent */ - struct dentry *dentry; /* cgroup fs entry, RCU protected */ + struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ /* Private pointers for each registered subsystem */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7dd..bbffd087476c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1418,7 +1418,7 @@ struct task_struct { #endif #ifdef CONFIG_CGROUPS /* Control Group info protected by css_set_lock */ - struct css_set *cgroups; + struct css_set __rcu *cgroups; /* cg_list protected by css_set_lock and tsk->alloc_lock */ struct list_head cg_list; #endif diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 192f88c5b0f9..e5c5497a7dca 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -138,7 +138,7 @@ struct css_id { * is called after synchronize_rcu(). But for safe use, css_is_removed() * css_tryget() should be used for avoiding race. */ - struct cgroup_subsys_state *css; + struct cgroup_subsys_state __rcu *css; /* * ID of this css. */ -- cgit v1.2.3 From 1b0ba1c9037b2265d6e5d0165d31e4c0269b603b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 24 Feb 2010 19:45:09 +0100 Subject: credentials: rcu annotation Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Ingo Molnar Acked-by: David Howells Reviewed-by: Josh Triplett --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index bbffd087476c..2c756666c111 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1288,9 +1288,9 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - const struct cred *real_cred; /* objective and real subjective task + const struct cred __rcu *real_cred; /* objective and real subjective task * credentials (COW) */ - const struct cred *cred; /* effective (overridable) subjective task + const struct cred __rcu *cred; /* effective (overridable) subjective task * credentials (COW) */ struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations -- cgit v1.2.3 From e63ba744a64d234c8a07c469ab1806443cb0a6ff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Feb 2010 18:01:20 +0100 Subject: keys: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Acked-by: David Howells Reviewed-by: Josh Triplett --- include/linux/cred.h | 2 +- include/linux/key.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index 4d2c39573f36..4aaeab376446 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -84,7 +84,7 @@ struct thread_group_cred { atomic_t usage; pid_t tgid; /* thread group process ID */ spinlock_t lock; - struct key *session_keyring; /* keyring inherited over fork */ + struct key __rcu *session_keyring; /* keyring inherited over fork */ struct key *process_keyring; /* keyring private to this process */ struct rcu_head rcu; /* RCU deletion hook */ }; diff --git a/include/linux/key.h b/include/linux/key.h index cd50dfa1d4c2..3db0adce1fda 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -178,8 +178,9 @@ struct key { */ union { unsigned long value; + void __rcu *rcudata; void *data; - struct keyring_list *subscriptions; + struct keyring_list __rcu *subscriptions; } payload; }; -- cgit v1.2.3 From 5b22216e11f717adc344abc7f97b42e03127c6c0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Mar 2010 10:20:10 +0100 Subject: nfs: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Acked-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 +- include/linux/sunrpc/auth_gss.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 508f8cf6da37..d0edf7d823ae 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -185,7 +185,7 @@ struct nfs_inode { struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ struct list_head open_states; - struct nfs_delegation *delegation; + struct nfs_delegation __rcu *delegation; fmode_t delegation_state; struct rw_semaphore rwsem; #endif /* CONFIG_NFS_V4*/ diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 671538d25bc1..8eee9dbbfe7a 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -69,7 +69,7 @@ struct gss_cl_ctx { enum rpc_gss_proc gc_proc; u32 gc_seq; spinlock_t gc_seq_lock; - struct gss_ctx *gc_gss_ctx; + struct gss_ctx __rcu *gc_gss_ctx; struct xdr_netobj gc_wire_ctx; u32 gc_win; unsigned long gc_expiry; @@ -80,7 +80,7 @@ struct gss_upcall_msg; struct gss_cred { struct rpc_cred gc_base; enum rpc_gss_svc gc_service; - struct gss_cl_ctx *gc_ctx; + struct gss_cl_ctx __rcu *gc_ctx; struct gss_upcall_msg *gc_upcall; unsigned long gc_upcall_timestamp; unsigned char gc_machine_cred : 1; -- cgit v1.2.3 From 2be85279281bafe7de808ca99de59af4fd474c49 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Mar 2010 15:50:28 +0100 Subject: input: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Dmitry Torokhov Acked-by: Dmitry Torokhov Reviewed-by: Josh Triplett --- drivers/input/evdev.c | 2 +- include/linux/input.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index c908c5f83645..5808731f72d2 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -28,7 +28,7 @@ struct evdev { int minor; struct input_handle handle; wait_queue_head_t wait; - struct evdev_client *grab; + struct evdev_client __rcu *grab; struct list_head client_list; spinlock_t client_lock; /* protects client_list */ struct mutex mutex; diff --git a/include/linux/input.h b/include/linux/input.h index 896a92227bc4..d6ae1761be97 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1196,7 +1196,7 @@ struct input_dev { int (*flush)(struct input_dev *dev, struct file *file); int (*event)(struct input_dev *dev, unsigned int type, unsigned int code, int value); - struct input_handle *grab; + struct input_handle __rcu *grab; spinlock_t event_lock; struct mutex mutex; -- cgit v1.2.3 From 0906a372f2aa0fec1e59bd12b896883b6e41307a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 9 Mar 2010 20:59:15 +0100 Subject: net/netfilter: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Acked-by: Patrick McHardy Cc: "David S. Miller" Cc: Eric Dumazet Reviewed-by: Josh Triplett --- include/net/netfilter/nf_conntrack.h | 2 +- net/ipv4/netfilter/nf_nat_core.c | 2 +- net/netfilter/core.c | 2 +- net/netfilter/nf_conntrack_ecache.c | 4 ++-- net/netfilter/nf_conntrack_extend.c | 2 +- net/netfilter/nf_conntrack_proto.c | 4 ++-- net/netfilter/nf_log.c | 2 +- net/netfilter/nf_queue.c | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index e624dae54fa4..caf17db87dbc 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -75,7 +75,7 @@ struct nf_conntrack_helper; /* nf_conn feature for connections that have a helper */ struct nf_conn_help { /* Helper. if any */ - struct nf_conntrack_helper *helper; + struct nf_conntrack_helper __rcu *helper; union nf_conntrack_help help; diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 8c8632d9b93c..957c9241fb0c 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -38,7 +38,7 @@ static DEFINE_SPINLOCK(nf_nat_lock); static struct nf_conntrack_l3proto *l3proto __read_mostly; #define MAX_IP_NAT_PROTO 256 -static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] +static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO] __read_mostly; static inline const struct nf_nat_protocol * diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 78b505d33bfb..fdaec7daff1d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -27,7 +27,7 @@ static DEFINE_MUTEX(afinfo_mutex); -const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; +const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; EXPORT_SYMBOL(nf_afinfo); int nf_register_afinfo(const struct nf_afinfo *afinfo) diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index cdcc7649476b..5702de35e2bb 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -26,10 +26,10 @@ static DEFINE_MUTEX(nf_ct_ecache_mutex); -struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly; +struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); -struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly; +struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly; EXPORT_SYMBOL_GPL(nf_expect_event_cb); /* deliver cached events and clear cache entry - must be called with locally diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 7dcf7a404190..1d9bdae06161 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -16,7 +16,7 @@ #include #include -static struct nf_ct_ext_type *nf_ct_ext_types[NF_CT_EXT_NUM]; +static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM]; static DEFINE_MUTEX(nf_ct_ext_type_mutex); void __nf_ct_ext_destroy(struct nf_conn *ct) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 5886ba1d52a0..ed6d92958023 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -28,8 +28,8 @@ #include #include -static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly; -struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly; +static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly; +struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_l3protos); static DEFINE_MUTEX(nf_ct_proto_mutex); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 7df37fd786bc..b07393eab88e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -16,7 +16,7 @@ #define NF_LOG_PREFIXLEN 128 #define NFLOGGER_NAME_LEN 64 -static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; +static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 78b3cf9c519c..74aebed5bd28 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -18,7 +18,7 @@ * long term mutex. The handler must provide an an outfn() to accept packets * for queueing and must reinject all packets it receives, no matter what. */ -static const struct nf_queue_handler *queue_handler[NFPROTO_NUMPROTO] __read_mostly; +static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(queue_handler_mutex); -- cgit v1.2.3 From 4b6a2872a2a00042ee50024822ab706e5456aad8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Mar 2010 15:59:23 +0100 Subject: kvm: add __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Avi Kivity Cc: Marcelo Tosatti Reviewed-by: Josh Triplett --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c13cc48697aa..ac740b26eb10 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -205,7 +205,7 @@ struct kvm { struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP - struct kvm_irq_routing_table *irq_routing; + struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; struct hlist_head irq_ack_notifier_list; #endif -- cgit v1.2.3 From 4221a9918e38b7494cee341dda7b7b4bb8c04bde Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 26 Jun 2010 01:08:19 +0900 Subject: Add RCU check for find_task_by_vpid(). find_task_by_vpid() says "Must be called under rcu_read_lock().". But due to commit 3120438 "rcu: Disable lockdep checking in RCU list-traversal primitives", we are currently unable to catch "find_task_by_vpid() with tasklist_lock held but RCU lock not held" errors due to the RCU-lockdep checks being suppressed in the RCU variants of the struct list_head traversals. This commit therefore places an explicit check for being in an RCU read-side critical section in find_task_by_pid_ns(). =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- kernel/pid.c:386 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 1 1 lock held by rc.sysinit/1102: #0: (tasklist_lock){.+.+..}, at: [] sys_setpgid+0x40/0x160 stack backtrace: Pid: 1102, comm: rc.sysinit Not tainted 2.6.35-rc3-dirty #1 Call Trace: [] lockdep_rcu_dereference+0x94/0xb0 [] find_task_by_pid_ns+0x6d/0x70 [] find_task_by_vpid+0x18/0x20 [] sys_setpgid+0x47/0x160 [] sysenter_do_call+0x12/0x36 Commit updated to use a new rcu_lockdep_assert() exported API rather than the old internal __do_rcu_dereference(). Signed-off-by: Tetsuo Handa Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 14 +++++++++----- kernel/pid.c | 1 + 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b973dea2d6b0..b124bc6a75ad 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -215,7 +215,11 @@ static inline int rcu_read_lock_sched_held(void) extern int rcu_my_thread_group_empty(void); -#define __do_rcu_dereference_check(c) \ +/** + * rcu_lockdep_assert - emit lockdep splat if specified condition not met + * @c: condition to check + */ +#define rcu_lockdep_assert(c) \ do { \ static bool __warned; \ if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ @@ -226,7 +230,7 @@ extern int rcu_my_thread_group_empty(void); #else /* #ifdef CONFIG_PROVE_RCU */ -#define __do_rcu_dereference_check(c) do { } while (0) +#define rcu_lockdep_assert(c) do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ @@ -247,14 +251,14 @@ extern int rcu_my_thread_group_empty(void); #define __rcu_dereference_check(p, c, space) \ ({ \ typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ - __do_rcu_dereference_check(c); \ + rcu_lockdep_assert(c); \ (void) (((typeof (*p) space *)p) == p); \ smp_read_barrier_depends(); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ - __do_rcu_dereference_check(c); \ + rcu_lockdep_assert(c); \ (void) (((typeof (*p) space *)p) == p); \ ((typeof(*p) __force __kernel *)(p)); \ }) @@ -262,7 +266,7 @@ extern int rcu_my_thread_group_empty(void); #define __rcu_dereference_index_check(p, c) \ ({ \ typeof(p) _________p1 = ACCESS_ONCE(p); \ - __do_rcu_dereference_check(c); \ + rcu_lockdep_assert(c); \ smp_read_barrier_depends(); \ (_________p1); \ }) diff --git a/kernel/pid.c b/kernel/pid.c index 0f90c2f713f1..39b65b69584f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -416,6 +416,7 @@ EXPORT_SYMBOL(pid_task); */ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) { + rcu_lockdep_assert(rcu_read_lock_held()); return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); } -- cgit v1.2.3 From 77d8485a8b5416c615b6acd95f01bfcacd7d81ff Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 8 Jul 2010 17:38:59 -0700 Subject: rcu: improve kerneldoc for rcu_read_lock(), call_rcu(), and synchronize_rcu() Make it explicit that new RCU read-side critical sections that start after call_rcu() and synchronize_rcu() start might still be running after the end of the relevant grace period. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 16 +++++++++------- kernel/rcutree_plugin.h | 8 +++++--- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b124bc6a75ad..3e1b6625553b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -450,7 +450,7 @@ extern int rcu_my_thread_group_empty(void); * until after the all the other CPUs exit their critical sections. * * Note, however, that RCU callbacks are permitted to run concurrently - * with RCU read-side critical sections. One way that this can happen + * with new RCU read-side critical sections. One way that this can happen * is via the following sequence of events: (1) CPU 0 enters an RCU * read-side critical section, (2) CPU 1 invokes call_rcu() to register * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, @@ -608,11 +608,13 @@ extern void wakeme_after_rcu(struct rcu_head *head); /** * call_rcu() - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period + * @func: actual callback function to be invoked after the grace period * - * The update function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. However, the callback function + * might well execute concurrently with RCU read-side critical sections + * that started after call_rcu() was invoked. RCU read-side critical * sections are delimited by rcu_read_lock() and rcu_read_unlock(), * and may be nested. */ @@ -622,9 +624,9 @@ extern void call_rcu(struct rcu_head *head, /** * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period + * @func: actual callback function to be invoked after the grace period * - * The update function will be invoked some time after a full grace + * The callback function will be invoked some time after a full grace * period elapses, in other words after all currently executing RCU * read-side critical sections have completed. call_rcu_bh() assumes * that the read-side critical sections end on completion of a softirq diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 9906f85c7780..63bb7714fdeb 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -546,9 +546,11 @@ EXPORT_SYMBOL_GPL(call_rcu); * * Control will return to the caller some time after a full grace * period has elapsed, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical - * sections are delimited by rcu_read_lock() and rcu_read_unlock(), - * and may be nested. + * read-side critical sections have completed. Note, however, that + * upon return from synchronize_rcu(), the caller might well be executing + * concurrently with new RCU read-side critical sections that began while + * synchronize_rcu() was waiting. RCU read-side critical sections are + * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. */ void synchronize_rcu(void) { -- cgit v1.2.3 From 374a8e0dc33c984fac284de7d57d77af3cfdbfb7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 24 Feb 2010 20:00:13 +0100 Subject: notifiers: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Alan Cox Reviewed-by: Josh Triplett --- include/linux/notifier.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index b2f1a4d83550..2026f9e1ceb8 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -49,28 +49,28 @@ struct notifier_block { int (*notifier_call)(struct notifier_block *, unsigned long, void *); - struct notifier_block *next; + struct notifier_block __rcu *next; int priority; }; struct atomic_notifier_head { spinlock_t lock; - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct blocking_notifier_head { struct rw_semaphore rwsem; - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct raw_notifier_head { - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct srcu_notifier_head { struct mutex mutex; struct srcu_struct srcu; - struct notifier_block *head; + struct notifier_block __rcu *head; }; #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ -- cgit v1.2.3 From a1115570b31091f3e3ab9e6cf7ee8d320a42be84 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 25 Feb 2010 23:43:52 +0100 Subject: radix-tree: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Nick Piggin Reviewed-by: Josh Triplett --- include/linux/radix-tree.h | 4 +++- lib/radix-tree.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 634b8e674ac5..a39cbed9ee17 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -47,6 +47,8 @@ static inline void *radix_tree_indirect_to_ptr(void *ptr) { return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } +#define radix_tree_indirect_to_ptr(ptr) \ + radix_tree_indirect_to_ptr((void __force *)(ptr)) static inline int radix_tree_is_indirect_ptr(void *ptr) { @@ -61,7 +63,7 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) struct radix_tree_root { unsigned int height; gfp_t gfp_mask; - struct radix_tree_node *rnode; + struct radix_tree_node __rcu *rnode; }; #define RADIX_TREE_INIT(mask) { \ diff --git a/lib/radix-tree.c b/lib/radix-tree.c index e907858498a6..899fb750946f 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -49,7 +49,7 @@ struct radix_tree_node { unsigned int height; /* Height from the bottom */ unsigned int count; struct rcu_head rcu_head; - void *slots[RADIX_TREE_MAP_SIZE]; + void __rcu *slots[RADIX_TREE_MAP_SIZE]; unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; }; -- cgit v1.2.3 From d2c2486bc8e185548490e8edbc84d185de9eaff1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Feb 2010 14:53:26 +0100 Subject: idr: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Manfred Spraul Reviewed-by: Josh Triplett --- include/linux/idr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/idr.h b/include/linux/idr.h index e968db71e33a..cdb715e58e3e 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -50,14 +50,14 @@ struct idr_layer { unsigned long bitmap; /* A zero bit means "space here" */ - struct idr_layer *ary[1< Date: Wed, 24 Feb 2010 20:01:56 +0100 Subject: kernel: __rcu annotations This adds annotations for RCU operations in core kernel components Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Al Viro Cc: Jens Axboe Cc: Andrew Morton Reviewed-by: Josh Triplett --- include/linux/fdtable.h | 6 +++--- include/linux/fs.h | 2 +- include/linux/genhd.h | 6 +++--- include/linux/init_task.h | 4 ++-- include/linux/iocontext.h | 2 +- include/linux/mm_types.h | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index f59ed297b661..133c0ba25e30 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -31,7 +31,7 @@ struct embedded_fd_set { struct fdtable { unsigned int max_fds; - struct file ** fd; /* current fd array */ + struct file __rcu **fd; /* current fd array */ fd_set *close_on_exec; fd_set *open_fds; struct rcu_head rcu; @@ -46,7 +46,7 @@ struct files_struct { * read mostly part */ atomic_t count; - struct fdtable *fdt; + struct fdtable __rcu *fdt; struct fdtable fdtab; /* * written part on a separate cache line in SMP @@ -55,7 +55,7 @@ struct files_struct { int next_fd; struct embedded_fd_set close_on_exec_init; struct embedded_fd_set open_fds_init; - struct file * fd_array[NR_OPEN_DEFAULT]; + struct file __rcu * fd_array[NR_OPEN_DEFAULT]; }; #define rcu_dereference_check_fdtable(files, fdtfd) \ diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b614758..aa3dc8d20436 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1380,7 +1380,7 @@ struct super_block { * Saved mount options for lazy filesystems using * generic_show_options() */ - char *s_options; + char __rcu *s_options; }; extern struct timespec current_fs_time(struct super_block *sb); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5f2f4c4d8fb0..af3f06b41dc1 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -129,8 +129,8 @@ struct blk_scsi_cmd_filter { struct disk_part_tbl { struct rcu_head rcu_head; int len; - struct hd_struct *last_lookup; - struct hd_struct *part[]; + struct hd_struct __rcu *last_lookup; + struct hd_struct __rcu *part[]; }; struct gendisk { @@ -149,7 +149,7 @@ struct gendisk { * non-critical accesses use RCU. Always access through * helpers. */ - struct disk_part_tbl *part_tbl; + struct disk_part_tbl __rcu *part_tbl; struct hd_struct part0; const struct block_device_operations *fops; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 1f43fa56f600..6460fc65ed6b 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -137,8 +137,8 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - .real_cred = &init_cred, \ - .cred = &init_cred, \ + RCU_INIT_POINTER(.real_cred, &init_cred), \ + RCU_INIT_POINTER(.cred, &init_cred), \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ .comm = "swapper", \ diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 64d529133031..3e70b21884a9 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -53,7 +53,7 @@ struct io_context { struct radix_tree_root radix_root; struct hlist_head cic_list; - void *ioc_data; + void __rcu *ioc_data; }; static inline struct io_context *ioc_task_link(struct io_context *ioc) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b8bb9a6a1f37..05537a5eb855 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -299,7 +299,7 @@ struct mm_struct { * new_owner->mm == mm * new_owner->alloc_lock is held */ - struct task_struct *owner; + struct task_struct __rcu *owner; #endif #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From 5e8067adfdbaf97039a97540765b1e16eb8d61cc Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 17 Apr 2010 08:48:41 -0400 Subject: rcu head remove init RCU heads really don't need to be initialized. Their state before call_rcu() really does not matter. We need to keep init/destroy_rcu_head_on_stack() though, since we want debugobjects to be able to keep track of these objects. Signed-off-by: Alexey Dobriyan Signed-off-by: Mathieu Desnoyers CC: David S. Miller CC: "Paul E. McKenney" CC: akpm@linux-foundation.org CC: mingo@elte.hu CC: laijs@cn.fujitsu.com CC: dipankar@in.ibm.com CC: josh@joshtriplett.org CC: dvhltc@us.ibm.com CC: niv@us.ibm.com CC: tglx@linutronix.de CC: peterz@infradead.org CC: rostedt@goodmis.org CC: Valdis.Kletnieks@vt.edu CC: dhowells@redhat.com CC: eric.dumazet@gmail.com CC: Alexey Dobriyan Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3e1b6625553b..27b44b3e3024 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -75,12 +75,6 @@ extern void rcu_init(void); #error "Unknown RCU implementation specified to kernel configuration" #endif -#define RCU_HEAD_INIT { .next = NULL, .func = NULL } -#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT -#define INIT_RCU_HEAD(ptr) do { \ - (ptr)->next = NULL; (ptr)->func = NULL; \ -} while (0) - /* * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic * initialization and destruction of rcu_head on the stack. rcu_head structures -- cgit v1.2.3 From eb4d40654505e47aa9d2035bb97f631fa61d14b4 Mon Sep 17 00:00:00 2001 From: Grégoire Baron Date: Wed, 18 Aug 2010 13:10:35 +0000 Subject: net/sched: add ACT_CSUM action to update packets checksums net/sched: add ACT_CSUM action to update packets checksums ACT_CSUM can be called just after ACT_PEDIT in order to re-compute some altered checksums in IPv4 and IPv6 packets. The following checksums are supported by this patch: - IPv4: IPv4 header, ICMP, IGMP, TCP, UDP & UDPLite - IPv6: ICMPv6, TCP, UDP & UDPLite It's possible to request in the same action to update different kind of checksums, if the packets flow mix TCP, UDP and UDPLite, ... An example of usage is done in the associated iproute2 patch. Version 3 changes: - remove useless goto instructions - improve IPv6 hop options decoding Version 2 changes: - coding style correction - remove useless arguments of some functions - use stack in tcf_csum_dump() - add tcf_csum_skb_nextlayer() to factor code Signed-off-by: Gregoire Baron Acked-by: jamal Signed-off-by: David S. Miller --- include/linux/tc_act/Kbuild | 1 + include/linux/tc_act/tc_csum.h | 32 +++ include/net/tc_act/tc_csum.h | 15 ++ net/sched/Kconfig | 10 + net/sched/Makefile | 1 + net/sched/act_csum.c | 595 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 654 insertions(+) create mode 100644 include/linux/tc_act/tc_csum.h create mode 100644 include/net/tc_act/tc_csum.h create mode 100644 net/sched/act_csum.c (limited to 'include') diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild index 76990937f4c9..67b501c302b2 100644 --- a/include/linux/tc_act/Kbuild +++ b/include/linux/tc_act/Kbuild @@ -4,3 +4,4 @@ header-y += tc_mirred.h header-y += tc_pedit.h header-y += tc_nat.h header-y += tc_skbedit.h +header-y += tc_csum.h diff --git a/include/linux/tc_act/tc_csum.h b/include/linux/tc_act/tc_csum.h new file mode 100644 index 000000000000..a047c49a3153 --- /dev/null +++ b/include/linux/tc_act/tc_csum.h @@ -0,0 +1,32 @@ +#ifndef __LINUX_TC_CSUM_H +#define __LINUX_TC_CSUM_H + +#include +#include + +#define TCA_ACT_CSUM 16 + +enum { + TCA_CSUM_UNSPEC, + TCA_CSUM_PARMS, + TCA_CSUM_TM, + __TCA_CSUM_MAX +}; +#define TCA_CSUM_MAX (__TCA_CSUM_MAX - 1) + +enum { + TCA_CSUM_UPDATE_FLAG_IPV4HDR = 1, + TCA_CSUM_UPDATE_FLAG_ICMP = 2, + TCA_CSUM_UPDATE_FLAG_IGMP = 4, + TCA_CSUM_UPDATE_FLAG_TCP = 8, + TCA_CSUM_UPDATE_FLAG_UDP = 16, + TCA_CSUM_UPDATE_FLAG_UDPLITE = 32 +}; + +struct tc_csum { + tc_gen; + + __u32 update_flags; +}; + +#endif /* __LINUX_TC_CSUM_H */ diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h new file mode 100644 index 000000000000..9e8710be7a04 --- /dev/null +++ b/include/net/tc_act/tc_csum.h @@ -0,0 +1,15 @@ +#ifndef __NET_TC_CSUM_H +#define __NET_TC_CSUM_H + +#include +#include + +struct tcf_csum { + struct tcf_common common; + + u32 update_flags; +}; +#define to_tcf_csum(pc) \ + container_of(pc,struct tcf_csum,common) + +#endif /* __NET_TC_CSUM_H */ diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 2f691fb180d1..522d5a9a2825 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -518,6 +518,16 @@ config NET_ACT_SKBEDIT To compile this code as a module, choose M here: the module will be called act_skbedit. +config NET_ACT_CSUM + tristate "Checksum Updating" + depends on NET_CLS_ACT + ---help--- + Say Y here to update some common checksum after some direct + packet alterations. + + To compile this code as a module, choose M here: the + module will be called act_csum. + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index f14e71bfa58f..960f5dba6304 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o +obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c new file mode 100644 index 000000000000..58d7f36949da --- /dev/null +++ b/net/sched/act_csum.c @@ -0,0 +1,595 @@ +/* + * Checksum updating actions + * + * Copyright (c) 2010 Gregoire Baron + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#define CSUM_TAB_MASK 15 +static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1]; +static u32 csum_idx_gen; +static DEFINE_RWLOCK(csum_lock); + +static struct tcf_hashinfo csum_hash_info = { + .htab = tcf_csum_ht, + .hmask = CSUM_TAB_MASK, + .lock = &csum_lock, +}; + +static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { + [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, +}; + +static int tcf_csum_init(struct nlattr *nla, struct nlattr *est, + struct tc_action *a, int ovr, int bind) +{ + struct nlattr *tb[TCA_CSUM_MAX + 1]; + struct tc_csum *parm; + struct tcf_common *pc; + struct tcf_csum *p; + int ret = 0, err; + + if (nla == NULL) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_CSUM_MAX, nla,csum_policy); + if (err < 0) + return err; + + if (tb[TCA_CSUM_PARMS] == NULL) + return -EINVAL; + parm = nla_data(tb[TCA_CSUM_PARMS]); + + pc = tcf_hash_check(parm->index, a, bind, &csum_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, &csum_idx_gen, &csum_hash_info); + if (IS_ERR(pc)) + return PTR_ERR(pc); + p = to_tcf_csum(pc); + ret = ACT_P_CREATED; + } else { + p = to_tcf_csum(pc); + if (!ovr) { + tcf_hash_release(pc, bind, &csum_hash_info); + return -EEXIST; + } + } + + spin_lock_bh(&p->tcf_lock); + p->tcf_action = parm->action; + p->update_flags = parm->update_flags; + spin_unlock_bh(&p->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(pc, &csum_hash_info); + + return ret; +} + +static int tcf_csum_cleanup(struct tc_action *a, int bind) +{ + struct tcf_csum *p = a->priv; + return tcf_hash_release(&p->common, bind, &csum_hash_info); +} + +/** + * tcf_csum_skb_nextlayer - Get next layer pointer + * @skb: sk_buff to use + * @ihl: previous summed headers length + * @ipl: complete packet length + * @jhl: next header length + * + * Check the expected next layer availability in the specified sk_buff. + * Return the next layer pointer if pass, NULL otherwise. + */ +static void *tcf_csum_skb_nextlayer(struct sk_buff *skb, + unsigned int ihl, unsigned int ipl, + unsigned int jhl) +{ + int ntkoff = skb_network_offset(skb); + int hl = ihl + jhl; + + if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) || + (skb_cloned(skb) && + !skb_clone_writable(skb, hl + ntkoff) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + return NULL; + else + return (void *)(skb_network_header(skb) + ihl); +} + +static int tcf_csum_ipv4_icmp(struct sk_buff *skb, + unsigned int ihl, unsigned int ipl) +{ + struct icmphdr *icmph; + + icmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmph)); + if (icmph == NULL) + return 0; + + icmph->checksum = 0; + skb->csum = csum_partial(icmph, ipl - ihl, 0); + icmph->checksum = csum_fold(skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv4_igmp(struct sk_buff *skb, + unsigned int ihl, unsigned int ipl) +{ + struct igmphdr *igmph; + + igmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*igmph)); + if (igmph == NULL) + return 0; + + igmph->csum = 0; + skb->csum = csum_partial(igmph, ipl - ihl, 0); + igmph->csum = csum_fold(skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int ihl, unsigned int ipl) +{ + struct icmp6hdr *icmp6h; + + icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h)); + if (icmp6h == NULL) + return 0; + + icmp6h->icmp6_cksum = 0; + skb->csum = csum_partial(icmp6h, ipl - ihl, 0); + icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + ipl - ihl, IPPROTO_ICMPV6, + skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph, + unsigned int ihl, unsigned int ipl) +{ + struct tcphdr *tcph; + + tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); + if (tcph == NULL) + return 0; + + tcph->check = 0; + skb->csum = csum_partial(tcph, ipl - ihl, 0); + tcph->check = tcp_v4_check(ipl - ihl, + iph->saddr, iph->daddr, skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int ihl, unsigned int ipl) +{ + struct tcphdr *tcph; + + tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); + if (tcph == NULL) + return 0; + + tcph->check = 0; + skb->csum = csum_partial(tcph, ipl - ihl, 0); + tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + ipl - ihl, IPPROTO_TCP, + skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph, + unsigned int ihl, unsigned int ipl, int udplite) +{ + struct udphdr *udph; + u16 ul; + + /* Support both UDP and UDPLITE checksum algorithms, + * Don't use udph->len to get the real length without any protocol check, + * UDPLITE uses udph->len for another thing, + * Use iph->tot_len, or just ipl. + */ + + udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph)); + if (udph == NULL) + return 0; + + ul = ntohs(udph->len); + + if (udplite || udph->check) { + + udph->check = 0; + + if (udplite) { + if (ul == 0) + skb->csum = csum_partial(udph, ipl - ihl, 0); + + else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl)) + skb->csum = csum_partial(udph, ul, 0); + + else + goto ignore_obscure_skb; + } else { + if (ul != ipl - ihl) + goto ignore_obscure_skb; + + skb->csum = csum_partial(udph, ul, 0); + } + + udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, + ul, iph->protocol, + skb->csum); + + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } + + skb->ip_summed = CHECKSUM_NONE; + +ignore_obscure_skb: + return 1; +} + +static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int ihl, unsigned int ipl, int udplite) +{ + struct udphdr *udph; + u16 ul; + + /* Support both UDP and UDPLITE checksum algorithms, + * Don't use udph->len to get the real length without any protocol check, + * UDPLITE uses udph->len for another thing, + * Use ip6h->payload_len + sizeof(*ip6h) ... , or just ipl. + */ + + udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph)); + if (udph == NULL) + return 0; + + ul = ntohs(udph->len); + + udph->check = 0; + + if (udplite) { + if (ul == 0) + skb->csum = csum_partial(udph, ipl - ihl, 0); + + else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl)) + skb->csum = csum_partial(udph, ul, 0); + + else + goto ignore_obscure_skb; + } else { + if (ul != ipl - ihl) + goto ignore_obscure_skb; + + skb->csum = csum_partial(udph, ul, 0); + } + + udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul, + udplite ? IPPROTO_UDPLITE : IPPROTO_UDP, + skb->csum); + + if (!udph->check) + udph->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_NONE; + +ignore_obscure_skb: + return 1; +} + +static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) +{ + struct iphdr *iph; + int ntkoff; + + ntkoff = skb_network_offset(skb); + + if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff)) + goto fail; + + iph = ip_hdr(skb); + + switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) { + case IPPROTO_ICMP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) + if (!tcf_csum_ipv4_icmp(skb, + iph->ihl * 4, ntohs(iph->tot_len))) + goto fail; + break; + case IPPROTO_IGMP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP) + if (!tcf_csum_ipv4_igmp(skb, + iph->ihl * 4, ntohs(iph->tot_len))) + goto fail; + break; + case IPPROTO_TCP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) + if (!tcf_csum_ipv4_tcp(skb, iph, + iph->ihl * 4, ntohs(iph->tot_len))) + goto fail; + break; + case IPPROTO_UDP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) + if (!tcf_csum_ipv4_udp(skb, iph, + iph->ihl * 4, ntohs(iph->tot_len), 0)) + goto fail; + break; + case IPPROTO_UDPLITE: + if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) + if (!tcf_csum_ipv4_udp(skb, iph, + iph->ihl * 4, ntohs(iph->tot_len), 1)) + goto fail; + break; + } + + if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) { + if (skb_cloned(skb) && + !skb_clone_writable(skb, sizeof(*iph) + ntkoff) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + goto fail; + + ip_send_check(iph); + } + + return 1; + +fail: + return 0; +} + +static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, + unsigned int ixhl, unsigned int *pl) +{ + int off, len, optlen; + unsigned char *xh = (void *)ip6xh; + + off = sizeof(*ip6xh); + len = ixhl - off; + + while (len > 1) { + switch (xh[off]) + { + case IPV6_TLV_PAD0: + optlen = 1; + break; + case IPV6_TLV_JUMBO: + optlen = xh[off + 1] + 2; + if (optlen != 6 || len < 6 || (off & 3) != 2) + /* wrong jumbo option length/alignment */ + return 0; + *pl = ntohl(*(__be32 *)(xh + off + 2)); + goto done; + default: + optlen = xh[off + 1] + 2; + if (optlen > len) + /* ignore obscure options */ + goto done; + break; + } + off += optlen; + len -= optlen; + } + +done: + return 1; +} + +static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags) +{ + struct ipv6hdr *ip6h; + struct ipv6_opt_hdr *ip6xh; + unsigned int hl, ixhl; + unsigned int pl; + int ntkoff; + u8 nexthdr; + + ntkoff = skb_network_offset(skb); + + hl = sizeof(*ip6h); + + if (!pskb_may_pull(skb, hl + ntkoff)) + goto fail; + + ip6h = ipv6_hdr(skb); + + pl = ntohs(ip6h->payload_len); + nexthdr = ip6h->nexthdr; + + do { + switch (nexthdr) { + case NEXTHDR_FRAGMENT: + goto ignore_skb; + case NEXTHDR_ROUTING: + case NEXTHDR_HOP: + case NEXTHDR_DEST: + if (!pskb_may_pull(skb, hl + sizeof(*ip6xh) + ntkoff)) + goto fail; + ip6xh = (void *)(skb_network_header(skb) + hl); + ixhl = ipv6_optlen(ip6xh); + if (!pskb_may_pull(skb, hl + ixhl + ntkoff)) + goto fail; + if ((nexthdr == NEXTHDR_HOP) && + !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl))) + goto fail; + nexthdr = ip6xh->nexthdr; + hl += ixhl; + break; + case IPPROTO_ICMPV6: + if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) + if (!tcf_csum_ipv6_icmp(skb, ip6h, + hl, pl + sizeof(*ip6h))) + goto fail; + goto done; + case IPPROTO_TCP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) + if (!tcf_csum_ipv6_tcp(skb, ip6h, + hl, pl + sizeof(*ip6h))) + goto fail; + goto done; + case IPPROTO_UDP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) + if (!tcf_csum_ipv6_udp(skb, ip6h, + hl, pl + sizeof(*ip6h), 0)) + goto fail; + goto done; + case IPPROTO_UDPLITE: + if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) + if (!tcf_csum_ipv6_udp(skb, ip6h, + hl, pl + sizeof(*ip6h), 1)) + goto fail; + goto done; + default: + goto ignore_skb; + } + } while (pskb_may_pull(skb, hl + 1 + ntkoff)); + +done: +ignore_skb: + return 1; + +fail: + return 0; +} + +static int tcf_csum(struct sk_buff *skb, + struct tc_action *a, struct tcf_result *res) +{ + struct tcf_csum *p = a->priv; + int action; + u32 update_flags; + + spin_lock(&p->tcf_lock); + p->tcf_tm.lastuse = jiffies; + p->tcf_bstats.bytes += qdisc_pkt_len(skb); + p->tcf_bstats.packets++; + action = p->tcf_action; + update_flags = p->update_flags; + spin_unlock(&p->tcf_lock); + + if (unlikely(action == TC_ACT_SHOT)) + goto drop; + + switch (skb->protocol) { + case cpu_to_be16(ETH_P_IP): + if (!tcf_csum_ipv4(skb, update_flags)) + goto drop; + break; + case cpu_to_be16(ETH_P_IPV6): + if (!tcf_csum_ipv6(skb, update_flags)) + goto drop; + break; + } + + return action; + +drop: + spin_lock(&p->tcf_lock); + p->tcf_qstats.drops++; + spin_unlock(&p->tcf_lock); + return TC_ACT_SHOT; +} + +static int tcf_csum_dump(struct sk_buff *skb, + struct tc_action *a, int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_csum *p = a->priv; + struct tc_csum opt = { + .update_flags = p->update_flags, + + .index = p->tcf_index, + .action = p->tcf_action, + .refcnt = p->tcf_refcnt - ref, + .bindcnt = p->tcf_bindcnt - bind, + }; + struct tcf_t t; + + NLA_PUT(skb, TCA_CSUM_PARMS, sizeof(opt), &opt); + t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(p->tcf_tm.expires); + NLA_PUT(skb, TCA_CSUM_TM, sizeof(t), &t); + + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tc_action_ops act_csum_ops = { + .kind = "csum", + .hinfo = &csum_hash_info, + .type = TCA_ACT_CSUM, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_csum, + .dump = tcf_csum_dump, + .cleanup = tcf_csum_cleanup, + .lookup = tcf_hash_search, + .init = tcf_csum_init, + .walk = tcf_generic_walker +}; + +MODULE_DESCRIPTION("Checksum updating actions"); +MODULE_LICENSE("GPL"); + +static int __init csum_init_module(void) +{ + return tcf_register_action(&act_csum_ops); +} + +static void __exit csum_cleanup_module(void) +{ + tcf_unregister_action(&act_csum_ops); +} + +module_init(csum_init_module); +module_exit(csum_cleanup_module); -- cgit v1.2.3 From a57eb940d130477a799dfb24a570ee04979c0f7f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 Jun 2010 16:49:16 -0700 Subject: rcu: Add a TINY_PREEMPT_RCU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement a small-memory-footprint uniprocessor-only implementation of preemptible RCU. This implementation uses but a single blocked-tasks list rather than the combinatorial number used per leaf rcu_node by TREE_PREEMPT_RCU, which reduces memory consumption and greatly simplifies processing. This version also takes advantage of uniprocessor execution to accelerate grace periods in the case where there are no readers. The general design is otherwise broadly similar to that of TREE_PREEMPT_RCU. This implementation is a step towards having RCU implementation driven off of the SMP and PREEMPT kernel configuration variables, which can happen once this implementation has accumulated sufficient experience. Removed ACCESS_ONCE() from __rcu_read_unlock() and added barrier() as suggested by Steve Rostedt in order to avoid the compiler-reordering issue noted by Mathieu Desnoyers (http://lkml.org/lkml/2010/8/16/183). As can be seen below, CONFIG_TINY_PREEMPT_RCU represents almost 5Kbyte savings compared to CONFIG_TREE_PREEMPT_RCU. Of course, for non-real-time workloads, CONFIG_TINY_RCU is even better. CONFIG_TREE_PREEMPT_RCU text data bss dec filename 13 0 0 13 kernel/rcupdate.o 6170 825 28 7023 kernel/rcutree.o ---- 7026 Total CONFIG_TINY_PREEMPT_RCU text data bss dec filename 13 0 0 13 kernel/rcupdate.o 2081 81 8 2170 kernel/rcutiny.o ---- 2183 Total CONFIG_TINY_RCU (non-preemptible) text data bss dec filename 13 0 0 13 kernel/rcupdate.o 719 25 0 744 kernel/rcutiny.o --- 757 Total Requested-by: Loïc Minier Signed-off-by: Paul E. McKenney --- include/linux/hardirq.h | 2 +- include/linux/init_task.h | 10 +- include/linux/rcupdate.h | 3 +- include/linux/rcutiny.h | 126 +++++++--- include/linux/rcutree.h | 2 + include/linux/sched.h | 10 +- init/Kconfig | 16 +- kernel/Makefile | 1 + kernel/rcutiny.c | 33 ++- kernel/rcutiny_plugin.h | 582 +++++++++++++++++++++++++++++++++++++++++++++- 10 files changed, 717 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index d5b387669dab..1f4517d55b19 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -139,7 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk) #endif #if defined(CONFIG_NO_HZ) -#if defined(CONFIG_TINY_RCU) +#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) extern void rcu_enter_nohz(void); extern void rcu_exit_nohz(void); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6460fc65ed6b..2fea6c8ef6ba 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -82,11 +82,17 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_FULL_SET #ifdef CONFIG_TREE_PREEMPT_RCU +#define INIT_TASK_RCU_TREE_PREEMPT() \ + .rcu_blocked_node = NULL, +#else +#define INIT_TASK_RCU_TREE_PREEMPT(tsk) +#endif +#ifdef CONFIG_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ .rcu_read_lock_nesting = 0, \ .rcu_read_unlock_special = 0, \ - .rcu_blocked_node = NULL, \ - .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), + .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ + INIT_TASK_RCU_TREE_PREEMPT() #else #define INIT_TASK_RCU_PREEMPT(tsk) #endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 27b44b3e3024..24b896649384 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -58,7 +58,6 @@ struct rcu_head { }; /* Exported common interfaces */ -extern void rcu_barrier(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); extern void synchronize_sched_expedited(void); @@ -69,7 +68,7 @@ extern void rcu_init(void); #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include -#elif defined(CONFIG_TINY_RCU) +#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) #include #else #error "Unknown RCU implementation specified to kernel configuration" diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index e2e893144a84..4cc5eba41616 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -29,66 +29,51 @@ void rcu_sched_qs(int cpu); void rcu_bh_qs(int cpu); -static inline void rcu_note_context_switch(int cpu) -{ - rcu_sched_qs(cpu); -} +#ifdef CONFIG_TINY_RCU #define __rcu_read_lock() preempt_disable() #define __rcu_read_unlock() preempt_enable() +#else /* #ifdef CONFIG_TINY_RCU */ +void __rcu_read_lock(void); +void __rcu_read_unlock(void); +#endif /* #else #ifdef CONFIG_TINY_RCU */ #define __rcu_read_lock_bh() local_bh_disable() #define __rcu_read_unlock_bh() local_bh_enable() -#define call_rcu_sched call_rcu +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)); #define rcu_init_sched() do { } while (0) -extern void rcu_check_callbacks(int cpu, int user); -static inline int rcu_needs_cpu(int cpu) -{ - return 0; -} +extern void synchronize_sched(void); -/* - * Return the number of grace periods. - */ -static inline long rcu_batches_completed(void) -{ - return 0; -} +#ifdef CONFIG_TINY_RCU -/* - * Return the number of bottom-half grace periods. - */ -static inline long rcu_batches_completed_bh(void) -{ - return 0; -} +#define call_rcu call_rcu_sched -static inline void rcu_force_quiescent_state(void) +static inline void synchronize_rcu(void) { + synchronize_sched(); } -static inline void rcu_bh_force_quiescent_state(void) +static inline void synchronize_rcu_expedited(void) { + synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */ } -static inline void rcu_sched_force_quiescent_state(void) +static inline void rcu_barrier(void) { + rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */ } -extern void synchronize_sched(void); +#else /* #ifdef CONFIG_TINY_RCU */ -static inline void synchronize_rcu(void) -{ - synchronize_sched(); -} +void synchronize_rcu(void); +void rcu_barrier(void); +void synchronize_rcu_expedited(void); -static inline void synchronize_rcu_bh(void) -{ - synchronize_sched(); -} +#endif /* #else #ifdef CONFIG_TINY_RCU */ -static inline void synchronize_rcu_expedited(void) +static inline void synchronize_rcu_bh(void) { synchronize_sched(); } @@ -117,15 +102,82 @@ static inline void rcu_exit_nohz(void) #endif /* #else #ifdef CONFIG_NO_HZ */ +#ifdef CONFIG_TINY_RCU + +static inline void rcu_preempt_note_context_switch(void) +{ +} + static inline void exit_rcu(void) { } +static inline int rcu_needs_cpu(int cpu) +{ + return 0; +} + static inline int rcu_preempt_depth(void) { return 0; } +#else /* #ifdef CONFIG_TINY_RCU */ + +void rcu_preempt_note_context_switch(void); +extern void exit_rcu(void); +int rcu_preempt_needs_cpu(void); + +static inline int rcu_needs_cpu(int cpu) +{ + return rcu_preempt_needs_cpu(); +} + +/* + * Defined as macro as it is a very low level header + * included from areas that don't even know about current + * FIXME: combine with include/linux/rcutree.h into rcupdate.h. + */ +#define rcu_preempt_depth() (current->rcu_read_lock_nesting) + +#endif /* #else #ifdef CONFIG_TINY_RCU */ + +static inline void rcu_note_context_switch(int cpu) +{ + rcu_sched_qs(cpu); + rcu_preempt_note_context_switch(); +} + +extern void rcu_check_callbacks(int cpu, int user); + +/* + * Return the number of grace periods. + */ +static inline long rcu_batches_completed(void) +{ + return 0; +} + +/* + * Return the number of bottom-half grace periods. + */ +static inline long rcu_batches_completed_bh(void) +{ + return 0; +} + +static inline void rcu_force_quiescent_state(void) +{ +} + +static inline void rcu_bh_force_quiescent_state(void) +{ +} + +static inline void rcu_sched_force_quiescent_state(void) +{ +} + #ifdef CONFIG_DEBUG_LOCK_ALLOC extern int rcu_scheduler_active __read_mostly; diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c0ed1c056f29..c13b85dd22bc 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -95,6 +95,8 @@ static inline void synchronize_rcu_bh_expedited(void) synchronize_sched_expedited(); } +extern void rcu_barrier(void); + extern void rcu_check_callbacks(int cpu, int user); extern long rcu_batches_completed(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c756666c111..e18473f0eb78 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1202,11 +1202,13 @@ struct task_struct { unsigned int policy; cpumask_t cpus_allowed; -#ifdef CONFIG_TREE_PREEMPT_RCU +#ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; char rcu_read_unlock_special; - struct rcu_node *rcu_blocked_node; struct list_head rcu_node_entry; +#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TREE_PREEMPT_RCU + struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) @@ -1740,7 +1742,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) -#ifdef CONFIG_TREE_PREEMPT_RCU +#ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ @@ -1749,7 +1751,9 @@ static inline void rcu_copy_process(struct task_struct *p) { p->rcu_read_lock_nesting = 0; p->rcu_read_unlock_special = 0; +#ifdef CONFIG_TREE_PREEMPT_RCU p->rcu_blocked_node = NULL; +#endif INIT_LIST_HEAD(&p->rcu_node_entry); } diff --git a/init/Kconfig b/init/Kconfig index dbc08baad77e..a619a1ac7f4c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -348,7 +348,7 @@ config TREE_RCU smaller systems. config TREE_PREEMPT_RCU - bool "Preemptable tree-based hierarchical RCU" + bool "Preemptible tree-based hierarchical RCU" depends on PREEMPT help This option selects the RCU implementation that is @@ -366,8 +366,22 @@ config TINY_RCU is not required. This option greatly reduces the memory footprint of RCU. +config TINY_PREEMPT_RCU + bool "Preemptible UP-only small-memory-footprint RCU" + depends on !SMP && PREEMPT + help + This option selects the RCU implementation that is designed + for real-time UP systems. This option greatly reduces the + memory footprint of RCU. + endchoice +config PREEMPT_RCU + def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU ) + help + This option enables preemptible-RCU code that is common between + the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. + config RCU_TRACE bool "Enable tracing for RCU" depends on TREE_RCU || TREE_PREEMPT_RCU diff --git a/kernel/Makefile b/kernel/Makefile index 0b72d1a74be0..17046b6e7c90 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_TREE_RCU) += rcutree.o obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o obj-$(CONFIG_TINY_RCU) += rcutiny.o +obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 196ec02f8be0..d806735342ac 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -59,6 +59,14 @@ int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* Forward declarations for rcutiny_plugin.h. */ +static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); +static void __call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu), + struct rcu_ctrlblk *rcp); + +#include "rcutiny_plugin.h" + #ifdef CONFIG_NO_HZ static long rcu_dynticks_nesting = 1; @@ -140,6 +148,7 @@ void rcu_check_callbacks(int cpu, int user) rcu_sched_qs(cpu); else if (!in_softirq()) rcu_bh_qs(cpu); + rcu_preempt_check_callbacks(); } /* @@ -162,6 +171,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) *rcp->donetail = NULL; if (rcp->curtail == rcp->donetail) rcp->curtail = &rcp->rcucblist; + rcu_preempt_remove_callbacks(rcp); rcp->donetail = &rcp->rcucblist; local_irq_restore(flags); @@ -182,6 +192,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) { __rcu_process_callbacks(&rcu_sched_ctrlblk); __rcu_process_callbacks(&rcu_bh_ctrlblk); + rcu_preempt_process_callbacks(); } /* @@ -223,15 +234,15 @@ static void __call_rcu(struct rcu_head *head, } /* - * Post an RCU callback to be invoked after the end of an RCU grace + * Post an RCU callback to be invoked after the end of an RCU-sched grace * period. But since we have but one CPU, that would be after any * quiescent state. */ -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { __call_rcu(head, func, &rcu_sched_ctrlblk); } -EXPORT_SYMBOL_GPL(call_rcu); +EXPORT_SYMBOL_GPL(call_rcu_sched); /* * Post an RCU bottom-half callback to be invoked after any subsequent @@ -243,20 +254,6 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu_bh); -void rcu_barrier(void) -{ - struct rcu_synchronize rcu; - - init_rcu_head_on_stack(&rcu.head); - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); - destroy_rcu_head_on_stack(&rcu.head); -} -EXPORT_SYMBOL_GPL(rcu_barrier); - void rcu_barrier_bh(void) { struct rcu_synchronize rcu; @@ -289,5 +286,3 @@ void __init rcu_init(void) { open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); } - -#include "rcutiny_plugin.h" diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index d223a92bc742..e6bc1b447c6c 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -1,7 +1,7 @@ /* - * Read-Copy Update mechanism for mutual exclusion (tree-based version) + * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition * Internal non-public definitions that provide either classic - * or preemptable semantics. + * or preemptible semantics. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,11 +17,587 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * Copyright IBM Corporation, 2009 + * Copyright (c) 2010 Linaro * * Author: Paul E. McKenney */ +#ifdef CONFIG_TINY_PREEMPT_RCU + +#include + +/* FIXME: merge with definitions in kernel/rcutree.h. */ +#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) +#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) + +/* Global control variables for preemptible RCU. */ +struct rcu_preempt_ctrlblk { + struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */ + struct rcu_head **nexttail; + /* Tasks blocked in a preemptible RCU */ + /* read-side critical section while an */ + /* preemptible-RCU grace period is in */ + /* progress must wait for a later grace */ + /* period. This pointer points to the */ + /* ->next pointer of the last task that */ + /* must wait for a later grace period, or */ + /* to &->rcb.rcucblist if there is no */ + /* such task. */ + struct list_head blkd_tasks; + /* Tasks blocked in RCU read-side critical */ + /* section. Tasks are placed at the head */ + /* of this list and age towards the tail. */ + struct list_head *gp_tasks; + /* Pointer to the first task blocking the */ + /* current grace period, or NULL if there */ + /* is not such task. */ + struct list_head *exp_tasks; + /* Pointer to first task blocking the */ + /* current expedited grace period, or NULL */ + /* if there is no such task. If there */ + /* is no current expedited grace period, */ + /* then there cannot be any such task. */ + u8 gpnum; /* Current grace period. */ + u8 gpcpu; /* Last grace period blocked by the CPU. */ + u8 completed; /* Last grace period completed. */ + /* If all three are equal, RCU is idle. */ +}; + +static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { + .rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist, + .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, + .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, + .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), +}; + +static int rcu_preempted_readers_exp(void); +static void rcu_report_exp_done(void); + +/* + * Return true if the CPU has not yet responded to the current grace period. + */ +static int rcu_cpu_cur_gp(void) +{ + return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum; +} + +/* + * Check for a running RCU reader. Because there is only one CPU, + * there can be but one running RCU reader at a time. ;-) + */ +static int rcu_preempt_running_reader(void) +{ + return current->rcu_read_lock_nesting; +} + +/* + * Check for preempted RCU readers blocking any grace period. + * If the caller needs a reliable answer, it must disable hard irqs. + */ +static int rcu_preempt_blocked_readers_any(void) +{ + return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks); +} + +/* + * Check for preempted RCU readers blocking the current grace period. + * If the caller needs a reliable answer, it must disable hard irqs. + */ +static int rcu_preempt_blocked_readers_cgp(void) +{ + return rcu_preempt_ctrlblk.gp_tasks != NULL; +} + +/* + * Return true if another preemptible-RCU grace period is needed. + */ +static int rcu_preempt_needs_another_gp(void) +{ + return *rcu_preempt_ctrlblk.rcb.curtail != NULL; +} + +/* + * Return true if a preemptible-RCU grace period is in progress. + * The caller must disable hardirqs. + */ +static int rcu_preempt_gp_in_progress(void) +{ + return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum; +} + +/* + * Record a preemptible-RCU quiescent state for the specified CPU. Note + * that this just means that the task currently running on the CPU is + * in a quiescent state. There might be any number of tasks blocked + * while in an RCU read-side critical section. + * + * Unlike the other rcu_*_qs() functions, callers to this function + * must disable irqs in order to protect the assignment to + * ->rcu_read_unlock_special. + * + * Because this is a single-CPU implementation, the only way a grace + * period can end is if the CPU is in a quiescent state. The reason is + * that a blocked preemptible-RCU reader can exit its critical section + * only if the CPU is running it at the time. Therefore, when the + * last task blocking the current grace period exits its RCU read-side + * critical section, neither the CPU nor blocked tasks will be stopping + * the current grace period. (In contrast, SMP implementations + * might have CPUs running in RCU read-side critical sections that + * block later grace periods -- but this is not possible given only + * one CPU.) + */ +static void rcu_preempt_cpu_qs(void) +{ + /* Record both CPU and task as having responded to current GP. */ + rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; + current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; + + /* + * If there is no GP, or if blocked readers are still blocking GP, + * then there is nothing more to do. + */ + if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) + return; + + /* Advance callbacks. */ + rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum; + rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail; + rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail; + + /* If there are no blocked readers, next GP is done instantly. */ + if (!rcu_preempt_blocked_readers_any()) + rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; + + /* If there are done callbacks, make RCU_SOFTIRQ process them. */ + if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) + raise_softirq(RCU_SOFTIRQ); +} + +/* + * Start a new RCU grace period if warranted. Hard irqs must be disabled. + */ +static void rcu_preempt_start_gp(void) +{ + if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) { + + /* Official start of GP. */ + rcu_preempt_ctrlblk.gpnum++; + + /* Any blocked RCU readers block new GP. */ + if (rcu_preempt_blocked_readers_any()) + rcu_preempt_ctrlblk.gp_tasks = + rcu_preempt_ctrlblk.blkd_tasks.next; + + /* If there is no running reader, CPU is done with GP. */ + if (!rcu_preempt_running_reader()) + rcu_preempt_cpu_qs(); + } +} + +/* + * We have entered the scheduler, and the current task might soon be + * context-switched away from. If this task is in an RCU read-side + * critical section, we will no longer be able to rely on the CPU to + * record that fact, so we enqueue the task on the blkd_tasks list. + * If the task started after the current grace period began, as recorded + * by ->gpcpu, we enqueue at the beginning of the list. Otherwise + * before the element referenced by ->gp_tasks (or at the tail if + * ->gp_tasks is NULL) and point ->gp_tasks at the newly added element. + * The task will dequeue itself when it exits the outermost enclosing + * RCU read-side critical section. Therefore, the current grace period + * cannot be permitted to complete until the ->gp_tasks pointer becomes + * NULL. + * + * Caller must disable preemption. + */ +void rcu_preempt_note_context_switch(void) +{ + struct task_struct *t = current; + unsigned long flags; + + local_irq_save(flags); /* must exclude scheduler_tick(). */ + if (rcu_preempt_running_reader() && + (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { + + /* Possibly blocking in an RCU read-side critical section. */ + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; + + /* + * If this CPU has already checked in, then this task + * will hold up the next grace period rather than the + * current grace period. Queue the task accordingly. + * If the task is queued for the current grace period + * (i.e., this CPU has not yet passed through a quiescent + * state for the current grace period), then as long + * as that task remains queued, the current grace period + * cannot end. + */ + list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks); + if (rcu_cpu_cur_gp()) + rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry; + } + + /* + * Either we were not in an RCU read-side critical section to + * begin with, or we have now recorded that critical section + * globally. Either way, we can now note a quiescent state + * for this CPU. Again, if we were in an RCU read-side critical + * section, and if that critical section was blocking the current + * grace period, then the fact that the task has been enqueued + * means that current grace period continues to be blocked. + */ + rcu_preempt_cpu_qs(); + local_irq_restore(flags); +} + +/* + * Tiny-preemptible RCU implementation for rcu_read_lock(). + * Just increment ->rcu_read_lock_nesting, shared state will be updated + * if we block. + */ +void __rcu_read_lock(void) +{ + current->rcu_read_lock_nesting++; + barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */ +} +EXPORT_SYMBOL_GPL(__rcu_read_lock); + +/* + * Handle special cases during rcu_read_unlock(), such as needing to + * notify RCU core processing or task having blocked during the RCU + * read-side critical section. + */ +static void rcu_read_unlock_special(struct task_struct *t) +{ + int empty; + int empty_exp; + unsigned long flags; + struct list_head *np; + int special; + + /* + * NMI handlers cannot block and cannot safely manipulate state. + * They therefore cannot possibly be special, so just leave. + */ + if (in_nmi()) + return; + + local_irq_save(flags); + + /* + * If RCU core is waiting for this CPU to exit critical section, + * let it know that we have done so. + */ + special = t->rcu_read_unlock_special; + if (special & RCU_READ_UNLOCK_NEED_QS) + rcu_preempt_cpu_qs(); + + /* Hardware IRQ handlers cannot block. */ + if (in_irq()) { + local_irq_restore(flags); + return; + } + + /* Clean up if blocked during RCU read-side critical section. */ + if (special & RCU_READ_UNLOCK_BLOCKED) { + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; + + /* + * Remove this task from the ->blkd_tasks list and adjust + * any pointers that might have been referencing it. + */ + empty = !rcu_preempt_blocked_readers_cgp(); + empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; + np = t->rcu_node_entry.next; + if (np == &rcu_preempt_ctrlblk.blkd_tasks) + np = NULL; + list_del(&t->rcu_node_entry); + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) + rcu_preempt_ctrlblk.gp_tasks = np; + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) + rcu_preempt_ctrlblk.exp_tasks = np; + INIT_LIST_HEAD(&t->rcu_node_entry); + + /* + * If this was the last task on the current list, and if + * we aren't waiting on the CPU, report the quiescent state + * and start a new grace period if needed. + */ + if (!empty && !rcu_preempt_blocked_readers_cgp()) { + rcu_preempt_cpu_qs(); + rcu_preempt_start_gp(); + } + + /* + * If this was the last task on the expedited lists, + * then we need wake up the waiting task. + */ + if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) + rcu_report_exp_done(); + } + local_irq_restore(flags); +} + +/* + * Tiny-preemptible RCU implementation for rcu_read_unlock(). + * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost + * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then + * invoke rcu_read_unlock_special() to clean up after a context switch + * in an RCU read-side critical section and other special cases. + */ +void __rcu_read_unlock(void) +{ + struct task_struct *t = current; + + barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */ + --t->rcu_read_lock_nesting; + barrier(); /* decrement before load of ->rcu_read_unlock_special */ + if (t->rcu_read_lock_nesting == 0 && + unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) + rcu_read_unlock_special(t); +#ifdef CONFIG_PROVE_LOCKING + WARN_ON_ONCE(t->rcu_read_lock_nesting < 0); +#endif /* #ifdef CONFIG_PROVE_LOCKING */ +} +EXPORT_SYMBOL_GPL(__rcu_read_unlock); + +/* + * Check for a quiescent state from the current CPU. When a task blocks, + * the task is recorded in the rcu_preempt_ctrlblk structure, which is + * checked elsewhere. This is called from the scheduling-clock interrupt. + * + * Caller must disable hard irqs. + */ +static void rcu_preempt_check_callbacks(void) +{ + struct task_struct *t = current; + + if (!rcu_preempt_running_reader() && rcu_preempt_gp_in_progress()) + rcu_preempt_cpu_qs(); + if (&rcu_preempt_ctrlblk.rcb.rcucblist != + rcu_preempt_ctrlblk.rcb.donetail) + raise_softirq(RCU_SOFTIRQ); + if (rcu_preempt_gp_in_progress() && rcu_preempt_running_reader()) + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; +} + +/* + * TINY_PREEMPT_RCU has an extra callback-list tail pointer to + * update, so this is invoked from __rcu_process_callbacks() to + * handle that case. Of course, it is invoked for all flavors of + * RCU, but RCU callbacks can appear only on one of the lists, and + * neither ->nexttail nor ->donetail can possibly be NULL, so there + * is no need for an explicit check. + */ +static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) +{ + if (rcu_preempt_ctrlblk.nexttail == rcp->donetail) + rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist; +} + +/* + * Process callbacks for preemptible RCU. + */ +static void rcu_preempt_process_callbacks(void) +{ + __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); +} + +/* + * Queue a preemptible -RCU callback for invocation after a grace period. + */ +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + unsigned long flags; + + debug_rcu_head_queue(head); + head->func = func; + head->next = NULL; + + local_irq_save(flags); + *rcu_preempt_ctrlblk.nexttail = head; + rcu_preempt_ctrlblk.nexttail = &head->next; + rcu_preempt_start_gp(); /* checks to see if GP needed. */ + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(call_rcu); + +void rcu_barrier(void) +{ + struct rcu_synchronize rcu; + + init_rcu_head_on_stack(&rcu.head); + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); +} +EXPORT_SYMBOL_GPL(rcu_barrier); + +/* + * synchronize_rcu - wait until a grace period has elapsed. + * + * Control will return to the caller some time after a full grace + * period has elapsed, in other words after all currently executing RCU + * read-side critical sections have completed. RCU read-side critical + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), + * and may be nested. + */ +void synchronize_rcu(void) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (!rcu_scheduler_active) + return; +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + + WARN_ON_ONCE(rcu_preempt_running_reader()); + if (!rcu_preempt_blocked_readers_any()) + return; + + /* Once we get past the fastpath checks, same code as rcu_barrier(). */ + rcu_barrier(); +} +EXPORT_SYMBOL_GPL(synchronize_rcu); + +static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); +static unsigned long sync_rcu_preempt_exp_count; +static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); + +/* + * Return non-zero if there are any tasks in RCU read-side critical + * sections blocking the current preemptible-RCU expedited grace period. + * If there is no preemptible-RCU expedited grace period currently in + * progress, returns zero unconditionally. + */ +static int rcu_preempted_readers_exp(void) +{ + return rcu_preempt_ctrlblk.exp_tasks != NULL; +} + +/* + * Report the exit from RCU read-side critical section for the last task + * that queued itself during or before the current expedited preemptible-RCU + * grace period. + */ +static void rcu_report_exp_done(void) +{ + wake_up(&sync_rcu_preempt_exp_wq); +} + +/* + * Wait for an rcu-preempt grace period, but expedite it. The basic idea + * is to rely in the fact that there is but one CPU, and that it is + * illegal for a task to invoke synchronize_rcu_expedited() while in a + * preemptible-RCU read-side critical section. Therefore, any such + * critical sections must correspond to blocked tasks, which must therefore + * be on the ->blkd_tasks list. So just record the current head of the + * list in the ->exp_tasks pointer, and wait for all tasks including and + * after the task pointed to by ->exp_tasks to drain. + */ +void synchronize_rcu_expedited(void) +{ + unsigned long flags; + struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk; + unsigned long snap; + + barrier(); /* ensure prior action seen before grace period. */ + + WARN_ON_ONCE(rcu_preempt_running_reader()); + + /* + * Acquire lock so that there is only one preemptible RCU grace + * period in flight. Of course, if someone does the expedited + * grace period for us while we are acquiring the lock, just leave. + */ + snap = sync_rcu_preempt_exp_count + 1; + mutex_lock(&sync_rcu_preempt_exp_mutex); + if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count)) + goto unlock_mb_ret; /* Others did our work for us. */ + + local_irq_save(flags); + + /* + * All RCU readers have to already be on blkd_tasks because + * we cannot legally be executing in an RCU read-side critical + * section. + */ + + /* Snapshot current head of ->blkd_tasks list. */ + rpcp->exp_tasks = rpcp->blkd_tasks.next; + if (rpcp->exp_tasks == &rpcp->blkd_tasks) + rpcp->exp_tasks = NULL; + local_irq_restore(flags); + + /* Wait for tail of ->blkd_tasks list to drain. */ + if (rcu_preempted_readers_exp()) + wait_event(sync_rcu_preempt_exp_wq, + !rcu_preempted_readers_exp()); + + /* Clean up and exit. */ + barrier(); /* ensure expedited GP seen before counter increment. */ + sync_rcu_preempt_exp_count++; +unlock_mb_ret: + mutex_unlock(&sync_rcu_preempt_exp_mutex); + barrier(); /* ensure subsequent action seen after grace period. */ +} +EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); + +/* + * Does preemptible RCU need the CPU to stay out of dynticks mode? + */ +int rcu_preempt_needs_cpu(void) +{ + if (!rcu_preempt_running_reader()) + rcu_preempt_cpu_qs(); + return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; +} + +/* + * Check for a task exiting while in a preemptible -RCU read-side + * critical section, clean up if so. No need to issue warnings, + * as debug_check_no_locks_held() already does this if lockdep + * is enabled. + */ +void exit_rcu(void) +{ + struct task_struct *t = current; + + if (t->rcu_read_lock_nesting == 0) + return; + t->rcu_read_lock_nesting = 1; + rcu_read_unlock(); +} + +#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ + +/* + * Because preemptible RCU does not exist, it never has any callbacks + * to check. + */ +static void rcu_preempt_check_callbacks(void) +{ +} + +/* + * Because preemptible RCU does not exist, it never has any callbacks + * to remove. + */ +static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) +{ +} + +/* + * Because preemptible RCU does not exist, it never has any callbacks + * to process. + */ +static void rcu_preempt_process_callbacks(void) +{ +} + +#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ + #ifdef CONFIG_DEBUG_LOCK_ALLOC #include -- cgit v1.2.3 From 9079fd7c2e06a92cf27d05224a1f478581916c5b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 7 Aug 2010 21:59:54 -0700 Subject: rcu: update obsolete rcu_read_lock() comment. The comment says that blocking is illegal in rcu_read_lock()-style RCU read-side critical sections, which is no longer entirely true given preemptible RCU. This commit provides a fix. Suggested-by: David Miller Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 24b896649384..d7af96ef6fcf 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -458,7 +458,20 @@ extern int rcu_my_thread_group_empty(void); * will be deferred until the outermost RCU read-side critical section * completes. * - * It is illegal to block while in an RCU read-side critical section. + * You can avoid reading and understanding the next paragraph by + * following this rule: don't put anything in an rcu_read_lock() RCU + * read-side critical section that would block in a !PREEMPT kernel. + * But if you want the full story, read on! + * + * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it + * is illegal to block while in an RCU read-side critical section. In + * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU) + * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may + * be preempted, but explicit blocking is illegal. Finally, in preemptible + * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds, + * RCU read-side critical sections may be preempted and they may also + * block, but only when acquiring spinlocks that are subject to priority + * inheritance. */ static inline void rcu_read_lock(void) { -- cgit v1.2.3 From 53d84e004d5e8c018be395c4330dc72fd60bd13e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Aug 2010 14:28:53 -0700 Subject: rcu: permit suppressing current grace period's CPU stall warnings When using a kernel debugger, a long sojourn in the debugger can get you lots of RCU CPU stall warnings once you resume. This might not be helpful, especially if you are using the system console. This patch therefore allows RCU CPU stall warnings to be suppressed, but only for the duration of the current set of grace periods. This differs from Jason's original patch in that it adds support for tiny RCU and preemptible RCU, and uses a slightly different method for suppressing the RCU CPU stall warning messages. Signed-off-by: Jason Wessel Signed-off-by: Paul E. McKenney Tested-by: Jason Wessel --- include/linux/rcutiny.h | 4 ++++ include/linux/rcutree.h | 1 + kernel/rcutree.c | 20 ++++++++++++++++++++ kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 18 ++++++++++++++++++ 5 files changed, 44 insertions(+) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 4cc5eba41616..3fa179784e18 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -178,6 +178,10 @@ static inline void rcu_sched_force_quiescent_state(void) { } +static inline void rcu_cpu_stall_reset(void) +{ +} + #ifdef CONFIG_DEBUG_LOCK_ALLOC extern int rcu_scheduler_active __read_mostly; diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c13b85dd22bc..0726809497ba 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -36,6 +36,7 @@ extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); +extern void rcu_cpu_stall_reset(void); #ifdef CONFIG_TREE_PREEMPT_RCU diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ff214118e4b8..42140a860bb9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -565,6 +565,22 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) return NOTIFY_DONE; } +/** + * rcu_cpu_stall_reset - prevent further stall warnings in current grace period + * + * Set the stall-warning timeout way off into the future, thus preventing + * any RCU CPU stall-warning messages from appearing in the current set of + * RCU grace periods. + * + * The caller must disable hard irqs. + */ +void rcu_cpu_stall_reset(void) +{ + rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; + rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; + rcu_preempt_stall_reset(); +} + static struct notifier_block rcu_panic_block = { .notifier_call = rcu_panic, }; @@ -584,6 +600,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) { } +void rcu_cpu_stall_reset(void) +{ +} + static void __init check_cpu_stall_init(void) { } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index bb4d08695c45..7abd439a7573 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -372,6 +372,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, #ifdef CONFIG_RCU_CPU_STALL_DETECTOR static void rcu_print_detail_task_stall(struct rcu_state *rsp); static void rcu_print_task_stall(struct rcu_node *rnp); +static void rcu_preempt_stall_reset(void); #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 63bb7714fdeb..561410f70d4a 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -417,6 +417,16 @@ static void rcu_print_task_stall(struct rcu_node *rnp) } } +/* + * Suppress preemptible RCU's CPU stall warnings by pushing the + * time of the next stall-warning message comfortably far into the + * future. + */ +static void rcu_preempt_stall_reset(void) +{ + rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; +} + #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* @@ -867,6 +877,14 @@ static void rcu_print_task_stall(struct rcu_node *rnp) { } +/* + * Because preemptible RCU does not exist, there is no need to suppress + * its CPU stall warnings. + */ +static void rcu_preempt_stall_reset(void) +{ +} + #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* -- cgit v1.2.3 From a3dc3fb161f9b4066c0fce22db72638af8baf83b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 13 Aug 2010 16:16:25 -0700 Subject: rcu: repair code-duplication FIXMEs Combine the duplicate definitions of ULONG_CMP_GE(), ULONG_CMP_LT(), and rcu_preempt_depth() into include/linux/rcupdate.h. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 15 +++++++++++++++ include/linux/rcutiny.h | 7 ------- include/linux/rcutree.h | 6 ------ kernel/rcutiny_plugin.h | 4 ---- kernel/rcutree.h | 3 --- 5 files changed, 15 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d7af96ef6fcf..325bad7bbca9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -47,6 +47,9 @@ extern int rcutorture_runnable; /* for sysctl */ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ +#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) +#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) + /** * struct rcu_head - callback structure for use with RCU * @next: next update requests in a list @@ -66,6 +69,18 @@ extern int sched_expedited_torture_stats(char *page); /* Internal to kernel */ extern void rcu_init(void); +#ifdef CONFIG_PREEMPT_RCU + +/* + * Defined as a macro as it is a very low level header included from + * areas that don't even know about current. This gives the rcu_read_lock() + * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other + * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. + */ +#define rcu_preempt_depth() (current->rcu_read_lock_nesting) + +#endif /* #ifdef CONFIG_PREEMPT_RCU */ + #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include #elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 3fa179784e18..c6b11dc5ba0a 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -133,13 +133,6 @@ static inline int rcu_needs_cpu(int cpu) return rcu_preempt_needs_cpu(); } -/* - * Defined as macro as it is a very low level header - * included from areas that don't even know about current - * FIXME: combine with include/linux/rcutree.h into rcupdate.h. - */ -#define rcu_preempt_depth() (current->rcu_read_lock_nesting) - #endif /* #else #ifdef CONFIG_TINY_RCU */ static inline void rcu_note_context_switch(int cpu) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 0726809497ba..54a20c11f98d 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -45,12 +45,6 @@ extern void __rcu_read_unlock(void); extern void synchronize_rcu(void); extern void exit_rcu(void); -/* - * Defined as macro as it is a very low level header - * included from areas that don't even know about current - */ -#define rcu_preempt_depth() (current->rcu_read_lock_nesting) - #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ static inline void __rcu_read_lock(void) diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index e6bc1b447c6c..c5bea1137dcb 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -26,10 +26,6 @@ #include -/* FIXME: merge with definitions in kernel/rcutree.h. */ -#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) -#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) - /* Global control variables for preemptible RCU. */ struct rcu_preempt_ctrlblk { struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */ diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7abd439a7573..7918ba61873f 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -272,9 +272,6 @@ struct rcu_data { #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ -#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) -#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) - /* * RCU global state, including node hierarchy. This hierarchy is * represented in "heap" form in a dense array. The root (first level) -- cgit v1.2.3 From 73d4da4d360136826b36f78f5cf72b29da82c8a6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Aug 2010 10:50:54 -0700 Subject: rcu: Upgrade srcu_read_lock() docbook about SRCU grace periods It is illegal to wait for an SRCU grace period while within the corresponding flavor of SRCU read-side critical section. Therefore, this commit updates the srcu_read_lock() docbook accordingly. Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 6f456a720ff0..58971e891f48 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -139,7 +139,12 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) * @sp: srcu_struct in which to register the new reader. * * Enter an SRCU read-side critical section. Note that SRCU read-side - * critical sections may be nested. + * critical sections may be nested. However, it is illegal to + * call anything that waits on an SRCU grace period for the same + * srcu_struct, whether directly or indirectly. Please note that + * one way to indirectly wait on an SRCU grace period is to acquire + * a mutex that is held elsewhere while calling synchronize_srcu() or + * synchronize_srcu_expedited(). */ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) { -- cgit v1.2.3 From 7b0b759b65247cbc66384a912be9acf8d4800636 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 17 Aug 2010 14:18:46 -0700 Subject: rcu: combine duplicate code, courtesy of CONFIG_PREEMPT_RCU The CONFIG_PREEMPT_RCU kernel configuration parameter was recently re-introduced, but as an indication of the type of RCU (preemptible vs. non-preemptible) instead of as selecting a given implementation. This commit uses CONFIG_PREEMPT_RCU to combine duplicate code from include/linux/rcutiny.h and include/linux/rcutree.h into include/linux/rcupdate.h. This commit also combines a few other pieces of duplicate code that have accumulated. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 75 ++++++++++++++++++++++++++++++++++++++++++++++-- include/linux/rcutiny.h | 51 -------------------------------- include/linux/rcutree.h | 50 -------------------------------- kernel/rcutree_plugin.h | 9 ------ 4 files changed, 72 insertions(+), 113 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 325bad7bbca9..89414d67d961 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -61,16 +61,30 @@ struct rcu_head { }; /* Exported common interfaces */ +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)); +extern void synchronize_sched(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); extern void synchronize_sched_expedited(void); extern int sched_expedited_torture_stats(char *page); -/* Internal to kernel */ -extern void rcu_init(void); +static inline void __rcu_read_lock_bh(void) +{ + local_bh_disable(); +} + +static inline void __rcu_read_unlock_bh(void) +{ + local_bh_enable(); +} #ifdef CONFIG_PREEMPT_RCU +extern void __rcu_read_lock(void); +extern void __rcu_read_unlock(void); +void synchronize_rcu(void); + /* * Defined as a macro as it is a very low level header included from * areas that don't even know about current. This gives the rcu_read_lock() @@ -79,7 +93,53 @@ extern void rcu_init(void); */ #define rcu_preempt_depth() (current->rcu_read_lock_nesting) -#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#else /* #ifdef CONFIG_PREEMPT_RCU */ + +static inline void __rcu_read_lock(void) +{ + preempt_disable(); +} + +static inline void __rcu_read_unlock(void) +{ + preempt_enable(); +} + +static inline void synchronize_rcu(void) +{ + synchronize_sched(); +} + +static inline int rcu_preempt_depth(void) +{ + return 0; +} + +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + +/* Internal to kernel */ +extern void rcu_init(void); +extern void rcu_sched_qs(int cpu); +extern void rcu_bh_qs(int cpu); +extern void rcu_check_callbacks(int cpu, int user); +struct notifier_block; + +#ifdef CONFIG_NO_HZ + +extern void rcu_enter_nohz(void); +extern void rcu_exit_nohz(void); + +#else /* #ifdef CONFIG_NO_HZ */ + +static inline void rcu_enter_nohz(void) +{ +} + +static inline void rcu_exit_nohz(void) +{ +} + +#endif /* #else #ifdef CONFIG_NO_HZ */ #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include @@ -626,6 +686,8 @@ struct rcu_synchronize { extern void wakeme_after_rcu(struct rcu_head *head); +#ifdef CONFIG_PREEMPT_RCU + /** * call_rcu() - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. @@ -642,6 +704,13 @@ extern void wakeme_after_rcu(struct rcu_head *head); extern void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)); +#else /* #ifdef CONFIG_PREEMPT_RCU */ + +/* In classic RCU, call_rcu() is just call_rcu_sched(). */ +#define call_rcu call_rcu_sched + +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + /** * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index c6b11dc5ba0a..13877cb93a60 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -27,34 +27,10 @@ #include -void rcu_sched_qs(int cpu); -void rcu_bh_qs(int cpu); - -#ifdef CONFIG_TINY_RCU -#define __rcu_read_lock() preempt_disable() -#define __rcu_read_unlock() preempt_enable() -#else /* #ifdef CONFIG_TINY_RCU */ -void __rcu_read_lock(void); -void __rcu_read_unlock(void); -#endif /* #else #ifdef CONFIG_TINY_RCU */ -#define __rcu_read_lock_bh() local_bh_disable() -#define __rcu_read_unlock_bh() local_bh_enable() -extern void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)); - #define rcu_init_sched() do { } while (0) -extern void synchronize_sched(void); - #ifdef CONFIG_TINY_RCU -#define call_rcu call_rcu_sched - -static inline void synchronize_rcu(void) -{ - synchronize_sched(); -} - static inline void synchronize_rcu_expedited(void) { synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */ @@ -67,7 +43,6 @@ static inline void rcu_barrier(void) #else /* #ifdef CONFIG_TINY_RCU */ -void synchronize_rcu(void); void rcu_barrier(void); void synchronize_rcu_expedited(void); @@ -83,25 +58,6 @@ static inline void synchronize_rcu_bh_expedited(void) synchronize_sched(); } -struct notifier_block; - -#ifdef CONFIG_NO_HZ - -extern void rcu_enter_nohz(void); -extern void rcu_exit_nohz(void); - -#else /* #ifdef CONFIG_NO_HZ */ - -static inline void rcu_enter_nohz(void) -{ -} - -static inline void rcu_exit_nohz(void) -{ -} - -#endif /* #else #ifdef CONFIG_NO_HZ */ - #ifdef CONFIG_TINY_RCU static inline void rcu_preempt_note_context_switch(void) @@ -117,11 +73,6 @@ static inline int rcu_needs_cpu(int cpu) return 0; } -static inline int rcu_preempt_depth(void) -{ - return 0; -} - #else /* #ifdef CONFIG_TINY_RCU */ void rcu_preempt_note_context_switch(void); @@ -141,8 +92,6 @@ static inline void rcu_note_context_switch(int cpu) rcu_preempt_note_context_switch(); } -extern void rcu_check_callbacks(int cpu, int user); - /* * Return the number of grace periods. */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 54a20c11f98d..95518e628794 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,59 +30,23 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H -struct notifier_block; - -extern void rcu_sched_qs(int cpu); -extern void rcu_bh_qs(int cpu); extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); extern void rcu_cpu_stall_reset(void); #ifdef CONFIG_TREE_PREEMPT_RCU -extern void __rcu_read_lock(void); -extern void __rcu_read_unlock(void); -extern void synchronize_rcu(void); extern void exit_rcu(void); #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -static inline void __rcu_read_lock(void) -{ - preempt_disable(); -} - -static inline void __rcu_read_unlock(void) -{ - preempt_enable(); -} - -#define synchronize_rcu synchronize_sched - static inline void exit_rcu(void) { } -static inline int rcu_preempt_depth(void) -{ - return 0; -} - #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ -static inline void __rcu_read_lock_bh(void) -{ - local_bh_disable(); -} -static inline void __rcu_read_unlock_bh(void) -{ - local_bh_enable(); -} - -extern void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)); extern void synchronize_rcu_bh(void); -extern void synchronize_sched(void); extern void synchronize_rcu_expedited(void); static inline void synchronize_rcu_bh_expedited(void) @@ -92,8 +56,6 @@ static inline void synchronize_rcu_bh_expedited(void) extern void rcu_barrier(void); -extern void rcu_check_callbacks(int cpu, int user); - extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); extern long rcu_batches_completed_sched(void); @@ -101,18 +63,6 @@ extern void rcu_force_quiescent_state(void); extern void rcu_bh_force_quiescent_state(void); extern void rcu_sched_force_quiescent_state(void); -#ifdef CONFIG_NO_HZ -void rcu_enter_nohz(void); -void rcu_exit_nohz(void); -#else /* CONFIG_NO_HZ */ -static inline void rcu_enter_nohz(void) -{ -} -static inline void rcu_exit_nohz(void) -{ -} -#endif /* CONFIG_NO_HZ */ - /* A context switch is a grace period for RCU-sched and RCU-bh. */ static inline int rcu_blocking_is_gp(void) { diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 561410f70d4a..87f60f06b18e 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -938,15 +938,6 @@ static void rcu_preempt_process_callbacks(void) { } -/* - * In classic RCU, call_rcu() is just call_rcu_sched(). - */ -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) -{ - call_rcu_sched(head, func); -} -EXPORT_SYMBOL_GPL(call_rcu); - /* * Wait for an rcu-preempt grace period, but make it happen quickly. * But because preemptable RCU does not exist, map to rcu-sched. -- cgit v1.2.3 From 65e6bf484c497f02d47a0faae69ee398cd59cfda Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 19 Aug 2010 21:43:09 -0700 Subject: rcu: add comment stating that list_empty() applies to RCU-protected lists Because list_empty() does not dereference any RCU-protected pointers, and further does not pass such pointers to the caller (so that the caller does not dereference them either), it is safe to use list_empty() on RCU-protected lists. There is no need for a list_empty_rcu(). This commit adds a comment stating this explicitly. Requested-by: Andrew Morton Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index c10b1050dbe6..f31ef61f1c65 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -9,6 +9,15 @@ #include #include +/* + * Why is there no list_empty_rcu()? Because list_empty() serves this + * purpose. The list_empty() function fetches the RCU-protected pointer + * and compares it to the address of the list head, but neither dereferences + * this pointer itself nor provides this pointer to the caller. Therefore, + * it is not necessary to use rcu_dereference(), so that list_empty() can + * be used anywhere you would want to use a list_empty_rcu(). + */ + /* * return the ->next pointer of a list_head in an rcu safe * way, we must not access it directly -- cgit v1.2.3 From 00959ade36acadc00e757f87060bf6e4501d545f Mon Sep 17 00:00:00 2001 From: Dmitry Kozlov Date: Sat, 21 Aug 2010 23:05:39 -0700 Subject: PPTP: PPP over IPv4 (Point-to-Point Tunneling Protocol) PPP: introduce "pptp" module which implements point-to-point tunneling protocol using pppox framework NET: introduce the "gre" module for demultiplexing GRE packets on version criteria (required to pptp and ip_gre may coexists) NET: ip_gre: update to use the "gre" module This patch introduces then pptp support to the linux kernel which dramatically speeds up pptp vpn connections and decreases cpu usage in comparison of existing user-space implementation (poptop/pptpclient). There is accel-pptp project (https://sourceforge.net/projects/accel-pptp/) to utilize this module, it contains plugin for pppd to use pptp in client-mode and modified pptpd (poptop) to build high-performance pptp NAS. There was many changes from initial submitted patch, most important are: 1. using rcu instead of read-write locks 2. using static bitmap instead of dynamically allocated 3. using vmalloc for memory allocation instead of BITS_PER_LONG + __get_free_pages 4. fixed many coding style issues Thanks to Eric Dumazet. Signed-off-by: Dmitry Kozlov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- MAINTAINERS | 14 + drivers/net/Kconfig | 11 + drivers/net/Makefile | 1 + drivers/net/pptp.c | 726 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/if_pppox.h | 52 ++-- include/net/gre.h | 18 ++ net/ipv4/Kconfig | 7 + net/ipv4/Makefile | 1 + net/ipv4/gre.c | 151 ++++++++++ net/ipv4/ip_gre.c | 14 +- 10 files changed, 971 insertions(+), 24 deletions(-) create mode 100644 drivers/net/pptp.c create mode 100644 include/net/gre.h create mode 100644 net/ipv4/gre.c (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index b5b8baa1d70e..43c9efcd8e10 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6528,6 +6528,20 @@ M: "Maciej W. Rozycki" S: Maintained F: drivers/serial/zs.* +GRE DEMULTIPLEXER DRIVER +M: Dmitry Kozlov +L: netdev@vger.kernel.org +S: Maintained +F: net/ipv4/gre.c +F: include/net/gre.h + +PPTP DRIVER +M: Dmitry Kozlov +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/pptp.c +W: http://sourceforge.net/projects/accel-pptp + THE REST M: Linus Torvalds L: linux-kernel@vger.kernel.org diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 5a6895320b48..9b2a72089a68 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -3192,6 +3192,17 @@ config PPPOE which contains instruction on how to use this driver (under the heading "Kernel mode PPPoE"). +config PPTP + tristate "PPP over IPv4 (PPTP) (EXPERIMENTAL)" + depends on EXPERIMENTAL && PPP && NET_IPGRE_DEMUX + help + Support for PPP over IPv4.(Point-to-Point Tunneling Protocol) + + This driver requires pppd plugin to work in client mode or + modified pptpd (poptop) to work in server mode. + See http://accel-pptp.sourceforge.net/ for information how to + utilize this module. + config PPPOATM tristate "PPP over ATM" depends on ATM && PPP diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 56e8c27f77ce..0b371083d481 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -162,6 +162,7 @@ obj-$(CONFIG_PPP_BSDCOMP) += bsd_comp.o obj-$(CONFIG_PPP_MPPE) += ppp_mppe.o obj-$(CONFIG_PPPOE) += pppox.o pppoe.o obj-$(CONFIG_PPPOL2TP) += pppox.o +obj-$(CONFIG_PPTP) += pppox.o pptp.o obj-$(CONFIG_SLIP) += slip.o obj-$(CONFIG_SLHC) += slhc.o diff --git a/drivers/net/pptp.c b/drivers/net/pptp.c new file mode 100644 index 000000000000..761f0eced724 --- /dev/null +++ b/drivers/net/pptp.c @@ -0,0 +1,726 @@ +/* + * Point-to-Point Tunneling Protocol for Linux + * + * Authors: Dmitry Kozlov + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#define PPTP_DRIVER_VERSION "0.8.5" + +#define MAX_CALLID 65535 + +static DECLARE_BITMAP(callid_bitmap, MAX_CALLID + 1); +static struct pppox_sock **callid_sock; + +static DEFINE_SPINLOCK(chan_lock); + +static struct proto pptp_sk_proto __read_mostly; +static struct ppp_channel_ops pptp_chan_ops; +static const struct proto_ops pptp_ops; + +#define PPP_LCP_ECHOREQ 0x09 +#define PPP_LCP_ECHOREP 0x0A +#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) + +#define MISSING_WINDOW 20 +#define WRAPPED(curseq, lastseq)\ + ((((curseq) & 0xffffff00) == 0) &&\ + (((lastseq) & 0xffffff00) == 0xffffff00)) + +#define PPTP_GRE_PROTO 0x880B +#define PPTP_GRE_VER 0x1 + +#define PPTP_GRE_FLAG_C 0x80 +#define PPTP_GRE_FLAG_R 0x40 +#define PPTP_GRE_FLAG_K 0x20 +#define PPTP_GRE_FLAG_S 0x10 +#define PPTP_GRE_FLAG_A 0x80 + +#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C) +#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R) +#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K) +#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S) +#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A) + +#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header)) +struct pptp_gre_header { + u8 flags; + u8 ver; + u16 protocol; + u16 payload_len; + u16 call_id; + u32 seq; + u32 ack; +} __packed; + +static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr) +{ + struct pppox_sock *sock; + struct pptp_opt *opt; + + rcu_read_lock(); + sock = rcu_dereference(callid_sock[call_id]); + if (sock) { + opt = &sock->proto.pptp; + if (opt->dst_addr.sin_addr.s_addr != s_addr) + sock = NULL; + else + sock_hold(sk_pppox(sock)); + } + rcu_read_unlock(); + + return sock; +} + +static int lookup_chan_dst(u16 call_id, __be32 d_addr) +{ + struct pppox_sock *sock; + struct pptp_opt *opt; + int i; + + rcu_read_lock(); + for (i = find_next_bit(callid_bitmap, MAX_CALLID, 1); i < MAX_CALLID; + i = find_next_bit(callid_bitmap, MAX_CALLID, i + 1)) { + sock = rcu_dereference(callid_sock[i]); + if (!sock) + continue; + opt = &sock->proto.pptp; + if (opt->dst_addr.call_id == call_id && + opt->dst_addr.sin_addr.s_addr == d_addr) + break; + } + rcu_read_unlock(); + + return i < MAX_CALLID; +} + +static int add_chan(struct pppox_sock *sock) +{ + static int call_id; + + spin_lock(&chan_lock); + if (!sock->proto.pptp.src_addr.call_id) { + call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, call_id + 1); + if (call_id == MAX_CALLID) { + call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, 1); + if (call_id == MAX_CALLID) + goto out_err; + } + sock->proto.pptp.src_addr.call_id = call_id; + } else if (test_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap)) + goto out_err; + + set_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap); + rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], sock); + spin_unlock(&chan_lock); + + return 0; + +out_err: + spin_unlock(&chan_lock); + return -1; +} + +static void del_chan(struct pppox_sock *sock) +{ + spin_lock(&chan_lock); + clear_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap); + rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], NULL); + spin_unlock(&chan_lock); + synchronize_rcu(); +} + +static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) +{ + struct sock *sk = (struct sock *) chan->private; + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + struct pptp_gre_header *hdr; + unsigned int header_len = sizeof(*hdr); + int err = 0; + int islcp; + int len; + unsigned char *data; + __u32 seq_recv; + + + struct rtable *rt; + struct net_device *tdev; + struct iphdr *iph; + int max_headroom; + + if (sk_pppox(po)->sk_state & PPPOX_DEAD) + goto tx_error; + + { + struct flowi fl = { .oif = 0, + .nl_u = { + .ip4_u = { + .daddr = opt->dst_addr.sin_addr.s_addr, + .saddr = opt->src_addr.sin_addr.s_addr, + .tos = RT_TOS(0) } }, + .proto = IPPROTO_GRE }; + err = ip_route_output_key(&init_net, &rt, &fl); + if (err) + goto tx_error; + } + tdev = rt->dst.dev; + + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(*iph) + sizeof(*hdr) + 2; + + if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { + struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); + if (!new_skb) { + ip_rt_put(rt); + goto tx_error; + } + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); + kfree_skb(skb); + skb = new_skb; + } + + data = skb->data; + islcp = ((data[0] << 8) + data[1]) == PPP_LCP && 1 <= data[2] && data[2] <= 7; + + /* compress protocol field */ + if ((opt->ppp_flags & SC_COMP_PROT) && data[0] == 0 && !islcp) + skb_pull(skb, 1); + + /* Put in the address/control bytes if necessary */ + if ((opt->ppp_flags & SC_COMP_AC) == 0 || islcp) { + data = skb_push(skb, 2); + data[0] = PPP_ALLSTATIONS; + data[1] = PPP_UI; + } + + len = skb->len; + + seq_recv = opt->seq_recv; + + if (opt->ack_sent == seq_recv) + header_len -= sizeof(hdr->ack); + + /* Push down and install GRE header */ + skb_push(skb, header_len); + hdr = (struct pptp_gre_header *)(skb->data); + + hdr->flags = PPTP_GRE_FLAG_K; + hdr->ver = PPTP_GRE_VER; + hdr->protocol = htons(PPTP_GRE_PROTO); + hdr->call_id = htons(opt->dst_addr.call_id); + + hdr->flags |= PPTP_GRE_FLAG_S; + hdr->seq = htonl(++opt->seq_sent); + if (opt->ack_sent != seq_recv) { + /* send ack with this message */ + hdr->ver |= PPTP_GRE_FLAG_A; + hdr->ack = htonl(seq_recv); + opt->ack_sent = seq_recv; + } + hdr->payload_len = htons(len); + + /* Push down and install the IP header. */ + + skb_reset_transport_header(skb); + skb_push(skb, sizeof(*iph)); + skb_reset_network_header(skb); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); + + iph = ip_hdr(skb); + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + if (ip_dont_fragment(sk, &rt->dst)) + iph->frag_off = htons(IP_DF); + else + iph->frag_off = 0; + iph->protocol = IPPROTO_GRE; + iph->tos = 0; + iph->daddr = rt->rt_dst; + iph->saddr = rt->rt_src; + iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT); + iph->tot_len = htons(skb->len); + + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + + nf_reset(skb); + + skb->ip_summed = CHECKSUM_NONE; + ip_select_ident(iph, &rt->dst, NULL); + ip_send_check(iph); + + ip_local_out(skb); + +tx_error: + return 1; +} + +static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb) +{ + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + int headersize, payload_len, seq; + __u8 *payload; + struct pptp_gre_header *header; + + if (!(sk->sk_state & PPPOX_CONNECTED)) { + if (sock_queue_rcv_skb(sk, skb)) + goto drop; + return NET_RX_SUCCESS; + } + + header = (struct pptp_gre_header *)(skb->data); + + /* test if acknowledgement present */ + if (PPTP_GRE_IS_A(header->ver)) { + __u32 ack = (PPTP_GRE_IS_S(header->flags)) ? + header->ack : header->seq; /* ack in different place if S = 0 */ + + ack = ntohl(ack); + + if (ack > opt->ack_recv) + opt->ack_recv = ack; + /* also handle sequence number wrap-around */ + if (WRAPPED(ack, opt->ack_recv)) + opt->ack_recv = ack; + } + + /* test if payload present */ + if (!PPTP_GRE_IS_S(header->flags)) + goto drop; + + headersize = sizeof(*header); + payload_len = ntohs(header->payload_len); + seq = ntohl(header->seq); + + /* no ack present? */ + if (!PPTP_GRE_IS_A(header->ver)) + headersize -= sizeof(header->ack); + /* check for incomplete packet (length smaller than expected) */ + if (skb->len - headersize < payload_len) + goto drop; + + payload = skb->data + headersize; + /* check for expected sequence number */ + if (seq < opt->seq_recv + 1 || WRAPPED(opt->seq_recv, seq)) { + if ((payload[0] == PPP_ALLSTATIONS) && (payload[1] == PPP_UI) && + (PPP_PROTOCOL(payload) == PPP_LCP) && + ((payload[4] == PPP_LCP_ECHOREQ) || (payload[4] == PPP_LCP_ECHOREP))) + goto allow_packet; + } else { + opt->seq_recv = seq; +allow_packet: + skb_pull(skb, headersize); + + if (payload[0] == PPP_ALLSTATIONS && payload[1] == PPP_UI) { + /* chop off address/control */ + if (skb->len < 3) + goto drop; + skb_pull(skb, 2); + } + + if ((*skb->data) & 1) { + /* protocol is compressed */ + skb_push(skb, 1)[0] = 0; + } + + skb->ip_summed = CHECKSUM_NONE; + skb_set_network_header(skb, skb->head-skb->data); + ppp_input(&po->chan, skb); + + return NET_RX_SUCCESS; + } +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static int pptp_rcv(struct sk_buff *skb) +{ + struct pppox_sock *po; + struct pptp_gre_header *header; + struct iphdr *iph; + + if (skb->pkt_type != PACKET_HOST) + goto drop; + + if (!pskb_may_pull(skb, 12)) + goto drop; + + iph = ip_hdr(skb); + + header = (struct pptp_gre_header *)skb->data; + + if (ntohs(header->protocol) != PPTP_GRE_PROTO || /* PPTP-GRE protocol for PPTP */ + PPTP_GRE_IS_C(header->flags) || /* flag C should be clear */ + PPTP_GRE_IS_R(header->flags) || /* flag R should be clear */ + !PPTP_GRE_IS_K(header->flags) || /* flag K should be set */ + (header->flags&0xF) != 0) /* routing and recursion ctrl = 0 */ + /* if invalid, discard this packet */ + goto drop; + + po = lookup_chan(htons(header->call_id), iph->saddr); + if (po) { + skb_dst_drop(skb); + nf_reset(skb); + return sk_receive_skb(sk_pppox(po), skb, 0); + } +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr, + int sockaddr_len) +{ + struct sock *sk = sock->sk; + struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr; + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + int error = 0; + + lock_sock(sk); + + opt->src_addr = sp->sa_addr.pptp; + if (add_chan(po)) { + release_sock(sk); + error = -EBUSY; + } + + release_sock(sk); + return error; +} + +static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, + int sockaddr_len, int flags) +{ + struct sock *sk = sock->sk; + struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr; + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + struct rtable *rt; + int error = 0; + + if (sp->sa_protocol != PX_PROTO_PPTP) + return -EINVAL; + + if (lookup_chan_dst(sp->sa_addr.pptp.call_id, sp->sa_addr.pptp.sin_addr.s_addr)) + return -EALREADY; + + lock_sock(sk); + /* Check for already bound sockets */ + if (sk->sk_state & PPPOX_CONNECTED) { + error = -EBUSY; + goto end; + } + + /* Check for already disconnected sockets, on attempts to disconnect */ + if (sk->sk_state & PPPOX_DEAD) { + error = -EALREADY; + goto end; + } + + if (!opt->src_addr.sin_addr.s_addr || !sp->sa_addr.pptp.sin_addr.s_addr) { + error = -EINVAL; + goto end; + } + + po->chan.private = sk; + po->chan.ops = &pptp_chan_ops; + + { + struct flowi fl = { + .nl_u = { + .ip4_u = { + .daddr = opt->dst_addr.sin_addr.s_addr, + .saddr = opt->src_addr.sin_addr.s_addr, + .tos = RT_CONN_FLAGS(sk) } }, + .proto = IPPROTO_GRE }; + security_sk_classify_flow(sk, &fl); + if (ip_route_output_key(&init_net, &rt, &fl)) { + error = -EHOSTUNREACH; + goto end; + } + sk_setup_caps(sk, &rt->dst); + } + po->chan.mtu = dst_mtu(&rt->dst); + if (!po->chan.mtu) + po->chan.mtu = PPP_MTU; + ip_rt_put(rt); + po->chan.mtu -= PPTP_HEADER_OVERHEAD; + + po->chan.hdrlen = 2 + sizeof(struct pptp_gre_header); + error = ppp_register_channel(&po->chan); + if (error) { + pr_err("PPTP: failed to register PPP channel (%d)\n", error); + goto end; + } + + opt->dst_addr = sp->sa_addr.pptp; + sk->sk_state = PPPOX_CONNECTED; + + end: + release_sock(sk); + return error; +} + +static int pptp_getname(struct socket *sock, struct sockaddr *uaddr, + int *usockaddr_len, int peer) +{ + int len = sizeof(struct sockaddr_pppox); + struct sockaddr_pppox sp; + + sp.sa_family = AF_PPPOX; + sp.sa_protocol = PX_PROTO_PPTP; + sp.sa_addr.pptp = pppox_sk(sock->sk)->proto.pptp.src_addr; + + memcpy(uaddr, &sp, len); + + *usockaddr_len = len; + + return 0; +} + +static int pptp_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct pppox_sock *po; + struct pptp_opt *opt; + int error = 0; + + if (!sk) + return 0; + + lock_sock(sk); + + if (sock_flag(sk, SOCK_DEAD)) { + release_sock(sk); + return -EBADF; + } + + po = pppox_sk(sk); + opt = &po->proto.pptp; + del_chan(po); + + pppox_unbind_sock(sk); + sk->sk_state = PPPOX_DEAD; + + sock_orphan(sk); + sock->sk = NULL; + + release_sock(sk); + sock_put(sk); + + return error; +} + +static void pptp_sock_destruct(struct sock *sk) +{ + if (!(sk->sk_state & PPPOX_DEAD)) { + del_chan(pppox_sk(sk)); + pppox_unbind_sock(sk); + } + skb_queue_purge(&sk->sk_receive_queue); +} + +static int pptp_create(struct net *net, struct socket *sock) +{ + int error = -ENOMEM; + struct sock *sk; + struct pppox_sock *po; + struct pptp_opt *opt; + + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pptp_sk_proto); + if (!sk) + goto out; + + sock_init_data(sock, sk); + + sock->state = SS_UNCONNECTED; + sock->ops = &pptp_ops; + + sk->sk_backlog_rcv = pptp_rcv_core; + sk->sk_state = PPPOX_NONE; + sk->sk_type = SOCK_STREAM; + sk->sk_family = PF_PPPOX; + sk->sk_protocol = PX_PROTO_PPTP; + sk->sk_destruct = pptp_sock_destruct; + + po = pppox_sk(sk); + opt = &po->proto.pptp; + + opt->seq_sent = 0; opt->seq_recv = 0; + opt->ack_recv = 0; opt->ack_sent = 0; + + error = 0; +out: + return error; +} + +static int pptp_ppp_ioctl(struct ppp_channel *chan, unsigned int cmd, + unsigned long arg) +{ + struct sock *sk = (struct sock *) chan->private; + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + void __user *argp = (void __user *)arg; + int __user *p = argp; + int err, val; + + err = -EFAULT; + switch (cmd) { + case PPPIOCGFLAGS: + val = opt->ppp_flags; + if (put_user(val, p)) + break; + err = 0; + break; + case PPPIOCSFLAGS: + if (get_user(val, p)) + break; + opt->ppp_flags = val & ~SC_RCV_BITS; + err = 0; + break; + default: + err = -ENOTTY; + } + + return err; +} + +static struct ppp_channel_ops pptp_chan_ops = { + .start_xmit = pptp_xmit, + .ioctl = pptp_ppp_ioctl, +}; + +static struct proto pptp_sk_proto __read_mostly = { + .name = "PPTP", + .owner = THIS_MODULE, + .obj_size = sizeof(struct pppox_sock), +}; + +static const struct proto_ops pptp_ops = { + .family = AF_PPPOX, + .owner = THIS_MODULE, + .release = pptp_release, + .bind = pptp_bind, + .connect = pptp_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = pptp_getname, + .poll = sock_no_poll, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = sock_no_sendmsg, + .recvmsg = sock_no_recvmsg, + .mmap = sock_no_mmap, + .ioctl = pppox_ioctl, +}; + +static struct pppox_proto pppox_pptp_proto = { + .create = pptp_create, + .owner = THIS_MODULE, +}; + +static struct gre_protocol gre_pptp_protocol = { + .handler = pptp_rcv, +}; + +static int __init pptp_init_module(void) +{ + int err = 0; + pr_info("PPTP driver version " PPTP_DRIVER_VERSION "\n"); + + callid_sock = __vmalloc((MAX_CALLID + 1) * sizeof(void *), + GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); + if (!callid_sock) { + pr_err("PPTP: cann't allocate memory\n"); + return -ENOMEM; + } + + err = gre_add_protocol(&gre_pptp_protocol, GREPROTO_PPTP); + if (err) { + pr_err("PPTP: can't add gre protocol\n"); + goto out_mem_free; + } + + err = proto_register(&pptp_sk_proto, 0); + if (err) { + pr_err("PPTP: can't register sk_proto\n"); + goto out_gre_del_protocol; + } + + err = register_pppox_proto(PX_PROTO_PPTP, &pppox_pptp_proto); + if (err) { + pr_err("PPTP: can't register pppox_proto\n"); + goto out_unregister_sk_proto; + } + + return 0; + +out_unregister_sk_proto: + proto_unregister(&pptp_sk_proto); +out_gre_del_protocol: + gre_del_protocol(&gre_pptp_protocol, GREPROTO_PPTP); +out_mem_free: + vfree(callid_sock); + + return err; +} + +static void __exit pptp_exit_module(void) +{ + unregister_pppox_proto(PX_PROTO_PPTP); + proto_unregister(&pptp_sk_proto); + gre_del_protocol(&gre_pptp_protocol, GREPROTO_PPTP); + vfree(callid_sock); +} + +module_init(pptp_init_module); +module_exit(pptp_exit_module); + +MODULE_DESCRIPTION("Point-to-Point Tunneling Protocol"); +MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 1925e0c3f162..1525b2156b2a 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -40,25 +40,35 @@ * PPPoE addressing definition */ typedef __be16 sid_t; -struct pppoe_addr{ - sid_t sid; /* Session identifier */ - unsigned char remote[ETH_ALEN]; /* Remote address */ - char dev[IFNAMSIZ]; /* Local device to use */ +struct pppoe_addr { + sid_t sid; /* Session identifier */ + unsigned char remote[ETH_ALEN]; /* Remote address */ + char dev[IFNAMSIZ]; /* Local device to use */ }; /************************************************************************ - * Protocols supported by AF_PPPOX - */ + * PPTP addressing definition + */ +struct pptp_addr { + u16 call_id; + struct in_addr sin_addr; +}; + +/************************************************************************ + * Protocols supported by AF_PPPOX + */ #define PX_PROTO_OE 0 /* Currently just PPPoE */ #define PX_PROTO_OL2TP 1 /* Now L2TP also */ -#define PX_MAX_PROTO 2 - -struct sockaddr_pppox { - sa_family_t sa_family; /* address family, AF_PPPOX */ - unsigned int sa_protocol; /* protocol identifier */ - union{ - struct pppoe_addr pppoe; - }sa_addr; +#define PX_PROTO_PPTP 2 +#define PX_MAX_PROTO 3 + +struct sockaddr_pppox { + sa_family_t sa_family; /* address family, AF_PPPOX */ + unsigned int sa_protocol; /* protocol identifier */ + union { + struct pppoe_addr pppoe; + struct pptp_addr pptp; + } sa_addr; } __packed; /* The use of the above union isn't viable because the size of this @@ -101,7 +111,7 @@ struct pppoe_tag { __be16 tag_type; __be16 tag_len; char tag_data[0]; -} __attribute ((packed)); +} __packed; /* Tag identifiers */ #define PTT_EOL __cpu_to_be16(0x0000) @@ -150,15 +160,23 @@ struct pppoe_opt { relayed to (PPPoE relaying) */ }; +struct pptp_opt { + struct pptp_addr src_addr; + struct pptp_addr dst_addr; + u32 ack_sent, ack_recv; + u32 seq_sent, seq_recv; + int ppp_flags; +}; #include struct pppox_sock { /* struct sock must be the first member of pppox_sock */ - struct sock sk; - struct ppp_channel chan; + struct sock sk; + struct ppp_channel chan; struct pppox_sock *next; /* for hash table */ union { struct pppoe_opt pppoe; + struct pptp_opt pptp; } proto; __be16 num; }; diff --git a/include/net/gre.h b/include/net/gre.h new file mode 100644 index 000000000000..82665474bcb7 --- /dev/null +++ b/include/net/gre.h @@ -0,0 +1,18 @@ +#ifndef __LINUX_GRE_H +#define __LINUX_GRE_H + +#include + +#define GREPROTO_CISCO 0 +#define GREPROTO_PPTP 1 +#define GREPROTO_MAX 2 + +struct gre_protocol { + int (*handler)(struct sk_buff *skb); + void (*err_handler)(struct sk_buff *skb, u32 info); +}; + +int gre_add_protocol(const struct gre_protocol *proto, u8 version); +int gre_del_protocol(const struct gre_protocol *proto, u8 version); + +#endif diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 7c3a7d191249..7458bdae7e9f 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -215,8 +215,15 @@ config NET_IPIP be inserted in and removed from the running kernel whenever you want). Most people won't need this and can say N. +config NET_IPGRE_DEMUX + tristate "IP: GRE demultiplexer" + help + This is helper module to demultiplex GRE packets on GRE version field criteria. + Required by ip_gre and pptp modules. + config NET_IPGRE tristate "IP: GRE tunnels over IP" + depends on NET_IPGRE_DEMUX help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 80ff87ce43aa..4978d22f9a75 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o obj-$(CONFIG_NET_IPIP) += ipip.o +obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_AH) += ah4.o diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c new file mode 100644 index 000000000000..b546736da2e1 --- /dev/null +++ b/net/ipv4/gre.c @@ -0,0 +1,151 @@ +/* + * GRE over IPv4 demultiplexer driver + * + * Authors: Dmitry Kozlov (xeb@mail.ru) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +const struct gre_protocol *gre_proto[GREPROTO_MAX] __read_mostly; +static DEFINE_SPINLOCK(gre_proto_lock); + +int gre_add_protocol(const struct gre_protocol *proto, u8 version) +{ + if (version >= GREPROTO_MAX) + goto err_out; + + spin_lock(&gre_proto_lock); + if (gre_proto[version]) + goto err_out_unlock; + + rcu_assign_pointer(gre_proto[version], proto); + spin_unlock(&gre_proto_lock); + return 0; + +err_out_unlock: + spin_unlock(&gre_proto_lock); +err_out: + return -1; +} +EXPORT_SYMBOL_GPL(gre_add_protocol); + +int gre_del_protocol(const struct gre_protocol *proto, u8 version) +{ + if (version >= GREPROTO_MAX) + goto err_out; + + spin_lock(&gre_proto_lock); + if (gre_proto[version] != proto) + goto err_out_unlock; + rcu_assign_pointer(gre_proto[version], NULL); + spin_unlock(&gre_proto_lock); + synchronize_rcu(); + return 0; + +err_out_unlock: + spin_unlock(&gre_proto_lock); +err_out: + return -1; +} +EXPORT_SYMBOL_GPL(gre_del_protocol); + +static int gre_rcv(struct sk_buff *skb) +{ + const struct gre_protocol *proto; + u8 ver; + int ret; + + if (!pskb_may_pull(skb, 12)) + goto drop; + + ver = skb->data[1]&0x7f; + if (ver >= GREPROTO_MAX) + goto drop; + + rcu_read_lock(); + proto = rcu_dereference(gre_proto[ver]); + if (!proto || !proto->handler) + goto drop_unlock; + ret = proto->handler(skb); + rcu_read_unlock(); + return ret; + +drop_unlock: + rcu_read_unlock(); +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static void gre_err(struct sk_buff *skb, u32 info) +{ + const struct gre_protocol *proto; + u8 ver; + + if (!pskb_may_pull(skb, 12)) + goto drop; + + ver = skb->data[1]&0x7f; + if (ver >= GREPROTO_MAX) + goto drop; + + rcu_read_lock(); + proto = rcu_dereference(gre_proto[ver]); + if (!proto || !proto->err_handler) + goto drop_unlock; + proto->err_handler(skb, info); + rcu_read_unlock(); + return; + +drop_unlock: + rcu_read_unlock(); +drop: + kfree_skb(skb); +} + +static const struct net_protocol net_gre_protocol = { + .handler = gre_rcv, + .err_handler = gre_err, + .netns_ok = 1, +}; + +static int __init gre_init(void) +{ + pr_info("GRE over IPv4 demultiplexor driver"); + + if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { + pr_err("gre: can't add protocol\n"); + return -EAGAIN; + } + + return 0; +} + +static void __exit gre_exit(void) +{ + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); +} + +module_init(gre_init); +module_exit(gre_exit); + +MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver"); +MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); +MODULE_LICENSE("GPL"); + diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 945b20a5ad50..85176895495a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -44,6 +44,7 @@ #include #include #include +#include #ifdef CONFIG_IPV6 #include @@ -1278,10 +1279,9 @@ static void ipgre_fb_tunnel_init(struct net_device *dev) } -static const struct net_protocol ipgre_protocol = { - .handler = ipgre_rcv, - .err_handler = ipgre_err, - .netns_ok = 1, +static const struct gre_protocol ipgre_protocol = { + .handler = ipgre_rcv, + .err_handler = ipgre_err, }; static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) @@ -1663,7 +1663,7 @@ static int __init ipgre_init(void) if (err < 0) return err; - err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); + err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); if (err < 0) { printk(KERN_INFO "ipgre init: can't add protocol\n"); goto add_proto_failed; @@ -1683,7 +1683,7 @@ out: tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: - inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); + gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); add_proto_failed: unregister_pernet_device(&ipgre_net_ops); goto out; @@ -1693,7 +1693,7 @@ static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); - if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) + if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) printk(KERN_INFO "ipgre close: can't remove protocol\n"); unregister_pernet_device(&ipgre_net_ops); } -- cgit v1.2.3 From 81ce790bd75d49a0d119f5d7b27405e1d9b1bd57 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Aug 2010 23:51:33 +0000 Subject: irda: use net_device_stats from struct net_device struct net_device has its own struct net_device_stats member, so use this one instead of a private copy in the irlan_cb struct. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/irda/irlan_common.h | 1 - net/irda/irlan/irlan_eth.c | 32 +++++++++----------------------- 2 files changed, 9 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/irda/irlan_common.h b/include/net/irda/irlan_common.h index 73cacb3ac16c..0af8b8dfbc22 100644 --- a/include/net/irda/irlan_common.h +++ b/include/net/irda/irlan_common.h @@ -171,7 +171,6 @@ struct irlan_cb { int magic; struct list_head dev_list; struct net_device *dev; /* Ethernet device structure*/ - struct net_device_stats stats; __u32 saddr; /* Source device address */ __u32 daddr; /* Destination device address */ diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 5bb8353105cc..8ee1ff6c742f 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -45,13 +45,11 @@ static int irlan_eth_close(struct net_device *dev); static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, struct net_device *dev); static void irlan_eth_set_multicast_list( struct net_device *dev); -static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev); static const struct net_device_ops irlan_eth_netdev_ops = { .ndo_open = irlan_eth_open, .ndo_stop = irlan_eth_close, .ndo_start_xmit = irlan_eth_xmit, - .ndo_get_stats = irlan_eth_get_stats, .ndo_set_multicast_list = irlan_eth_set_multicast_list, .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, @@ -208,10 +206,10 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, * tried :-) DB */ /* irttp_data_request already free the packet */ - self->stats.tx_dropped++; + dev->stats.tx_dropped++; } else { - self->stats.tx_packets++; - self->stats.tx_bytes += len; + dev->stats.tx_packets++; + dev->stats.tx_bytes += len; } return NETDEV_TX_OK; @@ -226,15 +224,16 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb) { struct irlan_cb *self = instance; + struct net_device *dev = self->dev; if (skb == NULL) { - ++self->stats.rx_dropped; + dev->stats.rx_dropped++; return 0; } if (skb->len < ETH_HLEN) { IRDA_DEBUG(0, "%s() : IrLAN frame too short (%d)\n", __func__, skb->len); - ++self->stats.rx_dropped; + dev->stats.rx_dropped++; dev_kfree_skb(skb); return 0; } @@ -244,10 +243,10 @@ int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb) * might have been previously set by the low level IrDA network * device driver */ - skb->protocol = eth_type_trans(skb, self->dev); /* Remove eth header */ + skb->protocol = eth_type_trans(skb, dev); /* Remove eth header */ - self->stats.rx_packets++; - self->stats.rx_bytes += skb->len; + dev->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; netif_rx(skb); /* Eat it! */ @@ -348,16 +347,3 @@ static void irlan_eth_set_multicast_list(struct net_device *dev) else irlan_set_broadcast_filter(self, FALSE); } - -/* - * Function irlan_get_stats (dev) - * - * Get the current statistics for this device - * - */ -static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev) -{ - struct irlan_cb *self = netdev_priv(dev); - - return &self->stats; -} -- cgit v1.2.3 From 739a91ef0625e0e4a40b835f4f891313c47915df Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 21 Aug 2010 06:23:15 +0000 Subject: net_sched: cls_flow: add key rxhash We can use rxhash to classify the traffic into flows. As rxhash maybe supplied by NIC or RPS, it is cheaper. Signed-off-by: Changli Gao Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/pkt_cls.h | 1 + net/sched/cls_flow.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index 7f6ba8658abe..defbde203d07 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -332,6 +332,7 @@ enum { FLOW_KEY_SKUID, FLOW_KEY_SKGID, FLOW_KEY_VLAN_TAG, + FLOW_KEY_RXHASH, __FLOW_KEY_MAX, }; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index cd709f1294df..5b271a18bc3a 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -306,6 +306,11 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb) return tag & VLAN_VID_MASK; } +static u32 flow_get_rxhash(struct sk_buff *skb) +{ + return skb_get_rxhash(skb); +} + static u32 flow_key_get(struct sk_buff *skb, int key) { switch (key) { @@ -343,6 +348,8 @@ static u32 flow_key_get(struct sk_buff *skb, int key) return flow_get_skgid(skb); case FLOW_KEY_VLAN_TAG: return flow_get_vlan_tag(skb); + case FLOW_KEY_RXHASH: + return flow_get_rxhash(skb); default: WARN_ON(1); return 0; -- cgit v1.2.3 From d8287fc864643beaf1623c92aceb1ab38eae0648 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 18:37:27 -0700 Subject: net: use __be16 instead of u16 for the userspace code Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/if_pppox.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 1525b2156b2a..770e8fa669d2 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -50,8 +50,8 @@ struct pppoe_addr { * PPTP addressing definition */ struct pptp_addr { - u16 call_id; - struct in_addr sin_addr; + __be16 call_id; + struct in_addr sin_addr; }; /************************************************************************ -- cgit v1.2.3 From 05532121da0728eaedac2a0a5c3cecad3a95d765 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 21:03:33 -0700 Subject: net: 802.1q: make vlan_hwaccel_do_receive() return void vlan_hwaccel_do_receive() always returns 0, so make it return void. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 5 ++--- net/8021q/vlan_core.c | 3 +-- net/core/dev.c | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 3d870fda8c4f..a52320751bfc 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -119,7 +119,7 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); -extern int vlan_hwaccel_do_receive(struct sk_buff *skb); +extern void vlan_hwaccel_do_receive(struct sk_buff *skb); extern gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb); @@ -147,9 +147,8 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, return NET_XMIT_SUCCESS; } -static inline int vlan_hwaccel_do_receive(struct sk_buff *skb) +static inline void vlan_hwaccel_do_receive(struct sk_buff *skb) { - return 0; } static inline gro_result_t diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 01ddb0472f86..07eeb5b99dce 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -35,7 +35,7 @@ drop: } EXPORT_SYMBOL(__vlan_hwaccel_rx); -int vlan_hwaccel_do_receive(struct sk_buff *skb) +void vlan_hwaccel_do_receive(struct sk_buff *skb) { struct net_device *dev = skb->dev; struct vlan_rx_stats *rx_stats; @@ -69,7 +69,6 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb) break; } u64_stats_update_end(&rx_stats->syncp); - return 0; } struct net_device *vlan_dev_real_dev(const struct net_device *dev) diff --git a/net/core/dev.c b/net/core/dev.c index 7cd5237d9822..d569f88bcf80 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2841,8 +2841,8 @@ static int __netif_receive_skb(struct sk_buff *skb) if (!netdev_tstamp_prequeue) net_timestamp_check(skb); - if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) - return NET_RX_SUCCESS; + if (vlan_tx_tag_present(skb)) + vlan_hwaccel_do_receive(skb); /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) -- cgit v1.2.3 From fcb12fd2236f49aa8fdc1568ed4ebdfe4fddc6b5 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 16:41:59 +0000 Subject: net: rds: remove duplication type definitions __be* are defined in linux/types.h now, and in fact, rds.h isn't exported to user space even. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/rds.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 24bce3ded9ea..7f3971d9fc5c 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -36,15 +36,6 @@ #include -/* These sparse annotated types shouldn't be in any user - * visible header file. We should clean this up rather - * than kludging around them. */ -#ifndef __KERNEL__ -#define __be16 u_int16_t -#define __be32 u_int32_t -#define __be64 u_int64_t -#endif - #define RDS_IB_ABI_VERSION 0x301 /* -- cgit v1.2.3 From 21dc330157454046dd7c494961277d76e1c957fe Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 23 Aug 2010 00:13:46 -0700 Subject: net: Rename skb_has_frags to skb_has_frag_list SKBs can be "fragmented" in two ways, via a page array (called skb_shinfo(skb)->frags[]) and via a list of SKBs (called skb_shinfo(skb)->frag_list). Since skb_has_frags() tests the latter, it's name is confusing since it sounds more like it's testing the former. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- include/linux/skbuff.h | 4 ++-- net/core/dev.c | 4 ++-- net/core/skbuff.c | 18 +++++++++--------- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/reassembly.c | 2 +- 9 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 46c36ffe20ee..ce2de8b64083 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2191,7 +2191,7 @@ static inline int net_gso_ok(int features, int gso_type) static inline int skb_gso_ok(struct sk_buff *skb, int features) { return net_gso_ok(features, skb_shinfo(skb)->gso_type) && - (!skb_has_frags(skb) || (features & NETIF_F_FRAGLIST)); + (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); } static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f067c95cf18a..f900ffcd847e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1120,7 +1120,7 @@ extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size); #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) -#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frags(skb)) +#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frag_list(skb)) #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) #ifdef NET_SKBUFF_DATA_USES_OFFSET @@ -1784,7 +1784,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) skb = skb->prev) -static inline bool skb_has_frags(const struct sk_buff *skb) +static inline bool skb_has_frag_list(const struct sk_buff *skb) { return skb_shinfo(skb)->frag_list != NULL; } diff --git a/net/core/dev.c b/net/core/dev.c index d569f88bcf80..859e30ff044a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1930,7 +1930,7 @@ static inline int skb_needs_linearize(struct sk_buff *skb, struct net_device *dev) { return skb_is_nonlinear(skb) && - ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || + ((skb_has_frag_list(skb) && !(dev->features & NETIF_F_FRAGLIST)) || (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)))); } @@ -3090,7 +3090,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) goto normal; - if (skb_is_gso(skb) || skb_has_frags(skb)) + if (skb_is_gso(skb) || skb_has_frag_list(skb)) goto normal; rcu_read_lock(); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 99ef721f773d..e2535fb4985d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -340,7 +340,7 @@ static void skb_release_data(struct sk_buff *skb) put_page(skb_shinfo(skb)->frags[i].page); } - if (skb_has_frags(skb)) + if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); kfree(skb->head); @@ -759,7 +759,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) skb_shinfo(n)->nr_frags = i; } - if (skb_has_frags(skb)) { + if (skb_has_frag_list(skb)) { skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; skb_clone_fraglist(n); } @@ -822,7 +822,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) get_page(skb_shinfo(skb)->frags[i].page); - if (skb_has_frags(skb)) + if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); skb_release_data(skb); @@ -1099,7 +1099,7 @@ drop_pages: for (; i < nfrags; i++) put_page(skb_shinfo(skb)->frags[i].page); - if (skb_has_frags(skb)) + if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); goto done; } @@ -1194,7 +1194,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) /* Optimization: no fragments, no reasons to preestimate * size of pulled pages. Superb. */ - if (!skb_has_frags(skb)) + if (!skb_has_frag_list(skb)) goto pull_pages; /* Estimate size of pulled pages. */ @@ -2323,7 +2323,7 @@ next_skb: st->frag_data = NULL; } - if (st->root_skb == st->cur_skb && skb_has_frags(st->root_skb)) { + if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) { st->cur_skb = skb_shinfo(st->root_skb)->frag_list; st->frag_idx = 0; goto next_skb; @@ -2889,7 +2889,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) return -ENOMEM; /* Easy case. Most of packets will go this way. */ - if (!skb_has_frags(skb)) { + if (!skb_has_frag_list(skb)) { /* A little of trouble, not enough of space for trailer. * This should not happen, when stack is tuned to generate * good frames. OK, on miss we reallocate and reserve even more @@ -2924,7 +2924,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) if (skb1->next == NULL && tailbits) { if (skb_shinfo(skb1)->nr_frags || - skb_has_frags(skb1) || + skb_has_frag_list(skb1) || skb_tailroom(skb1) < tailbits) ntail = tailbits + 128; } @@ -2933,7 +2933,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) skb_cloned(skb1) || ntail || skb_shinfo(skb1)->nr_frags || - skb_has_frags(skb1)) { + skb_has_frag_list(skb1)) { struct sk_buff *skb2; /* Fuck, we are miserable poor guys... */ diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b7c41654dde5..f4dc879e258e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -542,7 +542,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ - if (skb_has_frags(head)) { + if (skb_has_frag_list(head)) { struct sk_buff *clone; int i, plen = 0; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e807492f1777..6d2753c7ffdd 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -487,7 +487,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) * LATER: this step can be merged to real generation of fragments, * we can switch to copy when see the first bad fragment. */ - if (skb_has_frags(skb)) { + if (skb_has_frag_list(skb)) { struct sk_buff *frag; int first_len = skb_pagelen(skb); int truesizes = 0; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d40b330c0ee6..1838927a2243 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -637,7 +637,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } mtu -= hlen + sizeof(struct frag_hdr); - if (skb_has_frags(skb)) { + if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); int truesizes = 0; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 13ef5bc05cf5..089c598773c7 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -413,7 +413,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ - if (skb_has_frags(head)) { + if (skb_has_frag_list(head)) { struct sk_buff *clone; int i, plen = 0; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 545c4141b755..8aea3f3f18d7 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -499,7 +499,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ - if (skb_has_frags(head)) { + if (skb_has_frag_list(head)) { struct sk_buff *clone; int i, plen = 0; -- cgit v1.2.3 From 61c77326d1df079f202fa79403c3ccd8c5966a81 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 16 Aug 2010 09:16:55 +0800 Subject: x86, mm: Avoid unnecessary TLB flush In x86, access and dirty bits are set automatically by CPU when CPU accesses memory. When we go into the code path of below flush_tlb_fix_spurious_fault(), we already set dirty bit for pte and don't need flush tlb. This might mean tlb entry in some CPUs hasn't dirty bit set, but this doesn't matter. When the CPUs do page write, they will automatically check the bit and no software involved. On the other hand, flush tlb in below position is harmful. Test creates CPU number of threads, each thread writes to a same but random address in same vma range and we measure the total time. Under a 4 socket system, original time is 1.96s, while with the patch, the time is 0.8s. Under a 2 socket system, there is 20% time cut too. perf shows a lot of time are taking to send ipi/handle ipi for tlb flush. Signed-off-by: Shaohua Li LKML-Reference: <20100816011655.GA362@sli10-desk.sh.intel.com> Acked-by: Suresh Siddha Cc: Andrea Archangeli Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pgtable.h | 2 ++ include/asm-generic/pgtable.h | 4 ++++ mm/memory.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a34c785c5a63..2d0a33bd2971 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -603,6 +603,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, pte_update(mm, addr, ptep); } +#define flush_tlb_fix_spurious_fault(vma, address) + /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index e2bd73e8f9c0..f4d4120e5128 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres #define move_pte(pte, prot, old_addr, new_addr) (pte) #endif +#ifndef flush_tlb_fix_spurious_fault +#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address) +#endif + #ifndef pgprot_noncached #define pgprot_noncached(prot) (prot) #endif diff --git a/mm/memory.c b/mm/memory.c index 2ed2267439df..a40da6983961 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3147,7 +3147,7 @@ static inline int handle_pte_fault(struct mm_struct *mm, * with threads. */ if (flags & FAULT_FLAG_WRITE) - flush_tlb_page(vma, address); + flush_tlb_fix_spurious_fault(vma, address); } unlock: pte_unmap_unlock(pte, ptl); -- cgit v1.2.3 From 8b230ed8ec96c933047dd0625cf95f739e4939a6 Mon Sep 17 00:00:00 2001 From: Rasesh Mody Date: Mon, 23 Aug 2010 20:24:12 -0700 Subject: bna: Brocade 10Gb Ethernet device driver This is patch 1/6 which contains linux driver source for Brocade's BR1010/BR1020 10Gb CEE capable ethernet adapter. Signed-off-by: Debashis Dutt Signed-off-by: Rasesh Mody Signed-off-by: David S. Miller --- MAINTAINERS | 7 + drivers/net/Kconfig | 14 + drivers/net/Makefile | 1 + drivers/net/bna/Makefile | 11 + drivers/net/bna/bfa_cee.c | 407 ++++ drivers/net/bna/bfa_cee.h | 72 + drivers/net/bna/bfa_defs.h | 243 ++ drivers/net/bna/bfa_defs_cna.h | 223 ++ drivers/net/bna/bfa_defs_mfg_comm.h | 244 ++ drivers/net/bna/bfa_defs_status.h | 216 ++ drivers/net/bna/bfa_ioc.c | 1839 +++++++++++++++ drivers/net/bna/bfa_ioc.h | 343 +++ drivers/net/bna/bfa_ioc_ct.c | 391 ++++ drivers/net/bna/bfa_sm.h | 88 + drivers/net/bna/bfa_wc.h | 69 + drivers/net/bna/bfi.h | 392 ++++ drivers/net/bna/bfi_cna.h | 199 ++ drivers/net/bna/bfi_ctreg.h | 637 ++++++ drivers/net/bna/bfi_ll.h | 438 ++++ drivers/net/bna/bna.h | 654 ++++++ drivers/net/bna/bna_ctrl.c | 3626 ++++++++++++++++++++++++++++++ drivers/net/bna/bna_hw.h | 1491 +++++++++++++ drivers/net/bna/bna_txrx.c | 4209 +++++++++++++++++++++++++++++++++++ drivers/net/bna/bna_types.h | 1128 ++++++++++ drivers/net/bna/bnad.c | 3270 +++++++++++++++++++++++++++ drivers/net/bna/bnad.h | 334 +++ drivers/net/bna/bnad_ethtool.c | 1282 +++++++++++ drivers/net/bna/cna.h | 81 + drivers/net/bna/cna_fwimg.c | 64 + include/linux/pci_ids.h | 3 + 30 files changed, 21976 insertions(+) create mode 100644 drivers/net/bna/Makefile create mode 100644 drivers/net/bna/bfa_cee.c create mode 100644 drivers/net/bna/bfa_cee.h create mode 100644 drivers/net/bna/bfa_defs.h create mode 100644 drivers/net/bna/bfa_defs_cna.h create mode 100644 drivers/net/bna/bfa_defs_mfg_comm.h create mode 100644 drivers/net/bna/bfa_defs_status.h create mode 100644 drivers/net/bna/bfa_ioc.c create mode 100644 drivers/net/bna/bfa_ioc.h create mode 100644 drivers/net/bna/bfa_ioc_ct.c create mode 100644 drivers/net/bna/bfa_sm.h create mode 100644 drivers/net/bna/bfa_wc.h create mode 100644 drivers/net/bna/bfi.h create mode 100644 drivers/net/bna/bfi_cna.h create mode 100644 drivers/net/bna/bfi_ctreg.h create mode 100644 drivers/net/bna/bfi_ll.h create mode 100644 drivers/net/bna/bna.h create mode 100644 drivers/net/bna/bna_ctrl.c create mode 100644 drivers/net/bna/bna_hw.h create mode 100644 drivers/net/bna/bna_txrx.c create mode 100644 drivers/net/bna/bna_types.h create mode 100644 drivers/net/bna/bnad.c create mode 100644 drivers/net/bna/bnad.h create mode 100644 drivers/net/bna/bnad_ethtool.c create mode 100644 drivers/net/bna/cna.h create mode 100644 drivers/net/bna/cna_fwimg.c (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 43c9efcd8e10..93198c1980a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1387,6 +1387,13 @@ L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/bfa/ +BROCADE BNA 10 GIGABIT ETHERNET DRIVER +M: Rasesh Mody +M: Debashis Dutt +L: netdev@vger.kernel.org +S: Supported +F: drivers/net/bna/ + BSG (block layer generic sg v4 driver) M: FUJITA Tomonori L: linux-scsi@vger.kernel.org diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 85485287192a..53c4810b119e 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2869,6 +2869,20 @@ config QLGE To compile this driver as a module, choose M here: the module will be called qlge. +config BNA + tristate "Brocade 1010/1020 10Gb Ethernet Driver support" + depends on PCI + ---help--- + This driver supports Brocade 1010/1020 10Gb CEE capable Ethernet + cards. + To compile this driver as a module, choose M here: the module + will be called bna. + + For general information and support, go to the Brocade support + website at: + + + source "drivers/net/sfc/Kconfig" source "drivers/net/benet/Kconfig" diff --git a/drivers/net/Makefile b/drivers/net/Makefile index f34b3baf7ee6..18a277709a2a 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_ENIC) += enic/ obj-$(CONFIG_JME) += jme.o obj-$(CONFIG_BE2NET) += benet/ obj-$(CONFIG_VMXNET3) += vmxnet3/ +obj-$(CONFIG_BNA) += bna/ gianfar_driver-objs := gianfar.o \ gianfar_ethtool.o \ diff --git a/drivers/net/bna/Makefile b/drivers/net/bna/Makefile new file mode 100644 index 000000000000..a5d604de7fea --- /dev/null +++ b/drivers/net/bna/Makefile @@ -0,0 +1,11 @@ +# +# Copyright (c) 2005-2010 Brocade Communications Systems, Inc. +# All rights reserved. +# + +obj-$(CONFIG_BNA) += bna.o + +bna-objs := bnad.o bnad_ethtool.o bna_ctrl.o bna_txrx.o +bna-objs += bfa_ioc.o bfa_ioc_ct.o bfa_cee.o cna_fwimg.o + +EXTRA_CFLAGS := -Idrivers/net/bna diff --git a/drivers/net/bna/bfa_cee.c b/drivers/net/bna/bfa_cee.c new file mode 100644 index 000000000000..1545fc9720f8 --- /dev/null +++ b/drivers/net/bna/bfa_cee.c @@ -0,0 +1,407 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#include "bfa_defs_cna.h" +#include "cna.h" +#include "bfa_cee.h" +#include "bfi_cna.h" +#include "bfa_ioc.h" + +#define bfa_ioc_portid(__ioc) ((__ioc)->port_id) +#define bfa_lpuid(__arg) bfa_ioc_portid(&(__arg)->ioc) + +static void bfa_cee_format_lldp_cfg(struct bfa_cee_lldp_cfg *lldp_cfg); +static void bfa_cee_format_cee_cfg(void *buffer); + +static void +bfa_cee_format_cee_cfg(void *buffer) +{ + struct bfa_cee_attr *cee_cfg = buffer; + bfa_cee_format_lldp_cfg(&cee_cfg->lldp_remote); +} + +static void +bfa_cee_stats_swap(struct bfa_cee_stats *stats) +{ + u32 *buffer = (u32 *)stats; + int i; + + for (i = 0; i < (sizeof(struct bfa_cee_stats) / sizeof(u32)); + i++) { + buffer[i] = ntohl(buffer[i]); + } +} + +static void +bfa_cee_format_lldp_cfg(struct bfa_cee_lldp_cfg *lldp_cfg) +{ + lldp_cfg->time_to_live = + ntohs(lldp_cfg->time_to_live); + lldp_cfg->enabled_system_cap = + ntohs(lldp_cfg->enabled_system_cap); +} + +/** + * bfa_cee_attr_meminfo() + * + * @brief Returns the size of the DMA memory needed by CEE attributes + * + * @param[in] void + * + * @return Size of DMA region + */ +static u32 +bfa_cee_attr_meminfo(void) +{ + return roundup(sizeof(struct bfa_cee_attr), BFA_DMA_ALIGN_SZ); +} +/** + * bfa_cee_stats_meminfo() + * + * @brief Returns the size of the DMA memory needed by CEE stats + * + * @param[in] void + * + * @return Size of DMA region + */ +static u32 +bfa_cee_stats_meminfo(void) +{ + return roundup(sizeof(struct bfa_cee_stats), BFA_DMA_ALIGN_SZ); +} + +/** + * bfa_cee_get_attr_isr() + * + * @brief CEE ISR for get-attributes responses from f/w + * + * @param[in] cee - Pointer to the CEE module + * status - Return status from the f/w + * + * @return void + */ +static void +bfa_cee_get_attr_isr(struct bfa_cee *cee, enum bfa_status status) +{ + cee->get_attr_status = status; + if (status == BFA_STATUS_OK) { + memcpy(cee->attr, cee->attr_dma.kva, + sizeof(struct bfa_cee_attr)); + bfa_cee_format_cee_cfg(cee->attr); + } + cee->get_attr_pending = false; + if (cee->cbfn.get_attr_cbfn) + cee->cbfn.get_attr_cbfn(cee->cbfn.get_attr_cbarg, status); +} + +/** + * bfa_cee_get_attr_isr() + * + * @brief CEE ISR for get-stats responses from f/w + * + * @param[in] cee - Pointer to the CEE module + * status - Return status from the f/w + * + * @return void + */ +static void +bfa_cee_get_stats_isr(struct bfa_cee *cee, enum bfa_status status) +{ + cee->get_stats_status = status; + if (status == BFA_STATUS_OK) { + memcpy(cee->stats, cee->stats_dma.kva, + sizeof(struct bfa_cee_stats)); + bfa_cee_stats_swap(cee->stats); + } + cee->get_stats_pending = false; + if (cee->cbfn.get_stats_cbfn) + cee->cbfn.get_stats_cbfn(cee->cbfn.get_stats_cbarg, status); +} + +/** + * bfa_cee_get_attr_isr() + * + * @brief CEE ISR for reset-stats responses from f/w + * + * @param[in] cee - Pointer to the CEE module + * status - Return status from the f/w + * + * @return void + */ +static void +bfa_cee_reset_stats_isr(struct bfa_cee *cee, enum bfa_status status) +{ + cee->reset_stats_status = status; + cee->reset_stats_pending = false; + if (cee->cbfn.reset_stats_cbfn) + cee->cbfn.reset_stats_cbfn(cee->cbfn.reset_stats_cbarg, status); +} +/** + * bfa_cee_meminfo() + * + * @brief Returns the size of the DMA memory needed by CEE module + * + * @param[in] void + * + * @return Size of DMA region + */ +u32 +bfa_cee_meminfo(void) +{ + return bfa_cee_attr_meminfo() + bfa_cee_stats_meminfo(); +} + +/** + * bfa_cee_mem_claim() + * + * @brief Initialized CEE DMA Memory + * + * @param[in] cee CEE module pointer + * dma_kva Kernel Virtual Address of CEE DMA Memory + * dma_pa Physical Address of CEE DMA Memory + * + * @return void + */ +void +bfa_cee_mem_claim(struct bfa_cee *cee, u8 *dma_kva, u64 dma_pa) +{ + cee->attr_dma.kva = dma_kva; + cee->attr_dma.pa = dma_pa; + cee->stats_dma.kva = dma_kva + bfa_cee_attr_meminfo(); + cee->stats_dma.pa = dma_pa + bfa_cee_attr_meminfo(); + cee->attr = (struct bfa_cee_attr *) dma_kva; + cee->stats = (struct bfa_cee_stats *) + (dma_kva + bfa_cee_attr_meminfo()); +} + +/** + * bfa_cee_get_attr() + * + * @brief + * Send the request to the f/w to fetch CEE attributes. + * + * @param[in] Pointer to the CEE module data structure. + * + * @return Status + */ + +enum bfa_status +bfa_cee_get_attr(struct bfa_cee *cee, struct bfa_cee_attr *attr, + bfa_cee_get_attr_cbfn_t cbfn, void *cbarg) +{ + struct bfi_cee_get_req *cmd; + + BUG_ON(!((cee != NULL) && (cee->ioc != NULL))); + if (!bfa_ioc_is_operational(cee->ioc)) + return BFA_STATUS_IOC_FAILURE; + if (cee->get_attr_pending == true) + return BFA_STATUS_DEVBUSY; + cee->get_attr_pending = true; + cmd = (struct bfi_cee_get_req *) cee->get_cfg_mb.msg; + cee->attr = attr; + cee->cbfn.get_attr_cbfn = cbfn; + cee->cbfn.get_attr_cbarg = cbarg; + bfi_h2i_set(cmd->mh, BFI_MC_CEE, BFI_CEE_H2I_GET_CFG_REQ, + bfa_ioc_portid(cee->ioc)); + bfa_dma_be_addr_set(cmd->dma_addr, cee->attr_dma.pa); + bfa_ioc_mbox_queue(cee->ioc, &cee->get_cfg_mb); + + return BFA_STATUS_OK; +} + +/** + * bfa_cee_get_stats() + * + * @brief + * Send the request to the f/w to fetch CEE statistics. + * + * @param[in] Pointer to the CEE module data structure. + * + * @return Status + */ + +enum bfa_status +bfa_cee_get_stats(struct bfa_cee *cee, struct bfa_cee_stats *stats, + bfa_cee_get_stats_cbfn_t cbfn, void *cbarg) +{ + struct bfi_cee_get_req *cmd; + + BUG_ON(!((cee != NULL) && (cee->ioc != NULL))); + + if (!bfa_ioc_is_operational(cee->ioc)) + return BFA_STATUS_IOC_FAILURE; + if (cee->get_stats_pending == true) + return BFA_STATUS_DEVBUSY; + cee->get_stats_pending = true; + cmd = (struct bfi_cee_get_req *) cee->get_stats_mb.msg; + cee->stats = stats; + cee->cbfn.get_stats_cbfn = cbfn; + cee->cbfn.get_stats_cbarg = cbarg; + bfi_h2i_set(cmd->mh, BFI_MC_CEE, BFI_CEE_H2I_GET_STATS_REQ, + bfa_ioc_portid(cee->ioc)); + bfa_dma_be_addr_set(cmd->dma_addr, cee->stats_dma.pa); + bfa_ioc_mbox_queue(cee->ioc, &cee->get_stats_mb); + + return BFA_STATUS_OK; +} + +/** + * bfa_cee_reset_stats() + * + * @brief Clears CEE Stats in the f/w. + * + * @param[in] Pointer to the CEE module data structure. + * + * @return Status + */ + +enum bfa_status +bfa_cee_reset_stats(struct bfa_cee *cee, bfa_cee_reset_stats_cbfn_t cbfn, + void *cbarg) +{ + struct bfi_cee_reset_stats *cmd; + + BUG_ON(!((cee != NULL) && (cee->ioc != NULL))); + if (!bfa_ioc_is_operational(cee->ioc)) + return BFA_STATUS_IOC_FAILURE; + if (cee->reset_stats_pending == true) + return BFA_STATUS_DEVBUSY; + cee->reset_stats_pending = true; + cmd = (struct bfi_cee_reset_stats *) cee->reset_stats_mb.msg; + cee->cbfn.reset_stats_cbfn = cbfn; + cee->cbfn.reset_stats_cbarg = cbarg; + bfi_h2i_set(cmd->mh, BFI_MC_CEE, BFI_CEE_H2I_RESET_STATS, + bfa_ioc_portid(cee->ioc)); + bfa_ioc_mbox_queue(cee->ioc, &cee->reset_stats_mb); + return BFA_STATUS_OK; +} + +/** + * bfa_cee_isrs() + * + * @brief Handles Mail-box interrupts for CEE module. + * + * @param[in] Pointer to the CEE module data structure. + * + * @return void + */ + +void +bfa_cee_isr(void *cbarg, struct bfi_mbmsg *m) +{ + union bfi_cee_i2h_msg_u *msg; + struct bfi_cee_get_rsp *get_rsp; + struct bfa_cee *cee = (struct bfa_cee *) cbarg; + msg = (union bfi_cee_i2h_msg_u *) m; + get_rsp = (struct bfi_cee_get_rsp *) m; + switch (msg->mh.msg_id) { + case BFI_CEE_I2H_GET_CFG_RSP: + bfa_cee_get_attr_isr(cee, get_rsp->cmd_status); + break; + case BFI_CEE_I2H_GET_STATS_RSP: + bfa_cee_get_stats_isr(cee, get_rsp->cmd_status); + break; + case BFI_CEE_I2H_RESET_STATS_RSP: + bfa_cee_reset_stats_isr(cee, get_rsp->cmd_status); + break; + default: + BUG_ON(1); + } +} + +/** + * bfa_cee_hbfail() + * + * @brief CEE module heart-beat failure handler. + * + * @param[in] Pointer to the CEE module data structure. + * + * @return void + */ + +void +bfa_cee_hbfail(void *arg) +{ + struct bfa_cee *cee; + cee = (struct bfa_cee *) arg; + + if (cee->get_attr_pending == true) { + cee->get_attr_status = BFA_STATUS_FAILED; + cee->get_attr_pending = false; + if (cee->cbfn.get_attr_cbfn) { + cee->cbfn.get_attr_cbfn(cee->cbfn.get_attr_cbarg, + BFA_STATUS_FAILED); + } + } + if (cee->get_stats_pending == true) { + cee->get_stats_status = BFA_STATUS_FAILED; + cee->get_stats_pending = false; + if (cee->cbfn.get_stats_cbfn) { + cee->cbfn.get_stats_cbfn(cee->cbfn.get_stats_cbarg, + BFA_STATUS_FAILED); + } + } + if (cee->reset_stats_pending == true) { + cee->reset_stats_status = BFA_STATUS_FAILED; + cee->reset_stats_pending = false; + if (cee->cbfn.reset_stats_cbfn) { + cee->cbfn.reset_stats_cbfn(cee->cbfn.reset_stats_cbarg, + BFA_STATUS_FAILED); + } + } +} + +/** + * bfa_cee_attach() + * + * @brief CEE module-attach API + * + * @param[in] cee - Pointer to the CEE module data structure + * ioc - Pointer to the ioc module data structure + * dev - Pointer to the device driver module data structure + * The device driver specific mbox ISR functions have + * this pointer as one of the parameters. + * + * @return void + */ +void +bfa_cee_attach(struct bfa_cee *cee, struct bfa_ioc *ioc, + void *dev) +{ + BUG_ON(!(cee != NULL)); + cee->dev = dev; + cee->ioc = ioc; + + bfa_ioc_mbox_regisr(cee->ioc, BFI_MC_CEE, bfa_cee_isr, cee); + bfa_ioc_hbfail_init(&cee->hbfail, bfa_cee_hbfail, cee); + bfa_ioc_hbfail_register(cee->ioc, &cee->hbfail); +} + +/** + * bfa_cee_detach() + * + * @brief CEE module-detach API + * + * @param[in] cee - Pointer to the CEE module data structure + * + * @return void + */ +void +bfa_cee_detach(struct bfa_cee *cee) +{ +} diff --git a/drivers/net/bna/bfa_cee.h b/drivers/net/bna/bfa_cee.h new file mode 100644 index 000000000000..1208cadeceed --- /dev/null +++ b/drivers/net/bna/bfa_cee.h @@ -0,0 +1,72 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#ifndef __BFA_CEE_H__ +#define __BFA_CEE_H__ + +#include "bfa_defs_cna.h" +#include "bfa_ioc.h" + +typedef void (*bfa_cee_get_attr_cbfn_t) (void *dev, enum bfa_status status); +typedef void (*bfa_cee_get_stats_cbfn_t) (void *dev, enum bfa_status status); +typedef void (*bfa_cee_reset_stats_cbfn_t) (void *dev, enum bfa_status status); +typedef void (*bfa_cee_hbfail_cbfn_t) (void *dev, enum bfa_status status); + +struct bfa_cee_cbfn { + bfa_cee_get_attr_cbfn_t get_attr_cbfn; + void *get_attr_cbarg; + bfa_cee_get_stats_cbfn_t get_stats_cbfn; + void *get_stats_cbarg; + bfa_cee_reset_stats_cbfn_t reset_stats_cbfn; + void *reset_stats_cbarg; +}; + +struct bfa_cee { + void *dev; + bool get_attr_pending; + bool get_stats_pending; + bool reset_stats_pending; + enum bfa_status get_attr_status; + enum bfa_status get_stats_status; + enum bfa_status reset_stats_status; + struct bfa_cee_cbfn cbfn; + struct bfa_ioc_hbfail_notify hbfail; + struct bfa_cee_attr *attr; + struct bfa_cee_stats *stats; + struct bfa_dma attr_dma; + struct bfa_dma stats_dma; + struct bfa_ioc *ioc; + struct bfa_mbox_cmd get_cfg_mb; + struct bfa_mbox_cmd get_stats_mb; + struct bfa_mbox_cmd reset_stats_mb; +}; + +u32 bfa_cee_meminfo(void); +void bfa_cee_mem_claim(struct bfa_cee *cee, u8 *dma_kva, + u64 dma_pa); +void bfa_cee_attach(struct bfa_cee *cee, struct bfa_ioc *ioc, void *dev); +void bfa_cee_detach(struct bfa_cee *cee); +enum bfa_status bfa_cee_get_attr(struct bfa_cee *cee, + struct bfa_cee_attr *attr, bfa_cee_get_attr_cbfn_t cbfn, void *cbarg); +enum bfa_status bfa_cee_get_stats(struct bfa_cee *cee, + struct bfa_cee_stats *stats, bfa_cee_get_stats_cbfn_t cbfn, + void *cbarg); +enum bfa_status bfa_cee_reset_stats(struct bfa_cee *cee, + bfa_cee_reset_stats_cbfn_t cbfn, void *cbarg); + +#endif /* __BFA_CEE_H__ */ diff --git a/drivers/net/bna/bfa_defs.h b/drivers/net/bna/bfa_defs.h new file mode 100644 index 000000000000..29c1b8de2c2d --- /dev/null +++ b/drivers/net/bna/bfa_defs.h @@ -0,0 +1,243 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#ifndef __BFA_DEFS_H__ +#define __BFA_DEFS_H__ + +#include "cna.h" +#include "bfa_defs_status.h" +#include "bfa_defs_mfg_comm.h" + +#define BFA_STRING_32 32 +#define BFA_VERSION_LEN 64 + +/** + * ---------------------- adapter definitions ------------ + */ + +/** + * BFA adapter level attributes. + */ +enum { + BFA_ADAPTER_SERIAL_NUM_LEN = STRSZ(BFA_MFG_SERIALNUM_SIZE), + /* + *!< adapter serial num length + */ + BFA_ADAPTER_MODEL_NAME_LEN = 16, /*!< model name length */ + BFA_ADAPTER_MODEL_DESCR_LEN = 128, /*!< model description length */ + BFA_ADAPTER_MFG_NAME_LEN = 8, /*!< manufacturer name length */ + BFA_ADAPTER_SYM_NAME_LEN = 64, /*!< adapter symbolic name length */ + BFA_ADAPTER_OS_TYPE_LEN = 64, /*!< adapter os type length */ +}; + +struct bfa_adapter_attr { + char manufacturer[BFA_ADAPTER_MFG_NAME_LEN]; + char serial_num[BFA_ADAPTER_SERIAL_NUM_LEN]; + u32 card_type; + char model[BFA_ADAPTER_MODEL_NAME_LEN]; + char model_descr[BFA_ADAPTER_MODEL_DESCR_LEN]; + u64 pwwn; + char node_symname[FC_SYMNAME_MAX]; + char hw_ver[BFA_VERSION_LEN]; + char fw_ver[BFA_VERSION_LEN]; + char optrom_ver[BFA_VERSION_LEN]; + char os_type[BFA_ADAPTER_OS_TYPE_LEN]; + struct bfa_mfg_vpd vpd; + struct mac mac; + + u8 nports; + u8 max_speed; + u8 prototype; + char asic_rev; + + u8 pcie_gen; + u8 pcie_lanes_orig; + u8 pcie_lanes; + u8 cna_capable; + + u8 is_mezz; + u8 trunk_capable; +}; + +/** + * ---------------------- IOC definitions ------------ + */ + +enum { + BFA_IOC_DRIVER_LEN = 16, + BFA_IOC_CHIP_REV_LEN = 8, +}; + +/** + * Driver and firmware versions. + */ +struct bfa_ioc_driver_attr { + char driver[BFA_IOC_DRIVER_LEN]; /*!< driver name */ + char driver_ver[BFA_VERSION_LEN]; /*!< driver version */ + char fw_ver[BFA_VERSION_LEN]; /*!< firmware version */ + char bios_ver[BFA_VERSION_LEN]; /*!< bios version */ + char efi_ver[BFA_VERSION_LEN]; /*!< EFI version */ + char ob_ver[BFA_VERSION_LEN]; /*!< openboot version */ +}; + +/** + * IOC PCI device attributes + */ +struct bfa_ioc_pci_attr { + u16 vendor_id; /*!< PCI vendor ID */ + u16 device_id; /*!< PCI device ID */ + u16 ssid; /*!< subsystem ID */ + u16 ssvid; /*!< subsystem vendor ID */ + u32 pcifn; /*!< PCI device function */ + u32 rsvd; /* padding */ + char chip_rev[BFA_IOC_CHIP_REV_LEN]; /*!< chip revision */ +}; + +/** + * IOC states + */ +enum bfa_ioc_state { + BFA_IOC_RESET = 1, /*!< IOC is in reset state */ + BFA_IOC_SEMWAIT = 2, /*!< Waiting for IOC h/w semaphore */ + BFA_IOC_HWINIT = 3, /*!< IOC h/w is being initialized */ + BFA_IOC_GETATTR = 4, /*!< IOC is being configured */ + BFA_IOC_OPERATIONAL = 5, /*!< IOC is operational */ + BFA_IOC_INITFAIL = 6, /*!< IOC hardware failure */ + BFA_IOC_HBFAIL = 7, /*!< IOC heart-beat failure */ + BFA_IOC_DISABLING = 8, /*!< IOC is being disabled */ + BFA_IOC_DISABLED = 9, /*!< IOC is disabled */ + BFA_IOC_FWMISMATCH = 10, /*!< IOC f/w different from drivers */ +}; + +/** + * IOC firmware stats + */ +struct bfa_fw_ioc_stats { + u32 enable_reqs; + u32 disable_reqs; + u32 get_attr_reqs; + u32 dbg_sync; + u32 dbg_dump; + u32 unknown_reqs; +}; + +/** + * IOC driver stats + */ +struct bfa_ioc_drv_stats { + u32 ioc_isrs; + u32 ioc_enables; + u32 ioc_disables; + u32 ioc_hbfails; + u32 ioc_boots; + u32 stats_tmos; + u32 hb_count; + u32 disable_reqs; + u32 enable_reqs; + u32 disable_replies; + u32 enable_replies; +}; + +/** + * IOC statistics + */ +struct bfa_ioc_stats { + struct bfa_ioc_drv_stats drv_stats; /*!< driver IOC stats */ + struct bfa_fw_ioc_stats fw_stats; /*!< firmware IOC stats */ +}; + +enum bfa_ioc_type { + BFA_IOC_TYPE_FC = 1, + BFA_IOC_TYPE_FCoE = 2, + BFA_IOC_TYPE_LL = 3, +}; + +/** + * IOC attributes returned in queries + */ +struct bfa_ioc_attr { + enum bfa_ioc_type ioc_type; + enum bfa_ioc_state state; /*!< IOC state */ + struct bfa_adapter_attr adapter_attr; /*!< HBA attributes */ + struct bfa_ioc_driver_attr driver_attr; /*!< driver attr */ + struct bfa_ioc_pci_attr pci_attr; + u8 port_id; /*!< port number */ + u8 rsvd[7]; /*!< 64bit align */ +}; + +/** + * ---------------------- mfg definitions ------------ + */ + +/** + * Checksum size + */ +#define BFA_MFG_CHKSUM_SIZE 16 + +#define BFA_MFG_PARTNUM_SIZE 14 +#define BFA_MFG_SUPPLIER_ID_SIZE 10 +#define BFA_MFG_SUPPLIER_PARTNUM_SIZE 20 +#define BFA_MFG_SUPPLIER_SERIALNUM_SIZE 20 +#define BFA_MFG_SUPPLIER_REVISION_SIZE 4 + +#pragma pack(1) + +/** + * @brief BFA adapter manufacturing block definition. + * + * All numerical fields are in big-endian format. + */ +struct bfa_mfg_block { + u8 version; /*!< manufacturing block version */ + u8 mfg_sig[3]; /*!< characters 'M', 'F', 'G' */ + u16 mfgsize; /*!< mfg block size */ + u16 u16_chksum; /*!< old u16 checksum */ + char brcd_serialnum[STRSZ(BFA_MFG_SERIALNUM_SIZE)]; + char brcd_partnum[STRSZ(BFA_MFG_PARTNUM_SIZE)]; + u8 mfg_day; /*!< manufacturing day */ + u8 mfg_month; /*!< manufacturing month */ + u16 mfg_year; /*!< manufacturing year */ + u64 mfg_wwn; /*!< wwn base for this adapter */ + u8 num_wwn; /*!< number of wwns assigned */ + u8 mfg_speeds; /*!< speeds allowed for this adapter */ + u8 rsv[2]; + char supplier_id[STRSZ(BFA_MFG_SUPPLIER_ID_SIZE)]; + char supplier_partnum[STRSZ(BFA_MFG_SUPPLIER_PARTNUM_SIZE)]; + char + supplier_serialnum[STRSZ(BFA_MFG_SUPPLIER_SERIALNUM_SIZE)]; + char + supplier_revision[STRSZ(BFA_MFG_SUPPLIER_REVISION_SIZE)]; + mac_t mfg_mac; /*!< mac address */ + u8 num_mac; /*!< number of mac addresses */ + u8 rsv2; + u32 mfg_type; /*!< card type */ + u8 rsv3[108]; + u8 md5_chksum[BFA_MFG_CHKSUM_SIZE]; /*!< md5 checksum */ +}; + +#pragma pack() + +/** + * ---------------------- pci definitions ------------ + */ + +#define bfa_asic_id_ct(devid) \ + ((devid) == PCI_DEVICE_ID_BROCADE_CT || \ + (devid) == PCI_DEVICE_ID_BROCADE_CT_FC) + +#endif /* __BFA_DEFS_H__ */ diff --git a/drivers/net/bna/bfa_defs_cna.h b/drivers/net/bna/bfa_defs_cna.h new file mode 100644 index 000000000000..7e0a9187bdd5 --- /dev/null +++ b/drivers/net/bna/bfa_defs_cna.h @@ -0,0 +1,223 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#ifndef __BFA_DEFS_CNA_H__ +#define __BFA_DEFS_CNA_H__ + +#include "bfa_defs.h" + +/** + * @brief + * FC physical port statistics. + */ +struct bfa_port_fc_stats { + u64 secs_reset; /*!< Seconds since stats is reset */ + u64 tx_frames; /*!< Tx frames */ + u64 tx_words; /*!< Tx words */ + u64 tx_lip; /*!< Tx LIP */ + u64 tx_nos; /*!< Tx NOS */ + u64 tx_ols; /*!< Tx OLS */ + u64 tx_lr; /*!< Tx LR */ + u64 tx_lrr; /*!< Tx LRR */ + u64 rx_frames; /*!< Rx frames */ + u64 rx_words; /*!< Rx words */ + u64 lip_count; /*!< Rx LIP */ + u64 nos_count; /*!< Rx NOS */ + u64 ols_count; /*!< Rx OLS */ + u64 lr_count; /*!< Rx LR */ + u64 lrr_count; /*!< Rx LRR */ + u64 invalid_crcs; /*!< Rx CRC err frames */ + u64 invalid_crc_gd_eof; /*!< Rx CRC err good EOF frames */ + u64 undersized_frm; /*!< Rx undersized frames */ + u64 oversized_frm; /*!< Rx oversized frames */ + u64 bad_eof_frm; /*!< Rx frames with bad EOF */ + u64 error_frames; /*!< Errored frames */ + u64 dropped_frames; /*!< Dropped frames */ + u64 link_failures; /*!< Link Failure (LF) count */ + u64 loss_of_syncs; /*!< Loss of sync count */ + u64 loss_of_signals; /*!< Loss of signal count */ + u64 primseq_errs; /*!< Primitive sequence protocol err. */ + u64 bad_os_count; /*!< Invalid ordered sets */ + u64 err_enc_out; /*!< Encoding err nonframe_8b10b */ + u64 err_enc; /*!< Encoding err frame_8b10b */ +}; + +/** + * @brief + * Eth Physical Port statistics. + */ +struct bfa_port_eth_stats { + u64 secs_reset; /*!< Seconds since stats is reset */ + u64 frame_64; /*!< Frames 64 bytes */ + u64 frame_65_127; /*!< Frames 65-127 bytes */ + u64 frame_128_255; /*!< Frames 128-255 bytes */ + u64 frame_256_511; /*!< Frames 256-511 bytes */ + u64 frame_512_1023; /*!< Frames 512-1023 bytes */ + u64 frame_1024_1518; /*!< Frames 1024-1518 bytes */ + u64 frame_1519_1522; /*!< Frames 1519-1522 bytes */ + u64 tx_bytes; /*!< Tx bytes */ + u64 tx_packets; /*!< Tx packets */ + u64 tx_mcast_packets; /*!< Tx multicast packets */ + u64 tx_bcast_packets; /*!< Tx broadcast packets */ + u64 tx_control_frame; /*!< Tx control frame */ + u64 tx_drop; /*!< Tx drops */ + u64 tx_jabber; /*!< Tx jabber */ + u64 tx_fcs_error; /*!< Tx FCS errors */ + u64 tx_fragments; /*!< Tx fragments */ + u64 rx_bytes; /*!< Rx bytes */ + u64 rx_packets; /*!< Rx packets */ + u64 rx_mcast_packets; /*!< Rx multicast packets */ + u64 rx_bcast_packets; /*!< Rx broadcast packets */ + u64 rx_control_frames; /*!< Rx control frames */ + u64 rx_unknown_opcode; /*!< Rx unknown opcode */ + u64 rx_drop; /*!< Rx drops */ + u64 rx_jabber; /*!< Rx jabber */ + u64 rx_fcs_error; /*!< Rx FCS errors */ + u64 rx_alignment_error; /*!< Rx alignment errors */ + u64 rx_frame_length_error; /*!< Rx frame len errors */ + u64 rx_code_error; /*!< Rx code errors */ + u64 rx_fragments; /*!< Rx fragments */ + u64 rx_pause; /*!< Rx pause */ + u64 rx_zero_pause; /*!< Rx zero pause */ + u64 tx_pause; /*!< Tx pause */ + u64 tx_zero_pause; /*!< Tx zero pause */ + u64 rx_fcoe_pause; /*!< Rx FCoE pause */ + u64 rx_fcoe_zero_pause; /*!< Rx FCoE zero pause */ + u64 tx_fcoe_pause; /*!< Tx FCoE pause */ + u64 tx_fcoe_zero_pause; /*!< Tx FCoE zero pause */ +}; + +/** + * @brief + * Port statistics. + */ +union bfa_port_stats_u { + struct bfa_port_fc_stats fc; + struct bfa_port_eth_stats eth; +}; + +#pragma pack(1) + +#define BFA_CEE_LLDP_MAX_STRING_LEN (128) +#define BFA_CEE_DCBX_MAX_PRIORITY (8) +#define BFA_CEE_DCBX_MAX_PGID (8) + +#define BFA_CEE_LLDP_SYS_CAP_OTHER 0x0001 +#define BFA_CEE_LLDP_SYS_CAP_REPEATER 0x0002 +#define BFA_CEE_LLDP_SYS_CAP_MAC_BRIDGE 0x0004 +#define BFA_CEE_LLDP_SYS_CAP_WLAN_AP 0x0008 +#define BFA_CEE_LLDP_SYS_CAP_ROUTER 0x0010 +#define BFA_CEE_LLDP_SYS_CAP_TELEPHONE 0x0020 +#define BFA_CEE_LLDP_SYS_CAP_DOCSIS_CD 0x0040 +#define BFA_CEE_LLDP_SYS_CAP_STATION 0x0080 +#define BFA_CEE_LLDP_SYS_CAP_CVLAN 0x0100 +#define BFA_CEE_LLDP_SYS_CAP_SVLAN 0x0200 +#define BFA_CEE_LLDP_SYS_CAP_TPMR 0x0400 + +/* LLDP string type */ +struct bfa_cee_lldp_str { + u8 sub_type; + u8 len; + u8 rsvd[2]; + u8 value[BFA_CEE_LLDP_MAX_STRING_LEN]; +}; + +/* LLDP paramters */ +struct bfa_cee_lldp_cfg { + struct bfa_cee_lldp_str chassis_id; + struct bfa_cee_lldp_str port_id; + struct bfa_cee_lldp_str port_desc; + struct bfa_cee_lldp_str sys_name; + struct bfa_cee_lldp_str sys_desc; + struct bfa_cee_lldp_str mgmt_addr; + u16 time_to_live; + u16 enabled_system_cap; +}; + +enum bfa_cee_dcbx_version { + DCBX_PROTOCOL_PRECEE = 1, + DCBX_PROTOCOL_CEE = 2, +}; + +enum bfa_cee_lls { + /* LLS is down because the TLV not sent by the peer */ + CEE_LLS_DOWN_NO_TLV = 0, + /* LLS is down as advertised by the peer */ + CEE_LLS_DOWN = 1, + CEE_LLS_UP = 2, +}; + +/* CEE/DCBX parameters */ +struct bfa_cee_dcbx_cfg { + u8 pgid[BFA_CEE_DCBX_MAX_PRIORITY]; + u8 pg_percentage[BFA_CEE_DCBX_MAX_PGID]; + u8 pfc_primap; /* bitmap of priorties with PFC enabled */ + u8 fcoe_primap; /* bitmap of priorities used for FcoE traffic */ + u8 iscsi_primap; /* bitmap of priorities used for iSCSI traffic */ + u8 dcbx_version; /* operating version:CEE or preCEE */ + u8 lls_fcoe; /* FCoE Logical Link Status */ + u8 lls_lan; /* LAN Logical Link Status */ + u8 rsvd[2]; +}; + +/* CEE status */ +/* Making this to tri-state for the benefit of port list command */ +enum bfa_cee_status { + CEE_UP = 0, + CEE_PHY_UP = 1, + CEE_LOOPBACK = 2, + CEE_PHY_DOWN = 3, +}; + +/* CEE Query */ +struct bfa_cee_attr { + u8 cee_status; + u8 error_reason; + struct bfa_cee_lldp_cfg lldp_remote; + struct bfa_cee_dcbx_cfg dcbx_remote; + mac_t src_mac; + u8 link_speed; + u8 nw_priority; + u8 filler[2]; +}; + +/* LLDP/DCBX/CEE Statistics */ +struct bfa_cee_stats { + u32 lldp_tx_frames; /*!< LLDP Tx Frames */ + u32 lldp_rx_frames; /*!< LLDP Rx Frames */ + u32 lldp_rx_frames_invalid; /*!< LLDP Rx Frames invalid */ + u32 lldp_rx_frames_new; /*!< LLDP Rx Frames new */ + u32 lldp_tlvs_unrecognized; /*!< LLDP Rx unrecognized TLVs */ + u32 lldp_rx_shutdown_tlvs; /*!< LLDP Rx shutdown TLVs */ + u32 lldp_info_aged_out; /*!< LLDP remote info aged out */ + u32 dcbx_phylink_ups; /*!< DCBX phy link ups */ + u32 dcbx_phylink_downs; /*!< DCBX phy link downs */ + u32 dcbx_rx_tlvs; /*!< DCBX Rx TLVs */ + u32 dcbx_rx_tlvs_invalid; /*!< DCBX Rx TLVs invalid */ + u32 dcbx_control_tlv_error; /*!< DCBX control TLV errors */ + u32 dcbx_feature_tlv_error; /*!< DCBX feature TLV errors */ + u32 dcbx_cee_cfg_new; /*!< DCBX new CEE cfg rcvd */ + u32 cee_status_down; /*!< CEE status down */ + u32 cee_status_up; /*!< CEE status up */ + u32 cee_hw_cfg_changed; /*!< CEE hw cfg changed */ + u32 cee_rx_invalid_cfg; /*!< CEE invalid cfg */ +}; + +#pragma pack() + +#endif /* __BFA_DEFS_CNA_H__ */ diff --git a/drivers/net/bna/bfa_defs_mfg_comm.h b/drivers/net/bna/bfa_defs_mfg_comm.h new file mode 100644 index 000000000000..987978fcb3fe --- /dev/null +++ b/drivers/net/bna/bfa_defs_mfg_comm.h @@ -0,0 +1,244 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#ifndef __BFA_DEFS_MFG_COMM_H__ +#define __BFA_DEFS_MFG_COMM_H__ + +#include "cna.h" + +/** + * Manufacturing block version + */ +#define BFA_MFG_VERSION 2 +#define BFA_MFG_VERSION_UNINIT 0xFF + +/** + * Manufacturing block encrypted version + */ +#define BFA_MFG_ENC_VER 2 + +/** + * Manufacturing block version 1 length + */ +#define BFA_MFG_VER1_LEN 128 + +/** + * Manufacturing block header length + */ +#define BFA_MFG_HDR_LEN 4 + +#define BFA_MFG_SERIALNUM_SIZE 11 +#define STRSZ(_n) (((_n) + 4) & ~3) + +/** + * Manufacturing card type + */ +enum { + BFA_MFG_TYPE_CB_MAX = 825, /*!< Crossbow card type max */ + BFA_MFG_TYPE_FC8P2 = 825, /*!< 8G 2port FC card */ + BFA_MFG_TYPE_FC8P1 = 815, /*!< 8G 1port FC card */ + BFA_MFG_TYPE_FC4P2 = 425, /*!< 4G 2port FC card */ + BFA_MFG_TYPE_FC4P1 = 415, /*!< 4G 1port FC card */ + BFA_MFG_TYPE_CNA10P2 = 1020, /*!< 10G 2port CNA card */ + BFA_MFG_TYPE_CNA10P1 = 1010, /*!< 10G 1port CNA card */ + BFA_MFG_TYPE_JAYHAWK = 804, /*!< Jayhawk mezz card */ + BFA_MFG_TYPE_WANCHESE = 1007, /*!< Wanchese mezz card */ + BFA_MFG_TYPE_ASTRA = 807, /*!< Astra mezz card */ + BFA_MFG_TYPE_LIGHTNING_P0 = 902, /*!< Lightning mezz card - old */ + BFA_MFG_TYPE_LIGHTNING = 1741, /*!< Lightning mezz card */ + BFA_MFG_TYPE_INVALID = 0, /*!< Invalid card type */ +}; + +#pragma pack(1) + +/** + * Check if 1-port card + */ +#define bfa_mfg_is_1port(type) (( \ + (type) == BFA_MFG_TYPE_FC8P1 || \ + (type) == BFA_MFG_TYPE_FC4P1 || \ + (type) == BFA_MFG_TYPE_CNA10P1)) + +/** + * Check if Mezz card + */ +#define bfa_mfg_is_mezz(type) (( \ + (type) == BFA_MFG_TYPE_JAYHAWK || \ + (type) == BFA_MFG_TYPE_WANCHESE || \ + (type) == BFA_MFG_TYPE_ASTRA || \ + (type) == BFA_MFG_TYPE_LIGHTNING_P0 || \ + (type) == BFA_MFG_TYPE_LIGHTNING)) + +/** + * Check if card type valid + */ +#define bfa_mfg_is_card_type_valid(type) (( \ + (type) == BFA_MFG_TYPE_FC8P2 || \ + (type) == BFA_MFG_TYPE_FC8P1 || \ + (type) == BFA_MFG_TYPE_FC4P2 || \ + (type) == BFA_MFG_TYPE_FC4P1 || \ + (type) == BFA_MFG_TYPE_CNA10P2 || \ + (type) == BFA_MFG_TYPE_CNA10P1 || \ + bfa_mfg_is_mezz(type))) + +/** + * Check if the card having old wwn/mac handling + */ +#define bfa_mfg_is_old_wwn_mac_model(type) (( \ + (type) == BFA_MFG_TYPE_FC8P2 || \ + (type) == BFA_MFG_TYPE_FC8P1 || \ + (type) == BFA_MFG_TYPE_FC4P2 || \ + (type) == BFA_MFG_TYPE_FC4P1 || \ + (type) == BFA_MFG_TYPE_CNA10P2 || \ + (type) == BFA_MFG_TYPE_CNA10P1 || \ + (type) == BFA_MFG_TYPE_JAYHAWK || \ + (type) == BFA_MFG_TYPE_WANCHESE)) + +#define bfa_mfg_increment_wwn_mac(m, i) \ +do { \ + u32 t = ((m)[0] << 16) | ((m)[1] << 8) | (m)[2]; \ + t += (i); \ + (m)[0] = (t >> 16) & 0xFF; \ + (m)[1] = (t >> 8) & 0xFF; \ + (m)[2] = t & 0xFF; \ +} while (0) + +#define bfa_mfg_adapter_prop_init_flash(card_type, prop) \ +do { \ + switch ((card_type)) { \ + case BFA_MFG_TYPE_FC8P2: \ + case BFA_MFG_TYPE_JAYHAWK: \ + case BFA_MFG_TYPE_ASTRA: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 2) | \ + BFI_ADAPTER_SETP(SPEED, 8); \ + break; \ + case BFA_MFG_TYPE_FC8P1: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 1) | \ + BFI_ADAPTER_SETP(SPEED, 8); \ + break; \ + case BFA_MFG_TYPE_FC4P2: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 2) | \ + BFI_ADAPTER_SETP(SPEED, 4); \ + break; \ + case BFA_MFG_TYPE_FC4P1: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 1) | \ + BFI_ADAPTER_SETP(SPEED, 4); \ + break; \ + case BFA_MFG_TYPE_CNA10P2: \ + case BFA_MFG_TYPE_WANCHESE: \ + case BFA_MFG_TYPE_LIGHTNING_P0: \ + case BFA_MFG_TYPE_LIGHTNING: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 2); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 10); \ + break; \ + case BFA_MFG_TYPE_CNA10P1: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 1); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 10); \ + break; \ + default: \ + (prop) = BFI_ADAPTER_UNSUPP; \ + } \ +} while (0) + +enum { + CB_GPIO_TTV = (1), /*!< TTV debug capable cards */ + CB_GPIO_FC8P2 = (2), /*!< 8G 2port FC card */ + CB_GPIO_FC8P1 = (3), /*!< 8G 1port FC card */ + CB_GPIO_FC4P2 = (4), /*!< 4G 2port FC card */ + CB_GPIO_FC4P1 = (5), /*!< 4G 1port FC card */ + CB_GPIO_DFLY = (6), /*!< 8G 2port FC mezzanine card */ + CB_GPIO_PROTO = (1 << 7) /*!< 8G 2port FC prototypes */ +}; + +#define bfa_mfg_adapter_prop_init_gpio(gpio, card_type, prop) \ +do { \ + if ((gpio) & CB_GPIO_PROTO) { \ + (prop) |= BFI_ADAPTER_PROTO; \ + (gpio) &= ~CB_GPIO_PROTO; \ + } \ + switch ((gpio)) { \ + case CB_GPIO_TTV: \ + (prop) |= BFI_ADAPTER_TTV; \ + case CB_GPIO_DFLY: \ + case CB_GPIO_FC8P2: \ + (prop) |= BFI_ADAPTER_SETP(NPORTS, 2); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 8); \ + (card_type) = BFA_MFG_TYPE_FC8P2; \ + break; \ + case CB_GPIO_FC8P1: \ + (prop) |= BFI_ADAPTER_SETP(NPORTS, 1); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 8); \ + (card_type) = BFA_MFG_TYPE_FC8P1; \ + break; \ + case CB_GPIO_FC4P2: \ + (prop) |= BFI_ADAPTER_SETP(NPORTS, 2); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 4); \ + (card_type) = BFA_MFG_TYPE_FC4P2; \ + break; \ + case CB_GPIO_FC4P1: \ + (prop) |= BFI_ADAPTER_SETP(NPORTS, 1); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 4); \ + (card_type) = BFA_MFG_TYPE_FC4P1; \ + break; \ + default: \ + (prop) |= BFI_ADAPTER_UNSUPP; \ + (card_type) = BFA_MFG_TYPE_INVALID; \ + } \ +} while (0) + +/** + * VPD data length + */ +#define BFA_MFG_VPD_LEN 512 +#define BFA_MFG_VPD_LEN_INVALID 0 + +#define BFA_MFG_VPD_PCI_HDR_OFF 137 +#define BFA_MFG_VPD_PCI_VER_MASK 0x07 /*!< version mask 3 bits */ +#define BFA_MFG_VPD_PCI_VDR_MASK 0xf8 /*!< vendor mask 5 bits */ + +/** + * VPD vendor tag + */ +enum { + BFA_MFG_VPD_UNKNOWN = 0, /*!< vendor unknown */ + BFA_MFG_VPD_IBM = 1, /*!< vendor IBM */ + BFA_MFG_VPD_HP = 2, /*!< vendor HP */ + BFA_MFG_VPD_DELL = 3, /*!< vendor DELL */ + BFA_MFG_VPD_PCI_IBM = 0x08, /*!< PCI VPD IBM */ + BFA_MFG_VPD_PCI_HP = 0x10, /*!< PCI VPD HP */ + BFA_MFG_VPD_PCI_DELL = 0x20, /*!< PCI VPD DELL */ + BFA_MFG_VPD_PCI_BRCD = 0xf8, /*!< PCI VPD Brocade */ +}; + +/** + * @brief BFA adapter flash vpd data definition. + * + * All numerical fields are in big-endian format. + */ +struct bfa_mfg_vpd { + u8 version; /*!< vpd data version */ + u8 vpd_sig[3]; /*!< characters 'V', 'P', 'D' */ + u8 chksum; /*!< u8 checksum */ + u8 vendor; /*!< vendor */ + u8 len; /*!< vpd data length excluding header */ + u8 rsv; + u8 data[BFA_MFG_VPD_LEN]; /*!< vpd data */ +}; + +#pragma pack() + +#endif /* __BFA_DEFS_MFG_H__ */ diff --git a/drivers/net/bna/bfa_defs_status.h b/drivers/net/bna/bfa_defs_status.h new file mode 100644 index 000000000000..af951126375c --- /dev/null +++ b/drivers/net/bna/bfa_defs_status.h @@ -0,0 +1,216 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#ifndef __BFA_DEFS_STATUS_H__ +#define __BFA_DEFS_STATUS_H__ + +/** + * API status return values + * + * NOTE: The error msgs are auto generated from the comments. Only singe line + * comments are supported + */ +enum bfa_status { + BFA_STATUS_OK = 0, + BFA_STATUS_FAILED = 1, + BFA_STATUS_EINVAL = 2, + BFA_STATUS_ENOMEM = 3, + BFA_STATUS_ENOSYS = 4, + BFA_STATUS_ETIMER = 5, + BFA_STATUS_EPROTOCOL = 6, + BFA_STATUS_ENOFCPORTS = 7, + BFA_STATUS_NOFLASH = 8, + BFA_STATUS_BADFLASH = 9, + BFA_STATUS_SFP_UNSUPP = 10, + BFA_STATUS_UNKNOWN_VFID = 11, + BFA_STATUS_DATACORRUPTED = 12, + BFA_STATUS_DEVBUSY = 13, + BFA_STATUS_ABORTED = 14, + BFA_STATUS_NODEV = 15, + BFA_STATUS_HDMA_FAILED = 16, + BFA_STATUS_FLASH_BAD_LEN = 17, + BFA_STATUS_UNKNOWN_LWWN = 18, + BFA_STATUS_UNKNOWN_RWWN = 19, + BFA_STATUS_FCPT_LS_RJT = 20, + BFA_STATUS_VPORT_EXISTS = 21, + BFA_STATUS_VPORT_MAX = 22, + BFA_STATUS_UNSUPP_SPEED = 23, + BFA_STATUS_INVLD_DFSZ = 24, + BFA_STATUS_CNFG_FAILED = 25, + BFA_STATUS_CMD_NOTSUPP = 26, + BFA_STATUS_NO_ADAPTER = 27, + BFA_STATUS_LINKDOWN = 28, + BFA_STATUS_FABRIC_RJT = 29, + BFA_STATUS_UNKNOWN_VWWN = 30, + BFA_STATUS_NSLOGIN_FAILED = 31, + BFA_STATUS_NO_RPORTS = 32, + BFA_STATUS_NSQUERY_FAILED = 33, + BFA_STATUS_PORT_OFFLINE = 34, + BFA_STATUS_RPORT_OFFLINE = 35, + BFA_STATUS_TGTOPEN_FAILED = 36, + BFA_STATUS_BAD_LUNS = 37, + BFA_STATUS_IO_FAILURE = 38, + BFA_STATUS_NO_FABRIC = 39, + BFA_STATUS_EBADF = 40, + BFA_STATUS_EINTR = 41, + BFA_STATUS_EIO = 42, + BFA_STATUS_ENOTTY = 43, + BFA_STATUS_ENXIO = 44, + BFA_STATUS_EFOPEN = 45, + BFA_STATUS_VPORT_WWN_BP = 46, + BFA_STATUS_PORT_NOT_DISABLED = 47, + BFA_STATUS_BADFRMHDR = 48, + BFA_STATUS_BADFRMSZ = 49, + BFA_STATUS_MISSINGFRM = 50, + BFA_STATUS_LINKTIMEOUT = 51, + BFA_STATUS_NO_FCPIM_NEXUS = 52, + BFA_STATUS_CHECKSUM_FAIL = 53, + BFA_STATUS_GZME_FAILED = 54, + BFA_STATUS_SCSISTART_REQD = 55, + BFA_STATUS_IOC_FAILURE = 56, + BFA_STATUS_INVALID_WWN = 57, + BFA_STATUS_MISMATCH = 58, + BFA_STATUS_IOC_ENABLED = 59, + BFA_STATUS_ADAPTER_ENABLED = 60, + BFA_STATUS_IOC_NON_OP = 61, + BFA_STATUS_ADDR_MAP_FAILURE = 62, + BFA_STATUS_SAME_NAME = 63, + BFA_STATUS_PENDING = 64, + BFA_STATUS_8G_SPD = 65, + BFA_STATUS_4G_SPD = 66, + BFA_STATUS_AD_IS_ENABLE = 67, + BFA_STATUS_EINVAL_TOV = 68, + BFA_STATUS_EINVAL_QDEPTH = 69, + BFA_STATUS_VERSION_FAIL = 70, + BFA_STATUS_DIAG_BUSY = 71, + BFA_STATUS_BEACON_ON = 72, + BFA_STATUS_BEACON_OFF = 73, + BFA_STATUS_LBEACON_ON = 74, + BFA_STATUS_LBEACON_OFF = 75, + BFA_STATUS_PORT_NOT_INITED = 76, + BFA_STATUS_RPSC_ENABLED = 77, + BFA_STATUS_ENOFSAVE = 78, + BFA_STATUS_BAD_FILE = 79, + BFA_STATUS_RLIM_EN = 80, + BFA_STATUS_RLIM_DIS = 81, + BFA_STATUS_IOC_DISABLED = 82, + BFA_STATUS_ADAPTER_DISABLED = 83, + BFA_STATUS_BIOS_DISABLED = 84, + BFA_STATUS_AUTH_ENABLED = 85, + BFA_STATUS_AUTH_DISABLED = 86, + BFA_STATUS_ERROR_TRL_ENABLED = 87, + BFA_STATUS_ERROR_QOS_ENABLED = 88, + BFA_STATUS_NO_SFP_DEV = 89, + BFA_STATUS_MEMTEST_FAILED = 90, + BFA_STATUS_INVALID_DEVID = 91, + BFA_STATUS_QOS_ENABLED = 92, + BFA_STATUS_QOS_DISABLED = 93, + BFA_STATUS_INCORRECT_DRV_CONFIG = 94, + BFA_STATUS_REG_FAIL = 95, + BFA_STATUS_IM_INV_CODE = 96, + BFA_STATUS_IM_INV_VLAN = 97, + BFA_STATUS_IM_INV_ADAPT_NAME = 98, + BFA_STATUS_IM_LOW_RESOURCES = 99, + BFA_STATUS_IM_VLANID_IS_PVID = 100, + BFA_STATUS_IM_VLANID_EXISTS = 101, + BFA_STATUS_IM_FW_UPDATE_FAIL = 102, + BFA_STATUS_PORTLOG_ENABLED = 103, + BFA_STATUS_PORTLOG_DISABLED = 104, + BFA_STATUS_FILE_NOT_FOUND = 105, + BFA_STATUS_QOS_FC_ONLY = 106, + BFA_STATUS_RLIM_FC_ONLY = 107, + BFA_STATUS_CT_SPD = 108, + BFA_STATUS_LEDTEST_OP = 109, + BFA_STATUS_CEE_NOT_DN = 110, + BFA_STATUS_10G_SPD = 111, + BFA_STATUS_IM_INV_TEAM_NAME = 112, + BFA_STATUS_IM_DUP_TEAM_NAME = 113, + BFA_STATUS_IM_ADAPT_ALREADY_IN_TEAM = 114, + BFA_STATUS_IM_ADAPT_HAS_VLANS = 115, + BFA_STATUS_IM_PVID_MISMATCH = 116, + BFA_STATUS_IM_LINK_SPEED_MISMATCH = 117, + BFA_STATUS_IM_MTU_MISMATCH = 118, + BFA_STATUS_IM_RSS_MISMATCH = 119, + BFA_STATUS_IM_HDS_MISMATCH = 120, + BFA_STATUS_IM_OFFLOAD_MISMATCH = 121, + BFA_STATUS_IM_PORT_PARAMS = 122, + BFA_STATUS_IM_PORT_NOT_IN_TEAM = 123, + BFA_STATUS_IM_CANNOT_REM_PRI = 124, + BFA_STATUS_IM_MAX_PORTS_REACHED = 125, + BFA_STATUS_IM_LAST_PORT_DELETE = 126, + BFA_STATUS_IM_NO_DRIVER = 127, + BFA_STATUS_IM_MAX_VLANS_REACHED = 128, + BFA_STATUS_TOMCAT_SPD_NOT_ALLOWED = 129, + BFA_STATUS_NO_MINPORT_DRIVER = 130, + BFA_STATUS_CARD_TYPE_MISMATCH = 131, + BFA_STATUS_BAD_ASICBLK = 132, + BFA_STATUS_NO_DRIVER = 133, + BFA_STATUS_INVALID_MAC = 134, + BFA_STATUS_IM_NO_VLAN = 135, + BFA_STATUS_IM_ETH_LB_FAILED = 136, + BFA_STATUS_IM_PVID_REMOVE = 137, + BFA_STATUS_IM_PVID_EDIT = 138, + BFA_STATUS_CNA_NO_BOOT = 139, + BFA_STATUS_IM_PVID_NON_ZERO = 140, + BFA_STATUS_IM_INETCFG_LOCK_FAILED = 141, + BFA_STATUS_IM_GET_INETCFG_FAILED = 142, + BFA_STATUS_IM_NOT_BOUND = 143, + BFA_STATUS_INSUFFICIENT_PERMS = 144, + BFA_STATUS_IM_INV_VLAN_NAME = 145, + BFA_STATUS_CMD_NOTSUPP_CNA = 146, + BFA_STATUS_IM_PASSTHRU_EDIT = 147, + BFA_STATUS_IM_BIND_FAILED = 148, + BFA_STATUS_IM_UNBIND_FAILED = 149, + BFA_STATUS_IM_PORT_IN_TEAM = 150, + BFA_STATUS_IM_VLAN_NOT_FOUND = 151, + BFA_STATUS_IM_TEAM_NOT_FOUND = 152, + BFA_STATUS_IM_TEAM_CFG_NOT_ALLOWED = 153, + BFA_STATUS_PBC = 154, + BFA_STATUS_DEVID_MISSING = 155, + BFA_STATUS_BAD_FWCFG = 156, + BFA_STATUS_CREATE_FILE = 157, + BFA_STATUS_INVALID_VENDOR = 158, + BFA_STATUS_SFP_NOT_READY = 159, + BFA_STATUS_FLASH_UNINIT = 160, + BFA_STATUS_FLASH_EMPTY = 161, + BFA_STATUS_FLASH_CKFAIL = 162, + BFA_STATUS_TRUNK_UNSUPP = 163, + BFA_STATUS_TRUNK_ENABLED = 164, + BFA_STATUS_TRUNK_DISABLED = 165, + BFA_STATUS_TRUNK_ERROR_TRL_ENABLED = 166, + BFA_STATUS_BOOT_CODE_UPDATED = 167, + BFA_STATUS_BOOT_VERSION = 168, + BFA_STATUS_CARDTYPE_MISSING = 169, + BFA_STATUS_INVALID_CARDTYPE = 170, + BFA_STATUS_NO_TOPOLOGY_FOR_CNA = 171, + BFA_STATUS_IM_VLAN_OVER_TEAM_DELETE_FAILED = 172, + BFA_STATUS_ETHBOOT_ENABLED = 173, + BFA_STATUS_ETHBOOT_DISABLED = 174, + BFA_STATUS_IOPROFILE_OFF = 175, + BFA_STATUS_NO_PORT_INSTANCE = 176, + BFA_STATUS_BOOT_CODE_TIMEDOUT = 177, + BFA_STATUS_NO_VPORT_LOCK = 178, + BFA_STATUS_VPORT_NO_CNFG = 179, + BFA_STATUS_MAX_VAL +}; + +enum bfa_eproto_status { + BFA_EPROTO_BAD_ACCEPT = 0, + BFA_EPROTO_UNKNOWN_RSP = 1 +}; + +#endif /* __BFA_DEFS_STATUS_H__ */ diff --git a/drivers/net/bna/bfa_ioc.c b/drivers/net/bna/bfa_ioc.c new file mode 100644 index 000000000000..cdc2cb1597ec --- /dev/null +++ b/drivers/net/bna/bfa_ioc.c @@ -0,0 +1,1839 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#include "bfa_ioc.h" +#include "cna.h" +#include "bfi.h" +#include "bfi_ctreg.h" +#include "bfa_defs.h" + +/** + * IOC local definitions + */ + +#define bfa_ioc_timer_start(__ioc) \ + mod_timer(&(__ioc)->ioc_timer, jiffies + \ + msecs_to_jiffies(BFA_IOC_TOV)) +#define bfa_ioc_timer_stop(__ioc) del_timer(&(__ioc)->ioc_timer) + +#define bfa_ioc_recovery_timer_start(__ioc) \ + mod_timer(&(__ioc)->ioc_timer, jiffies + \ + msecs_to_jiffies(BFA_IOC_TOV_RECOVER)) + +#define bfa_sem_timer_start(__ioc) \ + mod_timer(&(__ioc)->sem_timer, jiffies + \ + msecs_to_jiffies(BFA_IOC_HWSEM_TOV)) +#define bfa_sem_timer_stop(__ioc) del_timer(&(__ioc)->sem_timer) + +#define bfa_hb_timer_start(__ioc) \ + mod_timer(&(__ioc)->hb_timer, jiffies + \ + msecs_to_jiffies(BFA_IOC_HB_TOV)) +#define bfa_hb_timer_stop(__ioc) del_timer(&(__ioc)->hb_timer) + +/** + * Asic specific macros : see bfa_hw_cb.c and bfa_hw_ct.c for details. + */ + +#define bfa_ioc_firmware_lock(__ioc) \ + ((__ioc)->ioc_hwif->ioc_firmware_lock(__ioc)) +#define bfa_ioc_firmware_unlock(__ioc) \ + ((__ioc)->ioc_hwif->ioc_firmware_unlock(__ioc)) +#define bfa_ioc_reg_init(__ioc) ((__ioc)->ioc_hwif->ioc_reg_init(__ioc)) +#define bfa_ioc_map_port(__ioc) ((__ioc)->ioc_hwif->ioc_map_port(__ioc)) +#define bfa_ioc_notify_hbfail(__ioc) \ + ((__ioc)->ioc_hwif->ioc_notify_hbfail(__ioc)) + +#define bfa_ioc_is_optrom(__ioc) \ + (bfa_cb_image_get_size(BFA_IOC_FWIMG_TYPE(__ioc)) < BFA_IOC_FWIMG_MINSZ) + +#define bfa_ioc_mbox_cmd_pending(__ioc) \ + (!list_empty(&((__ioc)->mbox_mod.cmd_q)) || \ + readl((__ioc)->ioc_regs.hfn_mbox_cmd)) + +bool bfa_auto_recover = true; + +/* + * forward declarations + */ +static void bfa_ioc_hw_sem_get(struct bfa_ioc *ioc); +static void bfa_ioc_hw_sem_get_cancel(struct bfa_ioc *ioc); +static void bfa_ioc_hwinit(struct bfa_ioc *ioc, bool force); +static void bfa_ioc_send_enable(struct bfa_ioc *ioc); +static void bfa_ioc_send_disable(struct bfa_ioc *ioc); +static void bfa_ioc_send_getattr(struct bfa_ioc *ioc); +static void bfa_ioc_hb_monitor(struct bfa_ioc *ioc); +static void bfa_ioc_hb_stop(struct bfa_ioc *ioc); +static void bfa_ioc_reset(struct bfa_ioc *ioc, bool force); +static void bfa_ioc_mbox_poll(struct bfa_ioc *ioc); +static void bfa_ioc_mbox_hbfail(struct bfa_ioc *ioc); +static void bfa_ioc_recover(struct bfa_ioc *ioc); +static void bfa_ioc_check_attr_wwns(struct bfa_ioc *ioc); +static void bfa_ioc_disable_comp(struct bfa_ioc *ioc); +static void bfa_ioc_lpu_stop(struct bfa_ioc *ioc); + +/** + * IOC state machine events + */ +enum ioc_event { + IOC_E_ENABLE = 1, /*!< IOC enable request */ + IOC_E_DISABLE = 2, /*!< IOC disable request */ + IOC_E_TIMEOUT = 3, /*!< f/w response timeout */ + IOC_E_FWREADY = 4, /*!< f/w initialization done */ + IOC_E_FWRSP_GETATTR = 5, /*!< IOC get attribute response */ + IOC_E_FWRSP_ENABLE = 6, /*!< enable f/w response */ + IOC_E_FWRSP_DISABLE = 7, /*!< disable f/w response */ + IOC_E_HBFAIL = 8, /*!< heartbeat failure */ + IOC_E_HWERROR = 9, /*!< hardware error interrupt */ + IOC_E_SEMLOCKED = 10, /*!< h/w semaphore is locked */ + IOC_E_DETACH = 11, /*!< driver detach cleanup */ +}; + +bfa_fsm_state_decl(bfa_ioc, reset, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, fwcheck, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, mismatch, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, semwait, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, hwinit, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, enabling, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, getattr, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, op, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, initfail, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, hbfail, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, disabling, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, disabled, struct bfa_ioc, enum ioc_event); + +static struct bfa_sm_table ioc_sm_table[] = { + {BFA_SM(bfa_ioc_sm_reset), BFA_IOC_RESET}, + {BFA_SM(bfa_ioc_sm_fwcheck), BFA_IOC_FWMISMATCH}, + {BFA_SM(bfa_ioc_sm_mismatch), BFA_IOC_FWMISMATCH}, + {BFA_SM(bfa_ioc_sm_semwait), BFA_IOC_SEMWAIT}, + {BFA_SM(bfa_ioc_sm_hwinit), BFA_IOC_HWINIT}, + {BFA_SM(bfa_ioc_sm_enabling), BFA_IOC_HWINIT}, + {BFA_SM(bfa_ioc_sm_getattr), BFA_IOC_GETATTR}, + {BFA_SM(bfa_ioc_sm_op), BFA_IOC_OPERATIONAL}, + {BFA_SM(bfa_ioc_sm_initfail), BFA_IOC_INITFAIL}, + {BFA_SM(bfa_ioc_sm_hbfail), BFA_IOC_HBFAIL}, + {BFA_SM(bfa_ioc_sm_disabling), BFA_IOC_DISABLING}, + {BFA_SM(bfa_ioc_sm_disabled), BFA_IOC_DISABLED}, +}; + +/** + * Reset entry actions -- initialize state machine + */ +static void +bfa_ioc_sm_reset_entry(struct bfa_ioc *ioc) +{ + ioc->retry_count = 0; + ioc->auto_recover = bfa_auto_recover; +} + +/** + * Beginning state. IOC is in reset state. + */ +static void +bfa_ioc_sm_reset(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_ENABLE: + bfa_fsm_set_state(ioc, bfa_ioc_sm_fwcheck); + break; + + case IOC_E_DISABLE: + bfa_ioc_disable_comp(ioc); + break; + + case IOC_E_DETACH: + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +/** + * Semaphore should be acquired for version check. + */ +static void +bfa_ioc_sm_fwcheck_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_hw_sem_get(ioc); +} + +/** + * Awaiting h/w semaphore to continue with version check. + */ +static void +bfa_ioc_sm_fwcheck(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_SEMLOCKED: + if (bfa_ioc_firmware_lock(ioc)) { + ioc->retry_count = 0; + bfa_fsm_set_state(ioc, bfa_ioc_sm_hwinit); + } else { + bfa_ioc_hw_sem_release(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_mismatch); + } + break; + + case IOC_E_DISABLE: + bfa_ioc_disable_comp(ioc); + /* fall through */ + + case IOC_E_DETACH: + bfa_ioc_hw_sem_get_cancel(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_reset); + break; + + case IOC_E_FWREADY: + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +/** + * Notify enable completion callback and generate mismatch AEN. + */ +static void +bfa_ioc_sm_mismatch_entry(struct bfa_ioc *ioc) +{ + /** + * Provide enable completion callback and AEN notification only once. + */ + if (ioc->retry_count == 0) + ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_IOC_FAILURE); + ioc->retry_count++; + bfa_ioc_timer_start(ioc); +} + +/** + * Awaiting firmware version match. + */ +static void +bfa_ioc_sm_mismatch(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_TIMEOUT: + bfa_fsm_set_state(ioc, bfa_ioc_sm_fwcheck); + break; + + case IOC_E_DISABLE: + bfa_ioc_disable_comp(ioc); + /* fall through */ + + case IOC_E_DETACH: + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_reset); + break; + + case IOC_E_FWREADY: + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +/** + * Request for semaphore. + */ +static void +bfa_ioc_sm_semwait_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_hw_sem_get(ioc); +} + +/** + * Awaiting semaphore for h/w initialzation. + */ +static void +bfa_ioc_sm_semwait(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_SEMLOCKED: + ioc->retry_count = 0; + bfa_fsm_set_state(ioc, bfa_ioc_sm_hwinit); + break; + + case IOC_E_DISABLE: + bfa_ioc_hw_sem_get_cancel(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_hwinit_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_timer_start(ioc); + bfa_ioc_reset(ioc, false); +} + +/** + * @brief + * Hardware is being initialized. Interrupts are enabled. + * Holding hardware semaphore lock. + */ +static void +bfa_ioc_sm_hwinit(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_FWREADY: + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_enabling); + break; + + case IOC_E_HWERROR: + bfa_ioc_timer_stop(ioc); + /* fall through */ + + case IOC_E_TIMEOUT: + ioc->retry_count++; + if (ioc->retry_count < BFA_IOC_HWINIT_MAX) { + bfa_ioc_timer_start(ioc); + bfa_ioc_reset(ioc, true); + break; + } + + bfa_ioc_hw_sem_release(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_initfail); + break; + + case IOC_E_DISABLE: + bfa_ioc_hw_sem_release(ioc); + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_enabling_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_timer_start(ioc); + bfa_ioc_send_enable(ioc); +} + +/** + * Host IOC function is being enabled, awaiting response from firmware. + * Semaphore is acquired. + */ +static void +bfa_ioc_sm_enabling(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_FWRSP_ENABLE: + bfa_ioc_timer_stop(ioc); + bfa_ioc_hw_sem_release(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_getattr); + break; + + case IOC_E_HWERROR: + bfa_ioc_timer_stop(ioc); + /* fall through */ + + case IOC_E_TIMEOUT: + ioc->retry_count++; + if (ioc->retry_count < BFA_IOC_HWINIT_MAX) { + writel(BFI_IOC_UNINIT, + ioc->ioc_regs.ioc_fwstate); + bfa_fsm_set_state(ioc, bfa_ioc_sm_hwinit); + break; + } + + bfa_ioc_hw_sem_release(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_initfail); + break; + + case IOC_E_DISABLE: + bfa_ioc_timer_stop(ioc); + bfa_ioc_hw_sem_release(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + case IOC_E_FWREADY: + bfa_ioc_send_enable(ioc); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_getattr_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_timer_start(ioc); + bfa_ioc_send_getattr(ioc); +} + +/** + * @brief + * IOC configuration in progress. Timer is active. + */ +static void +bfa_ioc_sm_getattr(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_FWRSP_GETATTR: + bfa_ioc_timer_stop(ioc); + bfa_ioc_check_attr_wwns(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_op); + break; + + case IOC_E_HWERROR: + bfa_ioc_timer_stop(ioc); + /* fall through */ + + case IOC_E_TIMEOUT: + bfa_fsm_set_state(ioc, bfa_ioc_sm_initfail); + break; + + case IOC_E_DISABLE: + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_op_entry(struct bfa_ioc *ioc) +{ + ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_OK); + bfa_ioc_hb_monitor(ioc); +} + +static void +bfa_ioc_sm_op(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_ENABLE: + break; + + case IOC_E_DISABLE: + bfa_ioc_hb_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabling); + break; + + case IOC_E_HWERROR: + case IOC_E_FWREADY: + /** + * Hard error or IOC recovery by other function. + * Treat it same as heartbeat failure. + */ + bfa_ioc_hb_stop(ioc); + /* !!! fall through !!! */ + + case IOC_E_HBFAIL: + bfa_fsm_set_state(ioc, bfa_ioc_sm_hbfail); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_disabling_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_timer_start(ioc); + bfa_ioc_send_disable(ioc); +} + +/** + * IOC is being disabled + */ +static void +bfa_ioc_sm_disabling(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_FWRSP_DISABLE: + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + case IOC_E_HWERROR: + bfa_ioc_timer_stop(ioc); + /* + * !!! fall through !!! + */ + + case IOC_E_TIMEOUT: + writel(BFI_IOC_FAIL, ioc->ioc_regs.ioc_fwstate); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +/** + * IOC disable completion entry. + */ +static void +bfa_ioc_sm_disabled_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_disable_comp(ioc); +} + +static void +bfa_ioc_sm_disabled(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_ENABLE: + bfa_fsm_set_state(ioc, bfa_ioc_sm_semwait); + break; + + case IOC_E_DISABLE: + ioc->cbfn->disable_cbfn(ioc->bfa); + break; + + case IOC_E_FWREADY: + break; + + case IOC_E_DETACH: + bfa_ioc_firmware_unlock(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_reset); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_initfail_entry(struct bfa_ioc *ioc) +{ + ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_IOC_FAILURE); + bfa_ioc_timer_start(ioc); +} + +/** + * @brief + * Hardware initialization failed. + */ +static void +bfa_ioc_sm_initfail(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_DISABLE: + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + case IOC_E_DETACH: + bfa_ioc_timer_stop(ioc); + bfa_ioc_firmware_unlock(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_reset); + break; + + case IOC_E_TIMEOUT: + bfa_fsm_set_state(ioc, bfa_ioc_sm_semwait); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_hbfail_entry(struct bfa_ioc *ioc) +{ + struct list_head *qe; + struct bfa_ioc_hbfail_notify *notify; + + /** + * Mark IOC as failed in hardware and stop firmware. + */ + bfa_ioc_lpu_stop(ioc); + writel(BFI_IOC_FAIL, ioc->ioc_regs.ioc_fwstate); + + /** + * Notify other functions on HB failure. + */ + bfa_ioc_notify_hbfail(ioc); + + /** + * Notify driver and common modules registered for notification. + */ + ioc->cbfn->hbfail_cbfn(ioc->bfa); + list_for_each(qe, &ioc->hb_notify_q) { + notify = (struct bfa_ioc_hbfail_notify *) qe; + notify->cbfn(notify->cbarg); + } + + /** + * Flush any queued up mailbox requests. + */ + bfa_ioc_mbox_hbfail(ioc); + + /** + * Trigger auto-recovery after a delay. + */ + if (ioc->auto_recover) + mod_timer(&ioc->ioc_timer, jiffies + + msecs_to_jiffies(BFA_IOC_TOV_RECOVER)); +} + +/** + * @brief + * IOC heartbeat failure. + */ +static void +bfa_ioc_sm_hbfail(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + + case IOC_E_ENABLE: + ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_IOC_FAILURE); + break; + + case IOC_E_DISABLE: + if (ioc->auto_recover) + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + case IOC_E_TIMEOUT: + bfa_fsm_set_state(ioc, bfa_ioc_sm_semwait); + break; + + case IOC_E_FWREADY: + /** + * Recovery is already initiated by other function. + */ + break; + + case IOC_E_HWERROR: + /* + * HB failure notification, ignore. + */ + break; + default: + bfa_sm_fault(ioc, event); + } +} + +/** + * BFA IOC private functions + */ + +static void +bfa_ioc_disable_comp(struct bfa_ioc *ioc) +{ + struct list_head *qe; + struct bfa_ioc_hbfail_notify *notify; + + ioc->cbfn->disable_cbfn(ioc->bfa); + + /** + * Notify common modules registered for notification. + */ + list_for_each(qe, &ioc->hb_notify_q) { + notify = (struct bfa_ioc_hbfail_notify *) qe; + notify->cbfn(notify->cbarg); + } +} + +void +bfa_ioc_sem_timeout(void *ioc_arg) +{ + struct bfa_ioc *ioc = (struct bfa_ioc *) ioc_arg; + + bfa_ioc_hw_sem_get(ioc); +} + +bool +bfa_ioc_sem_get(void __iomem *sem_reg) +{ + u32 r32; + int cnt = 0; +#define BFA_SEM_SPINCNT 3000 + + r32 = readl(sem_reg); + + while (r32 && (cnt < BFA_SEM_SPINCNT)) { + cnt++; + udelay(2); + r32 = readl(sem_reg); + } + + if (r32 == 0) + return true; + + BUG_ON(!(cnt < BFA_SEM_SPINCNT)); + return false; +} + +void +bfa_ioc_sem_release(void __iomem *sem_reg) +{ + writel(1, sem_reg); +} + +static void +bfa_ioc_hw_sem_get(struct bfa_ioc *ioc) +{ + u32 r32; + + /** + * First read to the semaphore register will return 0, subsequent reads + * will return 1. Semaphore is released by writing 1 to the register + */ + r32 = readl(ioc->ioc_regs.ioc_sem_reg); + if (r32 == 0) { + bfa_fsm_send_event(ioc, IOC_E_SEMLOCKED); + return; + } + + mod_timer(&ioc->sem_timer, jiffies + + msecs_to_jiffies(BFA_IOC_HWSEM_TOV)); +} + +void +bfa_ioc_hw_sem_release(struct bfa_ioc *ioc) +{ + writel(1, ioc->ioc_regs.ioc_sem_reg); +} + +static void +bfa_ioc_hw_sem_get_cancel(struct bfa_ioc *ioc) +{ + del_timer(&ioc->sem_timer); +} + +/** + * @brief + * Initialize LPU local memory (aka secondary memory / SRAM) + */ +static void +bfa_ioc_lmem_init(struct bfa_ioc *ioc) +{ + u32 pss_ctl; + int i; +#define PSS_LMEM_INIT_TIME 10000 + + pss_ctl = readl(ioc->ioc_regs.pss_ctl_reg); + pss_ctl &= ~__PSS_LMEM_RESET; + pss_ctl |= __PSS_LMEM_INIT_EN; + + /* + * i2c workaround 12.5khz clock + */ + pss_ctl |= __PSS_I2C_CLK_DIV(3UL); + writel(pss_ctl, ioc->ioc_regs.pss_ctl_reg); + + /** + * wait for memory initialization to be complete + */ + i = 0; + do { + pss_ctl = readl(ioc->ioc_regs.pss_ctl_reg); + i++; + } while (!(pss_ctl & __PSS_LMEM_INIT_DONE) && (i < PSS_LMEM_INIT_TIME)); + + /** + * If memory initialization is not successful, IOC timeout will catch + * such failures. + */ + BUG_ON(!(pss_ctl & __PSS_LMEM_INIT_DONE)); + + pss_ctl &= ~(__PSS_LMEM_INIT_DONE | __PSS_LMEM_INIT_EN); + writel(pss_ctl, ioc->ioc_regs.pss_ctl_reg); +} + +static void +bfa_ioc_lpu_start(struct bfa_ioc *ioc) +{ + u32 pss_ctl; + + /** + * Take processor out of reset. + */ + pss_ctl = readl(ioc->ioc_regs.pss_ctl_reg); + pss_ctl &= ~__PSS_LPU0_RESET; + + writel(pss_ctl, ioc->ioc_regs.pss_ctl_reg); +} + +static void +bfa_ioc_lpu_stop(struct bfa_ioc *ioc) +{ + u32 pss_ctl; + + /** + * Put processors in reset. + */ + pss_ctl = readl(ioc->ioc_regs.pss_ctl_reg); + pss_ctl |= (__PSS_LPU0_RESET | __PSS_LPU1_RESET); + + writel(pss_ctl, ioc->ioc_regs.pss_ctl_reg); +} + +/** + * Get driver and firmware versions. + */ +void +bfa_ioc_fwver_get(struct bfa_ioc *ioc, struct bfi_ioc_image_hdr *fwhdr) +{ + u32 pgnum, pgoff; + u32 loff = 0; + int i; + u32 *fwsig = (u32 *) fwhdr; + + pgnum = bfa_ioc_smem_pgnum(ioc, loff); + pgoff = bfa_ioc_smem_pgoff(ioc, loff); + writel(pgnum, ioc->ioc_regs.host_page_num_fn); + + for (i = 0; i < (sizeof(struct bfi_ioc_image_hdr) / sizeof(u32)); + i++) { + fwsig[i] = + swab32(readl((loff) + (ioc->ioc_regs.smem_page_start))); + loff += sizeof(u32); + } +} + +/** + * Returns TRUE if same. + */ +bool +bfa_ioc_fwver_cmp(struct bfa_ioc *ioc, struct bfi_ioc_image_hdr *fwhdr) +{ + struct bfi_ioc_image_hdr *drv_fwhdr; + int i; + + drv_fwhdr = (struct bfi_ioc_image_hdr *) + bfa_cb_image_get_chunk(BFA_IOC_FWIMG_TYPE(ioc), 0); + + for (i = 0; i < BFI_IOC_MD5SUM_SZ; i++) { + if (fwhdr->md5sum[i] != drv_fwhdr->md5sum[i]) + return false; + } + + return true; +} + +/** + * Return true if current running version is valid. Firmware signature and + * execution context (driver/bios) must match. + */ +static bool +bfa_ioc_fwver_valid(struct bfa_ioc *ioc) +{ + struct bfi_ioc_image_hdr fwhdr, *drv_fwhdr; + + /** + * If bios/efi boot (flash based) -- return true + */ + if (bfa_ioc_is_optrom(ioc)) + return true; + + bfa_ioc_fwver_get(ioc, &fwhdr); + drv_fwhdr = (struct bfi_ioc_image_hdr *) + bfa_cb_image_get_chunk(BFA_IOC_FWIMG_TYPE(ioc), 0); + + if (fwhdr.signature != drv_fwhdr->signature) + return false; + + if (fwhdr.exec != drv_fwhdr->exec) + return false; + + return bfa_ioc_fwver_cmp(ioc, &fwhdr); +} + +/** + * Conditionally flush any pending message from firmware at start. + */ +static void +bfa_ioc_msgflush(struct bfa_ioc *ioc) +{ + u32 r32; + + r32 = readl(ioc->ioc_regs.lpu_mbox_cmd); + if (r32) + writel(1, ioc->ioc_regs.lpu_mbox_cmd); +} + +/** + * @img ioc_init_logic.jpg + */ +static void +bfa_ioc_hwinit(struct bfa_ioc *ioc, bool force) +{ + enum bfi_ioc_state ioc_fwstate; + bool fwvalid; + + ioc_fwstate = readl(ioc->ioc_regs.ioc_fwstate); + + if (force) + ioc_fwstate = BFI_IOC_UNINIT; + + /** + * check if firmware is valid + */ + fwvalid = (ioc_fwstate == BFI_IOC_UNINIT) ? + false : bfa_ioc_fwver_valid(ioc); + + if (!fwvalid) { + bfa_ioc_boot(ioc, BFI_BOOT_TYPE_NORMAL, ioc->pcidev.device_id); + return; + } + + /** + * If hardware initialization is in progress (initialized by other IOC), + * just wait for an initialization completion interrupt. + */ + if (ioc_fwstate == BFI_IOC_INITING) { + ioc->cbfn->reset_cbfn(ioc->bfa); + return; + } + + /** + * If IOC function is disabled and firmware version is same, + * just re-enable IOC. + * + * If option rom, IOC must not be in operational state. With + * convergence, IOC will be in operational state when 2nd driver + * is loaded. + */ + if (ioc_fwstate == BFI_IOC_DISABLED || + (!bfa_ioc_is_optrom(ioc) && ioc_fwstate == BFI_IOC_OP)) { + /** + * When using MSI-X any pending firmware ready event should + * be flushed. Otherwise MSI-X interrupts are not delivered. + */ + bfa_ioc_msgflush(ioc); + ioc->cbfn->reset_cbfn(ioc->bfa); + bfa_fsm_send_event(ioc, IOC_E_FWREADY); + return; + } + + /** + * Initialize the h/w for any other states. + */ + bfa_ioc_boot(ioc, BFI_BOOT_TYPE_NORMAL, ioc->pcidev.device_id); +} + +void +bfa_ioc_timeout(void *ioc_arg) +{ + struct bfa_ioc *ioc = (struct bfa_ioc *) ioc_arg; + + bfa_fsm_send_event(ioc, IOC_E_TIMEOUT); +} + +void +bfa_ioc_mbox_send(struct bfa_ioc *ioc, void *ioc_msg, int len) +{ + u32 *msgp = (u32 *) ioc_msg; + u32 i; + + BUG_ON(!(len <= BFI_IOC_MSGLEN_MAX)); + + /* + * first write msg to mailbox registers + */ + for (i = 0; i < len / sizeof(u32); i++) + writel(cpu_to_le32(msgp[i]), + ioc->ioc_regs.hfn_mbox + i * sizeof(u32)); + + for (; i < BFI_IOC_MSGLEN_MAX / sizeof(u32); i++) + writel(0, ioc->ioc_regs.hfn_mbox + i * sizeof(u32)); + + /* + * write 1 to mailbox CMD to trigger LPU event + */ + writel(1, ioc->ioc_regs.hfn_mbox_cmd); + (void) readl(ioc->ioc_regs.hfn_mbox_cmd); +} + +static void +bfa_ioc_send_enable(struct bfa_ioc *ioc) +{ + struct bfi_ioc_ctrl_req enable_req; + struct timeval tv; + + bfi_h2i_set(enable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_ENABLE_REQ, + bfa_ioc_portid(ioc)); + enable_req.ioc_class = ioc->ioc_mc; + do_gettimeofday(&tv); + enable_req.tv_sec = ntohl(tv.tv_sec); + bfa_ioc_mbox_send(ioc, &enable_req, sizeof(struct bfi_ioc_ctrl_req)); +} + +static void +bfa_ioc_send_disable(struct bfa_ioc *ioc) +{ + struct bfi_ioc_ctrl_req disable_req; + + bfi_h2i_set(disable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_DISABLE_REQ, + bfa_ioc_portid(ioc)); + bfa_ioc_mbox_send(ioc, &disable_req, sizeof(struct bfi_ioc_ctrl_req)); +} + +static void +bfa_ioc_send_getattr(struct bfa_ioc *ioc) +{ + struct bfi_ioc_getattr_req attr_req; + + bfi_h2i_set(attr_req.mh, BFI_MC_IOC, BFI_IOC_H2I_GETATTR_REQ, + bfa_ioc_portid(ioc)); + bfa_dma_be_addr_set(attr_req.attr_addr, ioc->attr_dma.pa); + bfa_ioc_mbox_send(ioc, &attr_req, sizeof(attr_req)); +} + +void +bfa_ioc_hb_check(void *cbarg) +{ + struct bfa_ioc *ioc = cbarg; + u32 hb_count; + + hb_count = readl(ioc->ioc_regs.heartbeat); + if (ioc->hb_count == hb_count) { + pr_crit("Firmware heartbeat failure at %d", hb_count); + bfa_ioc_recover(ioc); + return; + } else { + ioc->hb_count = hb_count; + } + + bfa_ioc_mbox_poll(ioc); + mod_timer(&ioc->hb_timer, jiffies + + msecs_to_jiffies(BFA_IOC_HB_TOV)); +} + +static void +bfa_ioc_hb_monitor(struct bfa_ioc *ioc) +{ + ioc->hb_count = readl(ioc->ioc_regs.heartbeat); + mod_timer(&ioc->hb_timer, jiffies + + msecs_to_jiffies(BFA_IOC_HB_TOV)); +} + +static void +bfa_ioc_hb_stop(struct bfa_ioc *ioc) +{ + del_timer(&ioc->hb_timer); +} + +/** + * @brief + * Initiate a full firmware download. + */ +static void +bfa_ioc_download_fw(struct bfa_ioc *ioc, u32 boot_type, + u32 boot_param) +{ + u32 *fwimg; + u32 pgnum, pgoff; + u32 loff = 0; + u32 chunkno = 0; + u32 i; + + /** + * Initialize LMEM first before code download + */ + bfa_ioc_lmem_init(ioc); + + /** + * Flash based firmware boot + */ + if (bfa_ioc_is_optrom(ioc)) + boot_type = BFI_BOOT_TYPE_FLASH; + fwimg = bfa_cb_image_get_chunk(BFA_IOC_FWIMG_TYPE(ioc), chunkno); + + pgnum = bfa_ioc_smem_pgnum(ioc, loff); + pgoff = bfa_ioc_smem_pgoff(ioc, loff); + + writel(pgnum, ioc->ioc_regs.host_page_num_fn); + + for (i = 0; i < bfa_cb_image_get_size(BFA_IOC_FWIMG_TYPE(ioc)); i++) { + if (BFA_IOC_FLASH_CHUNK_NO(i) != chunkno) { + chunkno = BFA_IOC_FLASH_CHUNK_NO(i); + fwimg = bfa_cb_image_get_chunk(BFA_IOC_FWIMG_TYPE(ioc), + BFA_IOC_FLASH_CHUNK_ADDR(chunkno)); + } + + /** + * write smem + */ + writel((swab32(fwimg[BFA_IOC_FLASH_OFFSET_IN_CHUNK(i)])), + ((ioc->ioc_regs.smem_page_start) + (loff))); + + loff += sizeof(u32); + + /** + * handle page offset wrap around + */ + loff = PSS_SMEM_PGOFF(loff); + if (loff == 0) { + pgnum++; + writel(pgnum, + ioc->ioc_regs.host_page_num_fn); + } + } + + writel(bfa_ioc_smem_pgnum(ioc, 0), + ioc->ioc_regs.host_page_num_fn); + + /* + * Set boot type and boot param at the end. + */ + writel((swab32(swab32(boot_type))), ((ioc->ioc_regs.smem_page_start) + + (BFI_BOOT_TYPE_OFF))); + writel((swab32(swab32(boot_param))), ((ioc->ioc_regs.smem_page_start) + + (BFI_BOOT_PARAM_OFF))); +} + +static void +bfa_ioc_reset(struct bfa_ioc *ioc, bool force) +{ + bfa_ioc_hwinit(ioc, force); +} + +/** + * @brief + * Update BFA configuration from firmware configuration. + */ +static void +bfa_ioc_getattr_reply(struct bfa_ioc *ioc) +{ + struct bfi_ioc_attr *attr = ioc->attr; + + attr->adapter_prop = ntohl(attr->adapter_prop); + attr->card_type = ntohl(attr->card_type); + attr->maxfrsize = ntohs(attr->maxfrsize); + + bfa_fsm_send_event(ioc, IOC_E_FWRSP_GETATTR); +} + +/** + * Attach time initialization of mbox logic. + */ +static void +bfa_ioc_mbox_attach(struct bfa_ioc *ioc) +{ + struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod; + int mc; + + INIT_LIST_HEAD(&mod->cmd_q); + for (mc = 0; mc < BFI_MC_MAX; mc++) { + mod->mbhdlr[mc].cbfn = NULL; + mod->mbhdlr[mc].cbarg = ioc->bfa; + } +} + +/** + * Mbox poll timer -- restarts any pending mailbox requests. + */ +static void +bfa_ioc_mbox_poll(struct bfa_ioc *ioc) +{ + struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod; + struct bfa_mbox_cmd *cmd; + u32 stat; + + /** + * If no command pending, do nothing + */ + if (list_empty(&mod->cmd_q)) + return; + + /** + * If previous command is not yet fetched by firmware, do nothing + */ + stat = readl(ioc->ioc_regs.hfn_mbox_cmd); + if (stat) + return; + + /** + * Enqueue command to firmware. + */ + bfa_q_deq(&mod->cmd_q, &cmd); + bfa_ioc_mbox_send(ioc, cmd->msg, sizeof(cmd->msg)); +} + +/** + * Cleanup any pending requests. + */ +static void +bfa_ioc_mbox_hbfail(struct bfa_ioc *ioc) +{ + struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod; + struct bfa_mbox_cmd *cmd; + + while (!list_empty(&mod->cmd_q)) + bfa_q_deq(&mod->cmd_q, &cmd); +} + +/** + * IOC public + */ +enum bfa_status +bfa_ioc_pll_init(struct bfa_ioc *ioc) +{ + /* + * Hold semaphore so that nobody can access the chip during init. + */ + bfa_ioc_sem_get(ioc->ioc_regs.ioc_init_sem_reg); + + bfa_ioc_pll_init_asic(ioc); + + ioc->pllinit = true; + /* + * release semaphore. + */ + bfa_ioc_sem_release(ioc->ioc_regs.ioc_init_sem_reg); + + return BFA_STATUS_OK; +} + +/** + * Interface used by diag module to do firmware boot with memory test + * as the entry vector. + */ +void +bfa_ioc_boot(struct bfa_ioc *ioc, u32 boot_type, u32 boot_param) +{ + void __iomem *rb; + + bfa_ioc_stats(ioc, ioc_boots); + + if (bfa_ioc_pll_init(ioc) != BFA_STATUS_OK) + return; + + /** + * Initialize IOC state of all functions on a chip reset. + */ + rb = ioc->pcidev.pci_bar_kva; + if (boot_param == BFI_BOOT_TYPE_MEMTEST) { + writel(BFI_IOC_MEMTEST, (rb + BFA_IOC0_STATE_REG)); + writel(BFI_IOC_MEMTEST, (rb + BFA_IOC1_STATE_REG)); + } else { + writel(BFI_IOC_INITING, (rb + BFA_IOC0_STATE_REG)); + writel(BFI_IOC_INITING, (rb + BFA_IOC1_STATE_REG)); + } + + bfa_ioc_msgflush(ioc); + bfa_ioc_download_fw(ioc, boot_type, boot_param); + + /** + * Enable interrupts just before starting LPU + */ + ioc->cbfn->reset_cbfn(ioc->bfa); + bfa_ioc_lpu_start(ioc); +} + +/** + * Enable/disable IOC failure auto recovery. + */ +void +bfa_ioc_auto_recover(bool auto_recover) +{ + bfa_auto_recover = auto_recover; +} + +bool +bfa_ioc_is_operational(struct bfa_ioc *ioc) +{ + return bfa_fsm_cmp_state(ioc, bfa_ioc_sm_op); +} + +bool +bfa_ioc_is_initialized(struct bfa_ioc *ioc) +{ + u32 r32 = readl(ioc->ioc_regs.ioc_fwstate); + + return ((r32 != BFI_IOC_UNINIT) && + (r32 != BFI_IOC_INITING) && + (r32 != BFI_IOC_MEMTEST)); +} + +void +bfa_ioc_msgget(struct bfa_ioc *ioc, void *mbmsg) +{ + u32 *msgp = mbmsg; + u32 r32; + int i; + + /** + * read the MBOX msg + */ + for (i = 0; i < (sizeof(union bfi_ioc_i2h_msg_u) / sizeof(u32)); + i++) { + r32 = readl(ioc->ioc_regs.lpu_mbox + + i * sizeof(u32)); + msgp[i] = htonl(r32); + } + + /** + * turn off mailbox interrupt by clearing mailbox status + */ + writel(1, ioc->ioc_regs.lpu_mbox_cmd); + readl(ioc->ioc_regs.lpu_mbox_cmd); +} + +void +bfa_ioc_isr(struct bfa_ioc *ioc, struct bfi_mbmsg *m) +{ + union bfi_ioc_i2h_msg_u *msg; + + msg = (union bfi_ioc_i2h_msg_u *) m; + + bfa_ioc_stats(ioc, ioc_isrs); + + switch (msg->mh.msg_id) { + case BFI_IOC_I2H_HBEAT: + break; + + case BFI_IOC_I2H_READY_EVENT: + bfa_fsm_send_event(ioc, IOC_E_FWREADY); + break; + + case BFI_IOC_I2H_ENABLE_REPLY: + bfa_fsm_send_event(ioc, IOC_E_FWRSP_ENABLE); + break; + + case BFI_IOC_I2H_DISABLE_REPLY: + bfa_fsm_send_event(ioc, IOC_E_FWRSP_DISABLE); + break; + + case BFI_IOC_I2H_GETATTR_REPLY: + bfa_ioc_getattr_reply(ioc); + break; + + default: + BUG_ON(1); + } +} + +/** + * IOC attach time initialization and setup. + * + * @param[in] ioc memory for IOC + * @param[in] bfa driver instance structure + */ +void +bfa_ioc_attach(struct bfa_ioc *ioc, void *bfa, struct bfa_ioc_cbfn *cbfn) +{ + ioc->bfa = bfa; + ioc->cbfn = cbfn; + ioc->fcmode = false; + ioc->pllinit = false; + ioc->dbg_fwsave_once = true; + + bfa_ioc_mbox_attach(ioc); + INIT_LIST_HEAD(&ioc->hb_notify_q); + + bfa_fsm_set_state(ioc, bfa_ioc_sm_reset); +} + +/** + * Driver detach time IOC cleanup. + */ +void +bfa_ioc_detach(struct bfa_ioc *ioc) +{ + bfa_fsm_send_event(ioc, IOC_E_DETACH); +} + +/** + * Setup IOC PCI properties. + * + * @param[in] pcidev PCI device information for this IOC + */ +void +bfa_ioc_pci_init(struct bfa_ioc *ioc, struct bfa_pcidev *pcidev, + enum bfi_mclass mc) +{ + ioc->ioc_mc = mc; + ioc->pcidev = *pcidev; + ioc->ctdev = bfa_asic_id_ct(ioc->pcidev.device_id); + ioc->cna = ioc->ctdev && !ioc->fcmode; + + bfa_ioc_set_ct_hwif(ioc); + + bfa_ioc_map_port(ioc); + bfa_ioc_reg_init(ioc); +} + +/** + * Initialize IOC dma memory + * + * @param[in] dm_kva kernel virtual address of IOC dma memory + * @param[in] dm_pa physical address of IOC dma memory + */ +void +bfa_ioc_mem_claim(struct bfa_ioc *ioc, u8 *dm_kva, u64 dm_pa) +{ + /** + * dma memory for firmware attribute + */ + ioc->attr_dma.kva = dm_kva; + ioc->attr_dma.pa = dm_pa; + ioc->attr = (struct bfi_ioc_attr *) dm_kva; +} + +/** + * Return size of dma memory required. + */ +u32 +bfa_ioc_meminfo(void) +{ + return roundup(sizeof(struct bfi_ioc_attr), BFA_DMA_ALIGN_SZ); +} + +void +bfa_ioc_enable(struct bfa_ioc *ioc) +{ + bfa_ioc_stats(ioc, ioc_enables); + ioc->dbg_fwsave_once = true; + + bfa_fsm_send_event(ioc, IOC_E_ENABLE); +} + +void +bfa_ioc_disable(struct bfa_ioc *ioc) +{ + bfa_ioc_stats(ioc, ioc_disables); + bfa_fsm_send_event(ioc, IOC_E_DISABLE); +} + +u32 +bfa_ioc_smem_pgnum(struct bfa_ioc *ioc, u32 fmaddr) +{ + return PSS_SMEM_PGNUM(ioc->ioc_regs.smem_pg0, fmaddr); +} + +u32 +bfa_ioc_smem_pgoff(struct bfa_ioc *ioc, u32 fmaddr) +{ + return PSS_SMEM_PGOFF(fmaddr); +} + +/** + * Register mailbox message handler functions + * + * @param[in] ioc IOC instance + * @param[in] mcfuncs message class handler functions + */ +void +bfa_ioc_mbox_register(struct bfa_ioc *ioc, bfa_ioc_mbox_mcfunc_t *mcfuncs) +{ + struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod; + int mc; + + for (mc = 0; mc < BFI_MC_MAX; mc++) + mod->mbhdlr[mc].cbfn = mcfuncs[mc]; +} + +/** + * Register mailbox message handler function, to be called by common modules + */ +void +bfa_ioc_mbox_regisr(struct bfa_ioc *ioc, enum bfi_mclass mc, + bfa_ioc_mbox_mcfunc_t cbfn, void *cbarg) +{ + struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod; + + mod->mbhdlr[mc].cbfn = cbfn; + mod->mbhdlr[mc].cbarg = cbarg; +} + +/** + * Queue a mailbox command request to firmware. Waits if mailbox is busy. + * Responsibility of caller to serialize + * + * @param[in] ioc IOC instance + * @param[i] cmd Mailbox command + */ +void +bfa_ioc_mbox_queue(struct bfa_ioc *ioc, struct bfa_mbox_cmd *cmd) +{ + struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod; + u32 stat; + + /** + * If a previous command is pending, queue new command + */ + if (!list_empty(&mod->cmd_q)) { + list_add_tail(&cmd->qe, &mod->cmd_q); + return; + } + + /** + * If mailbox is busy, queue command for poll timer + */ + stat = readl(ioc->ioc_regs.hfn_mbox_cmd); + if (stat) { + list_add_tail(&cmd->qe, &mod->cmd_q); + return; + } + + /** + * mailbox is free -- queue command to firmware + */ + bfa_ioc_mbox_send(ioc, cmd->msg, sizeof(cmd->msg)); +} + +/** + * Handle mailbox interrupts + */ +void +bfa_ioc_mbox_isr(struct bfa_ioc *ioc) +{ + struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod; + struct bfi_mbmsg m; + int mc; + + bfa_ioc_msgget(ioc, &m); + + /** + * Treat IOC message class as special. + */ + mc = m.mh.msg_class; + if (mc == BFI_MC_IOC) { + bfa_ioc_isr(ioc, &m); + return; + } + + if ((mc > BFI_MC_MAX) || (mod->mbhdlr[mc].cbfn == NULL)) + return; + + mod->mbhdlr[mc].cbfn(mod->mbhdlr[mc].cbarg, &m); +} + +void +bfa_ioc_error_isr(struct bfa_ioc *ioc) +{ + bfa_fsm_send_event(ioc, IOC_E_HWERROR); +} + +void +bfa_ioc_set_fcmode(struct bfa_ioc *ioc) +{ + ioc->fcmode = true; + ioc->port_id = bfa_ioc_pcifn(ioc); +} + +/** + * return true if IOC is disabled + */ +bool +bfa_ioc_is_disabled(struct bfa_ioc *ioc) +{ + return bfa_fsm_cmp_state(ioc, bfa_ioc_sm_disabling) || + bfa_fsm_cmp_state(ioc, bfa_ioc_sm_disabled); +} + +/** + * return true if IOC firmware is different. + */ +bool +bfa_ioc_fw_mismatch(struct bfa_ioc *ioc) +{ + return bfa_fsm_cmp_state(ioc, bfa_ioc_sm_reset) || + bfa_fsm_cmp_state(ioc, bfa_ioc_sm_fwcheck) || + bfa_fsm_cmp_state(ioc, bfa_ioc_sm_mismatch); +} + +#define bfa_ioc_state_disabled(__sm) \ + (((__sm) == BFI_IOC_UNINIT) || \ + ((__sm) == BFI_IOC_INITING) || \ + ((__sm) == BFI_IOC_HWINIT) || \ + ((__sm) == BFI_IOC_DISABLED) || \ + ((__sm) == BFI_IOC_FAIL) || \ + ((__sm) == BFI_IOC_CFG_DISABLED)) + +/** + * Check if adapter is disabled -- both IOCs should be in a disabled + * state. + */ +bool +bfa_ioc_adapter_is_disabled(struct bfa_ioc *ioc) +{ + u32 ioc_state; + void __iomem *rb = ioc->pcidev.pci_bar_kva; + + if (!bfa_fsm_cmp_state(ioc, bfa_ioc_sm_disabled)) + return false; + + ioc_state = readl(rb + BFA_IOC0_STATE_REG); + if (!bfa_ioc_state_disabled(ioc_state)) + return false; + + if (ioc->pcidev.device_id != PCI_DEVICE_ID_BROCADE_FC_8G1P) { + ioc_state = readl(rb + BFA_IOC1_STATE_REG); + if (!bfa_ioc_state_disabled(ioc_state)) + return false; + } + + return true; +} + +/** + * Add to IOC heartbeat failure notification queue. To be used by common + * modules such as cee, port, diag. + */ +void +bfa_ioc_hbfail_register(struct bfa_ioc *ioc, + struct bfa_ioc_hbfail_notify *notify) +{ + list_add_tail(¬ify->qe, &ioc->hb_notify_q); +} + +#define BFA_MFG_NAME "Brocade" +void +bfa_ioc_get_adapter_attr(struct bfa_ioc *ioc, + struct bfa_adapter_attr *ad_attr) +{ + struct bfi_ioc_attr *ioc_attr; + + ioc_attr = ioc->attr; + + bfa_ioc_get_adapter_serial_num(ioc, ad_attr->serial_num); + bfa_ioc_get_adapter_fw_ver(ioc, ad_attr->fw_ver); + bfa_ioc_get_adapter_optrom_ver(ioc, ad_attr->optrom_ver); + bfa_ioc_get_adapter_manufacturer(ioc, ad_attr->manufacturer); + memcpy(&ad_attr->vpd, &ioc_attr->vpd, + sizeof(struct bfa_mfg_vpd)); + + ad_attr->nports = bfa_ioc_get_nports(ioc); + ad_attr->max_speed = bfa_ioc_speed_sup(ioc); + + bfa_ioc_get_adapter_model(ioc, ad_attr->model); + /* For now, model descr uses same model string */ + bfa_ioc_get_adapter_model(ioc, ad_attr->model_descr); + + ad_attr->card_type = ioc_attr->card_type; + ad_attr->is_mezz = bfa_mfg_is_mezz(ioc_attr->card_type); + + if (BFI_ADAPTER_IS_SPECIAL(ioc_attr->adapter_prop)) + ad_attr->prototype = 1; + else + ad_attr->prototype = 0; + + ad_attr->pwwn = bfa_ioc_get_pwwn(ioc); + ad_attr->mac = bfa_ioc_get_mac(ioc); + + ad_attr->pcie_gen = ioc_attr->pcie_gen; + ad_attr->pcie_lanes = ioc_attr->pcie_lanes; + ad_attr->pcie_lanes_orig = ioc_attr->pcie_lanes_orig; + ad_attr->asic_rev = ioc_attr->asic_rev; + + bfa_ioc_get_pci_chip_rev(ioc, ad_attr->hw_ver); + + ad_attr->cna_capable = ioc->cna; + ad_attr->trunk_capable = (ad_attr->nports > 1) && !ioc->cna; +} + +enum bfa_ioc_type +bfa_ioc_get_type(struct bfa_ioc *ioc) +{ + if (!ioc->ctdev || ioc->fcmode) + return BFA_IOC_TYPE_FC; + else if (ioc->ioc_mc == BFI_MC_IOCFC) + return BFA_IOC_TYPE_FCoE; + else if (ioc->ioc_mc == BFI_MC_LL) + return BFA_IOC_TYPE_LL; + else { + BUG_ON(!(ioc->ioc_mc == BFI_MC_LL)); + return BFA_IOC_TYPE_LL; + } +} + +void +bfa_ioc_get_adapter_serial_num(struct bfa_ioc *ioc, char *serial_num) +{ + memset(serial_num, 0, BFA_ADAPTER_SERIAL_NUM_LEN); + memcpy(serial_num, + (void *)ioc->attr->brcd_serialnum, + BFA_ADAPTER_SERIAL_NUM_LEN); +} + +void +bfa_ioc_get_adapter_fw_ver(struct bfa_ioc *ioc, char *fw_ver) +{ + memset(fw_ver, 0, BFA_VERSION_LEN); + memcpy(fw_ver, ioc->attr->fw_version, BFA_VERSION_LEN); +} + +void +bfa_ioc_get_pci_chip_rev(struct bfa_ioc *ioc, char *chip_rev) +{ + BUG_ON(!(chip_rev)); + + memset(chip_rev, 0, BFA_IOC_CHIP_REV_LEN); + + chip_rev[0] = 'R'; + chip_rev[1] = 'e'; + chip_rev[2] = 'v'; + chip_rev[3] = '-'; + chip_rev[4] = ioc->attr->asic_rev; + chip_rev[5] = '\0'; +} + +void +bfa_ioc_get_adapter_optrom_ver(struct bfa_ioc *ioc, char *optrom_ver) +{ + memset(optrom_ver, 0, BFA_VERSION_LEN); + memcpy(optrom_ver, ioc->attr->optrom_version, + BFA_VERSION_LEN); +} + +void +bfa_ioc_get_adapter_manufacturer(struct bfa_ioc *ioc, char *manufacturer) +{ + memset(manufacturer, 0, BFA_ADAPTER_MFG_NAME_LEN); + memcpy(manufacturer, BFA_MFG_NAME, BFA_ADAPTER_MFG_NAME_LEN); +} + +void +bfa_ioc_get_adapter_model(struct bfa_ioc *ioc, char *model) +{ + struct bfi_ioc_attr *ioc_attr; + + BUG_ON(!(model)); + memset(model, 0, BFA_ADAPTER_MODEL_NAME_LEN); + + ioc_attr = ioc->attr; + + /** + * model name + */ + snprintf(model, BFA_ADAPTER_MODEL_NAME_LEN, "%s-%u", + BFA_MFG_NAME, ioc_attr->card_type); +} + +enum bfa_ioc_state +bfa_ioc_get_state(struct bfa_ioc *ioc) +{ + return bfa_sm_to_state(ioc_sm_table, ioc->fsm); +} + +void +bfa_ioc_get_attr(struct bfa_ioc *ioc, struct bfa_ioc_attr *ioc_attr) +{ + memset((void *)ioc_attr, 0, sizeof(struct bfa_ioc_attr)); + + ioc_attr->state = bfa_ioc_get_state(ioc); + ioc_attr->port_id = ioc->port_id; + + ioc_attr->ioc_type = bfa_ioc_get_type(ioc); + + bfa_ioc_get_adapter_attr(ioc, &ioc_attr->adapter_attr); + + ioc_attr->pci_attr.device_id = ioc->pcidev.device_id; + ioc_attr->pci_attr.pcifn = ioc->pcidev.pci_func; + bfa_ioc_get_pci_chip_rev(ioc, ioc_attr->pci_attr.chip_rev); +} + +/** + * WWN public + */ +u64 +bfa_ioc_get_pwwn(struct bfa_ioc *ioc) +{ + return ioc->attr->pwwn; +} + +u64 +bfa_ioc_get_nwwn(struct bfa_ioc *ioc) +{ + return ioc->attr->nwwn; +} + +u64 +bfa_ioc_get_adid(struct bfa_ioc *ioc) +{ + return ioc->attr->mfg_pwwn; +} + +mac_t +bfa_ioc_get_mac(struct bfa_ioc *ioc) +{ + /* + * Currently mfg mac is used as FCoE enode mac (not configured by PBC) + */ + if (bfa_ioc_get_type(ioc) == BFA_IOC_TYPE_FCoE) + return bfa_ioc_get_mfg_mac(ioc); + else + return ioc->attr->mac; +} + +u64 +bfa_ioc_get_mfg_pwwn(struct bfa_ioc *ioc) +{ + return ioc->attr->mfg_pwwn; +} + +u64 +bfa_ioc_get_mfg_nwwn(struct bfa_ioc *ioc) +{ + return ioc->attr->mfg_nwwn; +} + +mac_t +bfa_ioc_get_mfg_mac(struct bfa_ioc *ioc) +{ + mac_t m; + + m = ioc->attr->mfg_mac; + if (bfa_mfg_is_old_wwn_mac_model(ioc->attr->card_type)) + m.mac[MAC_ADDRLEN - 1] += bfa_ioc_pcifn(ioc); + else + bfa_mfg_increment_wwn_mac(&(m.mac[MAC_ADDRLEN-3]), + bfa_ioc_pcifn(ioc)); + + return m; +} + +bool +bfa_ioc_get_fcmode(struct bfa_ioc *ioc) +{ + return ioc->fcmode || !bfa_asic_id_ct(ioc->pcidev.device_id); +} + +/** + * Firmware failure detected. Start recovery actions. + */ +static void +bfa_ioc_recover(struct bfa_ioc *ioc) +{ + bfa_ioc_stats(ioc, ioc_hbfails); + bfa_fsm_send_event(ioc, IOC_E_HBFAIL); +} + +static void +bfa_ioc_check_attr_wwns(struct bfa_ioc *ioc) +{ + if (bfa_ioc_get_type(ioc) == BFA_IOC_TYPE_LL) + return; + +} diff --git a/drivers/net/bna/bfa_ioc.h b/drivers/net/bna/bfa_ioc.h new file mode 100644 index 000000000000..2e5c0adef899 --- /dev/null +++ b/drivers/net/bna/bfa_ioc.h @@ -0,0 +1,343 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#ifndef __BFA_IOC_H__ +#define __BFA_IOC_H__ + +#include "bfa_sm.h" +#include "bfi.h" +#include "cna.h" + +#define BFA_IOC_TOV 3000 /* msecs */ +#define BFA_IOC_HWSEM_TOV 500 /* msecs */ +#define BFA_IOC_HB_TOV 500 /* msecs */ +#define BFA_IOC_HWINIT_MAX 2 +#define BFA_IOC_TOV_RECOVER BFA_IOC_HB_TOV + +/** + * Generic Scatter Gather Element used by driver + */ +struct bfa_sge { + u32 sg_len; + void *sg_addr; +}; + +/** + * PCI device information required by IOC + */ +struct bfa_pcidev { + int pci_slot; + u8 pci_func; + u16 device_id; + void __iomem *pci_bar_kva; +}; + +/** + * Structure used to remember the DMA-able memory block's KVA and Physical + * Address + */ +struct bfa_dma { + void *kva; /* ! Kernel virtual address */ + u64 pa; /* ! Physical address */ +}; + +#define BFA_DMA_ALIGN_SZ 256 + +/** + * smem size for Crossbow and Catapult + */ +#define BFI_SMEM_CB_SIZE 0x200000U /* ! 2MB for crossbow */ +#define BFI_SMEM_CT_SIZE 0x280000U /* ! 2.5MB for catapult */ + +/** + * @brief BFA dma address assignment macro + */ +#define bfa_dma_addr_set(dma_addr, pa) \ + __bfa_dma_addr_set(&dma_addr, (u64)pa) + +static inline void +__bfa_dma_addr_set(union bfi_addr_u *dma_addr, u64 pa) +{ + dma_addr->a32.addr_lo = (u32) pa; + dma_addr->a32.addr_hi = (u32) (upper_32_bits(pa)); +} + +/** + * @brief BFA dma address assignment macro. (big endian format) + */ +#define bfa_dma_be_addr_set(dma_addr, pa) \ + __bfa_dma_be_addr_set(&dma_addr, (u64)pa) +static inline void +__bfa_dma_be_addr_set(union bfi_addr_u *dma_addr, u64 pa) +{ + dma_addr->a32.addr_lo = (u32) htonl(pa); + dma_addr->a32.addr_hi = (u32) htonl(upper_32_bits(pa)); +} + +struct bfa_ioc_regs { + void __iomem *hfn_mbox_cmd; + void __iomem *hfn_mbox; + void __iomem *lpu_mbox_cmd; + void __iomem *lpu_mbox; + void __iomem *pss_ctl_reg; + void __iomem *pss_err_status_reg; + void __iomem *app_pll_fast_ctl_reg; + void __iomem *app_pll_slow_ctl_reg; + void __iomem *ioc_sem_reg; + void __iomem *ioc_usage_sem_reg; + void __iomem *ioc_init_sem_reg; + void __iomem *ioc_usage_reg; + void __iomem *host_page_num_fn; + void __iomem *heartbeat; + void __iomem *ioc_fwstate; + void __iomem *ll_halt; + void __iomem *err_set; + void __iomem *shirq_isr_next; + void __iomem *shirq_msk_next; + void __iomem *smem_page_start; + u32 smem_pg0; +}; + +/** + * IOC Mailbox structures + */ +struct bfa_mbox_cmd { + struct list_head qe; + u32 msg[BFI_IOC_MSGSZ]; +}; + +/** + * IOC mailbox module + */ +typedef void (*bfa_ioc_mbox_mcfunc_t)(void *cbarg, struct bfi_mbmsg *m); +struct bfa_ioc_mbox_mod { + struct list_head cmd_q; /*!< pending mbox queue */ + int nmclass; /*!< number of handlers */ + struct { + bfa_ioc_mbox_mcfunc_t cbfn; /*!< message handlers */ + void *cbarg; + } mbhdlr[BFI_MC_MAX]; +}; + +/** + * IOC callback function interfaces + */ +typedef void (*bfa_ioc_enable_cbfn_t)(void *bfa, enum bfa_status status); +typedef void (*bfa_ioc_disable_cbfn_t)(void *bfa); +typedef void (*bfa_ioc_hbfail_cbfn_t)(void *bfa); +typedef void (*bfa_ioc_reset_cbfn_t)(void *bfa); +struct bfa_ioc_cbfn { + bfa_ioc_enable_cbfn_t enable_cbfn; + bfa_ioc_disable_cbfn_t disable_cbfn; + bfa_ioc_hbfail_cbfn_t hbfail_cbfn; + bfa_ioc_reset_cbfn_t reset_cbfn; +}; + +/** + * Heartbeat failure notification queue element. + */ +struct bfa_ioc_hbfail_notify { + struct list_head qe; + bfa_ioc_hbfail_cbfn_t cbfn; + void *cbarg; +}; + +/** + * Initialize a heartbeat failure notification structure + */ +#define bfa_ioc_hbfail_init(__notify, __cbfn, __cbarg) do { \ + (__notify)->cbfn = (__cbfn); \ + (__notify)->cbarg = (__cbarg); \ +} while (0) + +struct bfa_ioc { + bfa_fsm_t fsm; + struct bfa *bfa; + struct bfa_pcidev pcidev; + struct bfa_timer_mod *timer_mod; + struct timer_list ioc_timer; + struct timer_list sem_timer; + struct timer_list hb_timer; + u32 hb_count; + u32 retry_count; + struct list_head hb_notify_q; + void *dbg_fwsave; + int dbg_fwsave_len; + bool dbg_fwsave_once; + enum bfi_mclass ioc_mc; + struct bfa_ioc_regs ioc_regs; + struct bfa_ioc_drv_stats stats; + bool auto_recover; + bool fcmode; + bool ctdev; + bool cna; + bool pllinit; + bool stats_busy; /*!< outstanding stats */ + u8 port_id; + + struct bfa_dma attr_dma; + struct bfi_ioc_attr *attr; + struct bfa_ioc_cbfn *cbfn; + struct bfa_ioc_mbox_mod mbox_mod; + struct bfa_ioc_hwif *ioc_hwif; +}; + +struct bfa_ioc_hwif { + enum bfa_status (*ioc_pll_init) (void __iomem *rb, bool fcmode); + bool (*ioc_firmware_lock) (struct bfa_ioc *ioc); + void (*ioc_firmware_unlock) (struct bfa_ioc *ioc); + void (*ioc_reg_init) (struct bfa_ioc *ioc); + void (*ioc_map_port) (struct bfa_ioc *ioc); + void (*ioc_isr_mode_set) (struct bfa_ioc *ioc, + bool msix); + void (*ioc_notify_hbfail) (struct bfa_ioc *ioc); + void (*ioc_ownership_reset) (struct bfa_ioc *ioc); +}; + +#define bfa_ioc_pcifn(__ioc) ((__ioc)->pcidev.pci_func) +#define bfa_ioc_devid(__ioc) ((__ioc)->pcidev.device_id) +#define bfa_ioc_bar0(__ioc) ((__ioc)->pcidev.pci_bar_kva) +#define bfa_ioc_portid(__ioc) ((__ioc)->port_id) +#define bfa_ioc_fetch_stats(__ioc, __stats) \ + (((__stats)->drv_stats) = (__ioc)->stats) +#define bfa_ioc_clr_stats(__ioc) \ + memset(&(__ioc)->stats, 0, sizeof((__ioc)->stats)) +#define bfa_ioc_maxfrsize(__ioc) ((__ioc)->attr->maxfrsize) +#define bfa_ioc_rx_bbcredit(__ioc) ((__ioc)->attr->rx_bbcredit) +#define bfa_ioc_speed_sup(__ioc) \ + BFI_ADAPTER_GETP(SPEED, (__ioc)->attr->adapter_prop) +#define bfa_ioc_get_nports(__ioc) \ + BFI_ADAPTER_GETP(NPORTS, (__ioc)->attr->adapter_prop) + +#define bfa_ioc_stats(_ioc, _stats) ((_ioc)->stats._stats++) +#define BFA_IOC_FWIMG_MINSZ (16 * 1024) +#define BFA_IOC_FWIMG_TYPE(__ioc) \ + (((__ioc)->ctdev) ? \ + (((__ioc)->fcmode) ? BFI_IMAGE_CT_FC : BFI_IMAGE_CT_CNA) : \ + BFI_IMAGE_CB_FC) +#define BFA_IOC_FW_SMEM_SIZE(__ioc) \ + (((__ioc)->ctdev) ? BFI_SMEM_CT_SIZE : BFI_SMEM_CB_SIZE) +#define BFA_IOC_FLASH_CHUNK_NO(off) (off / BFI_FLASH_CHUNK_SZ_WORDS) +#define BFA_IOC_FLASH_OFFSET_IN_CHUNK(off) (off % BFI_FLASH_CHUNK_SZ_WORDS) +#define BFA_IOC_FLASH_CHUNK_ADDR(chunkno) (chunkno * BFI_FLASH_CHUNK_SZ_WORDS) + +/** + * IOC mailbox interface + */ +void bfa_ioc_mbox_queue(struct bfa_ioc *ioc, struct bfa_mbox_cmd *cmd); +void bfa_ioc_mbox_register(struct bfa_ioc *ioc, + bfa_ioc_mbox_mcfunc_t *mcfuncs); +void bfa_ioc_mbox_isr(struct bfa_ioc *ioc); +void bfa_ioc_mbox_send(struct bfa_ioc *ioc, void *ioc_msg, int len); +void bfa_ioc_msgget(struct bfa_ioc *ioc, void *mbmsg); +void bfa_ioc_mbox_regisr(struct bfa_ioc *ioc, enum bfi_mclass mc, + bfa_ioc_mbox_mcfunc_t cbfn, void *cbarg); + +/** + * IOC interfaces + */ + +#define bfa_ioc_pll_init_asic(__ioc) \ + ((__ioc)->ioc_hwif->ioc_pll_init((__ioc)->pcidev.pci_bar_kva, \ + (__ioc)->fcmode)) + +enum bfa_status bfa_ioc_pll_init(struct bfa_ioc *ioc); +enum bfa_status bfa_ioc_cb_pll_init(void __iomem *rb, bool fcmode); +enum bfa_status bfa_ioc_ct_pll_init(void __iomem *rb, bool fcmode); + +#define bfa_ioc_isr_mode_set(__ioc, __msix) \ + ((__ioc)->ioc_hwif->ioc_isr_mode_set(__ioc, __msix)) +#define bfa_ioc_ownership_reset(__ioc) \ + ((__ioc)->ioc_hwif->ioc_ownership_reset(__ioc)) + +void bfa_ioc_set_ct_hwif(struct bfa_ioc *ioc); + +void bfa_ioc_attach(struct bfa_ioc *ioc, void *bfa, + struct bfa_ioc_cbfn *cbfn); +void bfa_ioc_auto_recover(bool auto_recover); +void bfa_ioc_detach(struct bfa_ioc *ioc); +void bfa_ioc_pci_init(struct bfa_ioc *ioc, struct bfa_pcidev *pcidev, + enum bfi_mclass mc); +u32 bfa_ioc_meminfo(void); +void bfa_ioc_mem_claim(struct bfa_ioc *ioc, u8 *dm_kva, u64 dm_pa); +void bfa_ioc_enable(struct bfa_ioc *ioc); +void bfa_ioc_disable(struct bfa_ioc *ioc); +bool bfa_ioc_intx_claim(struct bfa_ioc *ioc); + +void bfa_ioc_boot(struct bfa_ioc *ioc, u32 boot_type, + u32 boot_param); +void bfa_ioc_isr(struct bfa_ioc *ioc, struct bfi_mbmsg *msg); +void bfa_ioc_error_isr(struct bfa_ioc *ioc); +bool bfa_ioc_is_operational(struct bfa_ioc *ioc); +bool bfa_ioc_is_initialized(struct bfa_ioc *ioc); +bool bfa_ioc_is_disabled(struct bfa_ioc *ioc); +bool bfa_ioc_fw_mismatch(struct bfa_ioc *ioc); +bool bfa_ioc_adapter_is_disabled(struct bfa_ioc *ioc); +void bfa_ioc_cfg_complete(struct bfa_ioc *ioc); +enum bfa_ioc_type bfa_ioc_get_type(struct bfa_ioc *ioc); +void bfa_ioc_get_adapter_serial_num(struct bfa_ioc *ioc, char *serial_num); +void bfa_ioc_get_adapter_fw_ver(struct bfa_ioc *ioc, char *fw_ver); +void bfa_ioc_get_adapter_optrom_ver(struct bfa_ioc *ioc, char *optrom_ver); +void bfa_ioc_get_adapter_model(struct bfa_ioc *ioc, char *model); +void bfa_ioc_get_adapter_manufacturer(struct bfa_ioc *ioc, + char *manufacturer); +void bfa_ioc_get_pci_chip_rev(struct bfa_ioc *ioc, char *chip_rev); +enum bfa_ioc_state bfa_ioc_get_state(struct bfa_ioc *ioc); + +void bfa_ioc_get_attr(struct bfa_ioc *ioc, struct bfa_ioc_attr *ioc_attr); +void bfa_ioc_get_adapter_attr(struct bfa_ioc *ioc, + struct bfa_adapter_attr *ad_attr); +u32 bfa_ioc_smem_pgnum(struct bfa_ioc *ioc, u32 fmaddr); +u32 bfa_ioc_smem_pgoff(struct bfa_ioc *ioc, u32 fmaddr); +void bfa_ioc_set_fcmode(struct bfa_ioc *ioc); +bool bfa_ioc_get_fcmode(struct bfa_ioc *ioc); +void bfa_ioc_hbfail_register(struct bfa_ioc *ioc, + struct bfa_ioc_hbfail_notify *notify); +bool bfa_ioc_sem_get(void __iomem *sem_reg); +void bfa_ioc_sem_release(void __iomem *sem_reg); +void bfa_ioc_hw_sem_release(struct bfa_ioc *ioc); +void bfa_ioc_fwver_get(struct bfa_ioc *ioc, + struct bfi_ioc_image_hdr *fwhdr); +bool bfa_ioc_fwver_cmp(struct bfa_ioc *ioc, + struct bfi_ioc_image_hdr *fwhdr); + +/* + * Timeout APIs + */ +void bfa_ioc_timeout(void *ioc); +void bfa_ioc_hb_check(void *ioc); +void bfa_ioc_sem_timeout(void *ioc); + +/* + * bfa mfg wwn API functions + */ +u64 bfa_ioc_get_pwwn(struct bfa_ioc *ioc); +u64 bfa_ioc_get_nwwn(struct bfa_ioc *ioc); +mac_t bfa_ioc_get_mac(struct bfa_ioc *ioc); +u64 bfa_ioc_get_mfg_pwwn(struct bfa_ioc *ioc); +u64 bfa_ioc_get_mfg_nwwn(struct bfa_ioc *ioc); +mac_t bfa_ioc_get_mfg_mac(struct bfa_ioc *ioc); +u64 bfa_ioc_get_adid(struct bfa_ioc *ioc); + +/* + * F/W Image Size & Chunk + */ +u32 *bfa_cb_image_get_chunk(int type, u32 off); +u32 bfa_cb_image_get_size(int type); + +#endif /* __BFA_IOC_H__ */ diff --git a/drivers/net/bna/bfa_ioc_ct.c b/drivers/net/bna/bfa_ioc_ct.c new file mode 100644 index 000000000000..870046e32c8d --- /dev/null +++ b/drivers/net/bna/bfa_ioc_ct.c @@ -0,0 +1,391 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#include "bfa_ioc.h" +#include "cna.h" +#include "bfi.h" +#include "bfi_ctreg.h" +#include "bfa_defs.h" + +/* + * forward declarations + */ +static bool bfa_ioc_ct_firmware_lock(struct bfa_ioc *ioc); +static void bfa_ioc_ct_firmware_unlock(struct bfa_ioc *ioc); +static void bfa_ioc_ct_reg_init(struct bfa_ioc *ioc); +static void bfa_ioc_ct_map_port(struct bfa_ioc *ioc); +static void bfa_ioc_ct_isr_mode_set(struct bfa_ioc *ioc, bool msix); +static void bfa_ioc_ct_notify_hbfail(struct bfa_ioc *ioc); +static void bfa_ioc_ct_ownership_reset(struct bfa_ioc *ioc); + +struct bfa_ioc_hwif hwif_ct; + +/** + * Called from bfa_ioc_attach() to map asic specific calls. + */ +void +bfa_ioc_set_ct_hwif(struct bfa_ioc *ioc) +{ + hwif_ct.ioc_pll_init = bfa_ioc_ct_pll_init; + hwif_ct.ioc_firmware_lock = bfa_ioc_ct_firmware_lock; + hwif_ct.ioc_firmware_unlock = bfa_ioc_ct_firmware_unlock; + hwif_ct.ioc_reg_init = bfa_ioc_ct_reg_init; + hwif_ct.ioc_map_port = bfa_ioc_ct_map_port; + hwif_ct.ioc_isr_mode_set = bfa_ioc_ct_isr_mode_set; + hwif_ct.ioc_notify_hbfail = bfa_ioc_ct_notify_hbfail; + hwif_ct.ioc_ownership_reset = bfa_ioc_ct_ownership_reset; + + ioc->ioc_hwif = &hwif_ct; +} + +/** + * Return true if firmware of current driver matches the running firmware. + */ +static bool +bfa_ioc_ct_firmware_lock(struct bfa_ioc *ioc) +{ + enum bfi_ioc_state ioc_fwstate; + u32 usecnt; + struct bfi_ioc_image_hdr fwhdr; + + /** + * Firmware match check is relevant only for CNA. + */ + if (!ioc->cna) + return true; + + /** + * If bios boot (flash based) -- do not increment usage count + */ + if (bfa_cb_image_get_size(BFA_IOC_FWIMG_TYPE(ioc)) < + BFA_IOC_FWIMG_MINSZ) + return true; + + bfa_ioc_sem_get(ioc->ioc_regs.ioc_usage_sem_reg); + usecnt = readl(ioc->ioc_regs.ioc_usage_reg); + + /** + * If usage count is 0, always return TRUE. + */ + if (usecnt == 0) { + writel(1, ioc->ioc_regs.ioc_usage_reg); + bfa_ioc_sem_release(ioc->ioc_regs.ioc_usage_sem_reg); + return true; + } + + ioc_fwstate = readl(ioc->ioc_regs.ioc_fwstate); + + /** + * Use count cannot be non-zero and chip in uninitialized state. + */ + BUG_ON(!(ioc_fwstate != BFI_IOC_UNINIT)); + + /** + * Check if another driver with a different firmware is active + */ + bfa_ioc_fwver_get(ioc, &fwhdr); + if (!bfa_ioc_fwver_cmp(ioc, &fwhdr)) { + bfa_ioc_sem_release(ioc->ioc_regs.ioc_usage_sem_reg); + return false; + } + + /** + * Same firmware version. Increment the reference count. + */ + usecnt++; + writel(usecnt, ioc->ioc_regs.ioc_usage_reg); + bfa_ioc_sem_release(ioc->ioc_regs.ioc_usage_sem_reg); + return true; +} + +static void +bfa_ioc_ct_firmware_unlock(struct bfa_ioc *ioc) +{ + u32 usecnt; + + /** + * Firmware lock is relevant only for CNA. + */ + if (!ioc->cna) + return; + + /** + * If bios boot (flash based) -- do not decrement usage count + */ + if (bfa_cb_image_get_size(BFA_IOC_FWIMG_TYPE(ioc)) < + BFA_IOC_FWIMG_MINSZ) + return; + + /** + * decrement usage count + */ + bfa_ioc_sem_get(ioc->ioc_regs.ioc_usage_sem_reg); + usecnt = readl(ioc->ioc_regs.ioc_usage_reg); + BUG_ON(!(usecnt > 0)); + + usecnt--; + writel(usecnt, ioc->ioc_regs.ioc_usage_reg); + + bfa_ioc_sem_release(ioc->ioc_regs.ioc_usage_sem_reg); +} + +/** + * Notify other functions on HB failure. + */ +static void +bfa_ioc_ct_notify_hbfail(struct bfa_ioc *ioc) +{ + if (ioc->cna) { + writel(__FW_INIT_HALT_P, ioc->ioc_regs.ll_halt); + /* Wait for halt to take effect */ + readl(ioc->ioc_regs.ll_halt); + } else { + writel(__PSS_ERR_STATUS_SET, ioc->ioc_regs.err_set); + readl(ioc->ioc_regs.err_set); + } +} + +/** + * Host to LPU mailbox message addresses + */ +static struct { u32 hfn_mbox, lpu_mbox, hfn_pgn; } iocreg_fnreg[] = { + { HOSTFN0_LPU_MBOX0_0, LPU_HOSTFN0_MBOX0_0, HOST_PAGE_NUM_FN0 }, + { HOSTFN1_LPU_MBOX0_8, LPU_HOSTFN1_MBOX0_8, HOST_PAGE_NUM_FN1 }, + { HOSTFN2_LPU_MBOX0_0, LPU_HOSTFN2_MBOX0_0, HOST_PAGE_NUM_FN2 }, + { HOSTFN3_LPU_MBOX0_8, LPU_HOSTFN3_MBOX0_8, HOST_PAGE_NUM_FN3 } +}; + +/** + * Host <-> LPU mailbox command/status registers - port 0 + */ +static struct { u32 hfn, lpu; } iocreg_mbcmd_p0[] = { + { HOSTFN0_LPU0_MBOX0_CMD_STAT, LPU0_HOSTFN0_MBOX0_CMD_STAT }, + { HOSTFN1_LPU0_MBOX0_CMD_STAT, LPU0_HOSTFN1_MBOX0_CMD_STAT }, + { HOSTFN2_LPU0_MBOX0_CMD_STAT, LPU0_HOSTFN2_MBOX0_CMD_STAT }, + { HOSTFN3_LPU0_MBOX0_CMD_STAT, LPU0_HOSTFN3_MBOX0_CMD_STAT } +}; + +/** + * Host <-> LPU mailbox command/status registers - port 1 + */ +static struct { u32 hfn, lpu; } iocreg_mbcmd_p1[] = { + { HOSTFN0_LPU1_MBOX0_CMD_STAT, LPU1_HOSTFN0_MBOX0_CMD_STAT }, + { HOSTFN1_LPU1_MBOX0_CMD_STAT, LPU1_HOSTFN1_MBOX0_CMD_STAT }, + { HOSTFN2_LPU1_MBOX0_CMD_STAT, LPU1_HOSTFN2_MBOX0_CMD_STAT }, + { HOSTFN3_LPU1_MBOX0_CMD_STAT, LPU1_HOSTFN3_MBOX0_CMD_STAT } +}; + +static void +bfa_ioc_ct_reg_init(struct bfa_ioc *ioc) +{ + void __iomem *rb; + int pcifn = bfa_ioc_pcifn(ioc); + + rb = bfa_ioc_bar0(ioc); + + ioc->ioc_regs.hfn_mbox = rb + iocreg_fnreg[pcifn].hfn_mbox; + ioc->ioc_regs.lpu_mbox = rb + iocreg_fnreg[pcifn].lpu_mbox; + ioc->ioc_regs.host_page_num_fn = rb + iocreg_fnreg[pcifn].hfn_pgn; + + if (ioc->port_id == 0) { + ioc->ioc_regs.heartbeat = rb + BFA_IOC0_HBEAT_REG; + ioc->ioc_regs.ioc_fwstate = rb + BFA_IOC0_STATE_REG; + ioc->ioc_regs.hfn_mbox_cmd = rb + iocreg_mbcmd_p0[pcifn].hfn; + ioc->ioc_regs.lpu_mbox_cmd = rb + iocreg_mbcmd_p0[pcifn].lpu; + ioc->ioc_regs.ll_halt = rb + FW_INIT_HALT_P0; + } else { + ioc->ioc_regs.heartbeat = (rb + BFA_IOC1_HBEAT_REG); + ioc->ioc_regs.ioc_fwstate = (rb + BFA_IOC1_STATE_REG); + ioc->ioc_regs.hfn_mbox_cmd = rb + iocreg_mbcmd_p1[pcifn].hfn; + ioc->ioc_regs.lpu_mbox_cmd = rb + iocreg_mbcmd_p1[pcifn].lpu; + ioc->ioc_regs.ll_halt = rb + FW_INIT_HALT_P1; + } + + /* + * PSS control registers + */ + ioc->ioc_regs.pss_ctl_reg = (rb + PSS_CTL_REG); + ioc->ioc_regs.pss_err_status_reg = (rb + PSS_ERR_STATUS_REG); + ioc->ioc_regs.app_pll_fast_ctl_reg = (rb + APP_PLL_425_CTL_REG); + ioc->ioc_regs.app_pll_slow_ctl_reg = (rb + APP_PLL_312_CTL_REG); + + /* + * IOC semaphore registers and serialization + */ + ioc->ioc_regs.ioc_sem_reg = (rb + HOST_SEM0_REG); + ioc->ioc_regs.ioc_usage_sem_reg = (rb + HOST_SEM1_REG); + ioc->ioc_regs.ioc_init_sem_reg = (rb + HOST_SEM2_REG); + ioc->ioc_regs.ioc_usage_reg = (rb + BFA_FW_USE_COUNT); + + /** + * sram memory access + */ + ioc->ioc_regs.smem_page_start = (rb + PSS_SMEM_PAGE_START); + ioc->ioc_regs.smem_pg0 = BFI_IOC_SMEM_PG0_CT; + + /* + * err set reg : for notification of hb failure in fcmode + */ + ioc->ioc_regs.err_set = (rb + ERR_SET_REG); +} + +/** + * Initialize IOC to port mapping. + */ + +#define FNC_PERS_FN_SHIFT(__fn) ((__fn) * 8) +static void +bfa_ioc_ct_map_port(struct bfa_ioc *ioc) +{ + void __iomem *rb = ioc->pcidev.pci_bar_kva; + u32 r32; + + /** + * For catapult, base port id on personality register and IOC type + */ + r32 = readl(rb + FNC_PERS_REG); + r32 >>= FNC_PERS_FN_SHIFT(bfa_ioc_pcifn(ioc)); + ioc->port_id = (r32 & __F0_PORT_MAP_MK) >> __F0_PORT_MAP_SH; + +} + +/** + * Set interrupt mode for a function: INTX or MSIX + */ +static void +bfa_ioc_ct_isr_mode_set(struct bfa_ioc *ioc, bool msix) +{ + void __iomem *rb = ioc->pcidev.pci_bar_kva; + u32 r32, mode; + + r32 = readl(rb + FNC_PERS_REG); + + mode = (r32 >> FNC_PERS_FN_SHIFT(bfa_ioc_pcifn(ioc))) & + __F0_INTX_STATUS; + + /** + * If already in desired mode, do not change anything + */ + if (!msix && mode) + return; + + if (msix) + mode = __F0_INTX_STATUS_MSIX; + else + mode = __F0_INTX_STATUS_INTA; + + r32 &= ~(__F0_INTX_STATUS << FNC_PERS_FN_SHIFT(bfa_ioc_pcifn(ioc))); + r32 |= (mode << FNC_PERS_FN_SHIFT(bfa_ioc_pcifn(ioc))); + + writel(r32, rb + FNC_PERS_REG); +} + +/** + * Cleanup hw semaphore and usecnt registers + */ +static void +bfa_ioc_ct_ownership_reset(struct bfa_ioc *ioc) +{ + if (ioc->cna) { + bfa_ioc_sem_get(ioc->ioc_regs.ioc_usage_sem_reg); + writel(0, ioc->ioc_regs.ioc_usage_reg); + bfa_ioc_sem_release(ioc->ioc_regs.ioc_usage_sem_reg); + } + + /* + * Read the hw sem reg to make sure that it is locked + * before we clear it. If it is not locked, writing 1 + * will lock it instead of clearing it. + */ + readl(ioc->ioc_regs.ioc_sem_reg); + bfa_ioc_hw_sem_release(ioc); +} + +enum bfa_status +bfa_ioc_ct_pll_init(void __iomem *rb, bool fcmode) +{ + u32 pll_sclk, pll_fclk, r32; + + pll_sclk = __APP_PLL_312_LRESETN | __APP_PLL_312_ENARST | + __APP_PLL_312_RSEL200500 | __APP_PLL_312_P0_1(3U) | + __APP_PLL_312_JITLMT0_1(3U) | + __APP_PLL_312_CNTLMT0_1(1U); + pll_fclk = __APP_PLL_425_LRESETN | __APP_PLL_425_ENARST | + __APP_PLL_425_RSEL200500 | __APP_PLL_425_P0_1(3U) | + __APP_PLL_425_JITLMT0_1(3U) | + __APP_PLL_425_CNTLMT0_1(1U); + if (fcmode) { + writel(0, (rb + OP_MODE)); + writel(__APP_EMS_CMLCKSEL | + __APP_EMS_REFCKBUFEN2 | + __APP_EMS_CHANNEL_SEL, + (rb + ETH_MAC_SER_REG)); + } else { + writel(__GLOBAL_FCOE_MODE, (rb + OP_MODE)); + writel(__APP_EMS_REFCKBUFEN1, + (rb + ETH_MAC_SER_REG)); + } + writel(BFI_IOC_UNINIT, (rb + BFA_IOC0_STATE_REG)); + writel(BFI_IOC_UNINIT, (rb + BFA_IOC1_STATE_REG)); + writel(0xffffffffU, (rb + HOSTFN0_INT_MSK)); + writel(0xffffffffU, (rb + HOSTFN1_INT_MSK)); + writel(0xffffffffU, (rb + HOSTFN0_INT_STATUS)); + writel(0xffffffffU, (rb + HOSTFN1_INT_STATUS)); + writel(0xffffffffU, (rb + HOSTFN0_INT_MSK)); + writel(0xffffffffU, (rb + HOSTFN1_INT_MSK)); + writel(pll_sclk | + __APP_PLL_312_LOGIC_SOFT_RESET, + rb + APP_PLL_312_CTL_REG); + writel(pll_fclk | + __APP_PLL_425_LOGIC_SOFT_RESET, + rb + APP_PLL_425_CTL_REG); + writel(pll_sclk | + __APP_PLL_312_LOGIC_SOFT_RESET | __APP_PLL_312_ENABLE, + rb + APP_PLL_312_CTL_REG); + writel(pll_fclk | + __APP_PLL_425_LOGIC_SOFT_RESET | __APP_PLL_425_ENABLE, + rb + APP_PLL_425_CTL_REG); + readl(rb + HOSTFN0_INT_MSK); + udelay(2000); + writel(0xffffffffU, (rb + HOSTFN0_INT_STATUS)); + writel(0xffffffffU, (rb + HOSTFN1_INT_STATUS)); + writel(pll_sclk | + __APP_PLL_312_ENABLE, + rb + APP_PLL_312_CTL_REG); + writel(pll_fclk | + __APP_PLL_425_ENABLE, + rb + APP_PLL_425_CTL_REG); + if (!fcmode) { + writel(__PMM_1T_RESET_P, (rb + PMM_1T_RESET_REG_P0)); + writel(__PMM_1T_RESET_P, (rb + PMM_1T_RESET_REG_P1)); + } + r32 = readl((rb + PSS_CTL_REG)); + r32 &= ~__PSS_LMEM_RESET; + writel(r32, (rb + PSS_CTL_REG)); + udelay(1000); + if (!fcmode) { + writel(0, (rb + PMM_1T_RESET_REG_P0)); + writel(0, (rb + PMM_1T_RESET_REG_P1)); + } + + writel(__EDRAM_BISTR_START, (rb + MBIST_CTL_REG)); + udelay(1000); + r32 = readl((rb + MBIST_STAT_REG)); + writel(0, (rb + MBIST_CTL_REG)); + return BFA_STATUS_OK; +} diff --git a/drivers/net/bna/bfa_sm.h b/drivers/net/bna/bfa_sm.h new file mode 100644 index 000000000000..1d3d975d6f68 --- /dev/null +++ b/drivers/net/bna/bfa_sm.h @@ -0,0 +1,88 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +/** + * @file bfasm.h State machine defines + */ + +#ifndef __BFA_SM_H__ +#define __BFA_SM_H__ + +#include "cna.h" + +typedef void (*bfa_sm_t)(void *sm, int event); + +/** + * oc - object class eg. bfa_ioc + * st - state, eg. reset + * otype - object type, eg. struct bfa_ioc + * etype - object type, eg. enum ioc_event + */ +#define bfa_sm_state_decl(oc, st, otype, etype) \ + static void oc ## _sm_ ## st(otype * fsm, etype event) + +#define bfa_sm_set_state(_sm, _state) ((_sm)->sm = (bfa_sm_t)(_state)) +#define bfa_sm_send_event(_sm, _event) ((_sm)->sm((_sm), (_event))) +#define bfa_sm_get_state(_sm) ((_sm)->sm) +#define bfa_sm_cmp_state(_sm, _state) ((_sm)->sm == (bfa_sm_t)(_state)) + +/** + * For converting from state machine function to state encoding. + */ +struct bfa_sm_table { + bfa_sm_t sm; /*!< state machine function */ + int state; /*!< state machine encoding */ + char *name; /*!< state name for display */ +}; +#define BFA_SM(_sm) ((bfa_sm_t)(_sm)) + +/** + * State machine with entry actions. + */ +typedef void (*bfa_fsm_t)(void *fsm, int event); + +/** + * oc - object class eg. bfa_ioc + * st - state, eg. reset + * otype - object type, eg. struct bfa_ioc + * etype - object type, eg. enum ioc_event + */ +#define bfa_fsm_state_decl(oc, st, otype, etype) \ + static void oc ## _sm_ ## st(otype * fsm, etype event); \ + static void oc ## _sm_ ## st ## _entry(otype * fsm) + +#define bfa_fsm_set_state(_fsm, _state) do { \ + (_fsm)->fsm = (bfa_fsm_t)(_state); \ + _state ## _entry(_fsm); \ +} while (0) + +#define bfa_fsm_send_event(_fsm, _event) ((_fsm)->fsm((_fsm), (_event))) +#define bfa_fsm_get_state(_fsm) ((_fsm)->fsm) +#define bfa_fsm_cmp_state(_fsm, _state) \ + ((_fsm)->fsm == (bfa_fsm_t)(_state)) + +static inline int +bfa_sm_to_state(struct bfa_sm_table *smt, bfa_sm_t sm) +{ + int i = 0; + + while (smt[i].sm && smt[i].sm != sm) + i++; + return smt[i].state; +} +#endif diff --git a/drivers/net/bna/bfa_wc.h b/drivers/net/bna/bfa_wc.h new file mode 100644 index 000000000000..d0e4caee67b0 --- /dev/null +++ b/drivers/net/bna/bfa_wc.h @@ -0,0 +1,69 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +/** + * @file bfa_wc.h Generic wait counter. + */ + +#ifndef __BFA_WC_H__ +#define __BFA_WC_H__ + +typedef void (*bfa_wc_resume_t) (void *cbarg); + +struct bfa_wc { + bfa_wc_resume_t wc_resume; + void *wc_cbarg; + int wc_count; +}; + +static inline void +bfa_wc_up(struct bfa_wc *wc) +{ + wc->wc_count++; +} + +static inline void +bfa_wc_down(struct bfa_wc *wc) +{ + wc->wc_count--; + if (wc->wc_count == 0) + wc->wc_resume(wc->wc_cbarg); +} + +/** + * Initialize a waiting counter. + */ +static inline void +bfa_wc_init(struct bfa_wc *wc, bfa_wc_resume_t wc_resume, void *wc_cbarg) +{ + wc->wc_resume = wc_resume; + wc->wc_cbarg = wc_cbarg; + wc->wc_count = 0; + bfa_wc_up(wc); +} + +/** + * Wait for counter to reach zero + */ +static inline void +bfa_wc_wait(struct bfa_wc *wc) +{ + bfa_wc_down(wc); +} + +#endif diff --git a/drivers/net/bna/bfi.h b/drivers/net/bna/bfi.h new file mode 100644 index 000000000000..a97396811050 --- /dev/null +++ b/drivers/net/bna/bfi.h @@ -0,0 +1,392 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#ifndef __BFI_H__ +#define __BFI_H__ + +#include "bfa_defs.h" + +#pragma pack(1) + +/** + * BFI FW image type + */ +#define BFI_FLASH_CHUNK_SZ 256 /*!< Flash chunk size */ +#define BFI_FLASH_CHUNK_SZ_WORDS (BFI_FLASH_CHUNK_SZ/sizeof(u32)) +enum { + BFI_IMAGE_CB_FC, + BFI_IMAGE_CT_FC, + BFI_IMAGE_CT_CNA, + BFI_IMAGE_MAX, +}; + +/** + * Msg header common to all msgs + */ +struct bfi_mhdr { + u8 msg_class; /*!< @ref enum bfi_mclass */ + u8 msg_id; /*!< msg opcode with in the class */ + union { + struct { + u8 rsvd; + u8 lpu_id; /*!< msg destination */ + } h2i; + u16 i2htok; /*!< token in msgs to host */ + } mtag; +}; + +#define bfi_h2i_set(_mh, _mc, _op, _lpuid) do { \ + (_mh).msg_class = (_mc); \ + (_mh).msg_id = (_op); \ + (_mh).mtag.h2i.lpu_id = (_lpuid); \ +} while (0) + +#define bfi_i2h_set(_mh, _mc, _op, _i2htok) do { \ + (_mh).msg_class = (_mc); \ + (_mh).msg_id = (_op); \ + (_mh).mtag.i2htok = (_i2htok); \ +} while (0) + +/* + * Message opcodes: 0-127 to firmware, 128-255 to host + */ +#define BFI_I2H_OPCODE_BASE 128 +#define BFA_I2HM(_x) ((_x) + BFI_I2H_OPCODE_BASE) + +/** + **************************************************************************** + * + * Scatter Gather Element and Page definition + * + **************************************************************************** + */ + +#define BFI_SGE_INLINE 1 +#define BFI_SGE_INLINE_MAX (BFI_SGE_INLINE + 1) + +/** + * SG Flags + */ +enum { + BFI_SGE_DATA = 0, /*!< data address, not last */ + BFI_SGE_DATA_CPL = 1, /*!< data addr, last in current page */ + BFI_SGE_DATA_LAST = 3, /*!< data address, last */ + BFI_SGE_LINK = 2, /*!< link address */ + BFI_SGE_PGDLEN = 2, /*!< cumulative data length for page */ +}; + +/** + * DMA addresses + */ +union bfi_addr_u { + struct { + u32 addr_lo; + u32 addr_hi; + } a32; +}; + +/** + * Scatter Gather Element + */ +struct bfi_sge { +#ifdef __BIGENDIAN + u32 flags:2, + rsvd:2, + sg_len:28; +#else + u32 sg_len:28, + rsvd:2, + flags:2; +#endif + union bfi_addr_u sga; +}; + +/** + * Scatter Gather Page + */ +#define BFI_SGPG_DATA_SGES 7 +#define BFI_SGPG_SGES_MAX (BFI_SGPG_DATA_SGES + 1) +#define BFI_SGPG_RSVD_WD_LEN 8 +struct bfi_sgpg { + struct bfi_sge sges[BFI_SGPG_SGES_MAX]; + u32 rsvd[BFI_SGPG_RSVD_WD_LEN]; +}; + +/* + * Large Message structure - 128 Bytes size Msgs + */ +#define BFI_LMSG_SZ 128 +#define BFI_LMSG_PL_WSZ \ + ((BFI_LMSG_SZ - sizeof(struct bfi_mhdr)) / 4) + +struct bfi_msg { + struct bfi_mhdr mhdr; + u32 pl[BFI_LMSG_PL_WSZ]; +}; + +/** + * Mailbox message structure + */ +#define BFI_MBMSG_SZ 7 +struct bfi_mbmsg { + struct bfi_mhdr mh; + u32 pl[BFI_MBMSG_SZ]; +}; + +/** + * Message Classes + */ +enum bfi_mclass { + BFI_MC_IOC = 1, /*!< IO Controller (IOC) */ + BFI_MC_DIAG = 2, /*!< Diagnostic Msgs */ + BFI_MC_FLASH = 3, /*!< Flash message class */ + BFI_MC_CEE = 4, /*!< CEE */ + BFI_MC_FCPORT = 5, /*!< FC port */ + BFI_MC_IOCFC = 6, /*!< FC - IO Controller (IOC) */ + BFI_MC_LL = 7, /*!< Link Layer */ + BFI_MC_UF = 8, /*!< Unsolicited frame receive */ + BFI_MC_FCXP = 9, /*!< FC Transport */ + BFI_MC_LPS = 10, /*!< lport fc login services */ + BFI_MC_RPORT = 11, /*!< Remote port */ + BFI_MC_ITNIM = 12, /*!< I-T nexus (Initiator mode) */ + BFI_MC_IOIM_READ = 13, /*!< read IO (Initiator mode) */ + BFI_MC_IOIM_WRITE = 14, /*!< write IO (Initiator mode) */ + BFI_MC_IOIM_IO = 15, /*!< IO (Initiator mode) */ + BFI_MC_IOIM = 16, /*!< IO (Initiator mode) */ + BFI_MC_IOIM_IOCOM = 17, /*!< good IO completion */ + BFI_MC_TSKIM = 18, /*!< Initiator Task management */ + BFI_MC_SBOOT = 19, /*!< SAN boot services */ + BFI_MC_IPFC = 20, /*!< IP over FC Msgs */ + BFI_MC_PORT = 21, /*!< Physical port */ + BFI_MC_SFP = 22, /*!< SFP module */ + BFI_MC_MSGQ = 23, /*!< MSGQ */ + BFI_MC_ENET = 24, /*!< ENET commands/responses */ + BFI_MC_MAX = 32 +}; + +#define BFI_IOC_MAX_CQS 4 +#define BFI_IOC_MAX_CQS_ASIC 8 +#define BFI_IOC_MSGLEN_MAX 32 /* 32 bytes */ + +#define BFI_BOOT_TYPE_OFF 8 +#define BFI_BOOT_PARAM_OFF 12 + +#define BFI_BOOT_TYPE_NORMAL 0 /* param is device id */ +#define BFI_BOOT_TYPE_FLASH 1 +#define BFI_BOOT_TYPE_MEMTEST 2 + +#define BFI_BOOT_MEMTEST_RES_ADDR 0x900 +#define BFI_BOOT_MEMTEST_RES_SIG 0xA0A1A2A3 + +/** + *---------------------------------------------------------------------- + * IOC + *---------------------------------------------------------------------- + */ + +enum bfi_ioc_h2i_msgs { + BFI_IOC_H2I_ENABLE_REQ = 1, + BFI_IOC_H2I_DISABLE_REQ = 2, + BFI_IOC_H2I_GETATTR_REQ = 3, + BFI_IOC_H2I_DBG_SYNC = 4, + BFI_IOC_H2I_DBG_DUMP = 5, +}; + +enum bfi_ioc_i2h_msgs { + BFI_IOC_I2H_ENABLE_REPLY = BFA_I2HM(1), + BFI_IOC_I2H_DISABLE_REPLY = BFA_I2HM(2), + BFI_IOC_I2H_GETATTR_REPLY = BFA_I2HM(3), + BFI_IOC_I2H_READY_EVENT = BFA_I2HM(4), + BFI_IOC_I2H_HBEAT = BFA_I2HM(5), +}; + +/** + * BFI_IOC_H2I_GETATTR_REQ message + */ +struct bfi_ioc_getattr_req { + struct bfi_mhdr mh; + union bfi_addr_u attr_addr; +}; + +struct bfi_ioc_attr { + u64 mfg_pwwn; /*!< Mfg port wwn */ + u64 mfg_nwwn; /*!< Mfg node wwn */ + mac_t mfg_mac; /*!< Mfg mac */ + u16 rsvd_a; + u64 pwwn; + u64 nwwn; + mac_t mac; /*!< PBC or Mfg mac */ + u16 rsvd_b; + mac_t fcoe_mac; + u16 rsvd_c; + char brcd_serialnum[STRSZ(BFA_MFG_SERIALNUM_SIZE)]; + u8 pcie_gen; + u8 pcie_lanes_orig; + u8 pcie_lanes; + u8 rx_bbcredit; /*!< receive buffer credits */ + u32 adapter_prop; /*!< adapter properties */ + u16 maxfrsize; /*!< max receive frame size */ + char asic_rev; + u8 rsvd_d; + char fw_version[BFA_VERSION_LEN]; + char optrom_version[BFA_VERSION_LEN]; + struct bfa_mfg_vpd vpd; + u32 card_type; /*!< card type */ +}; + +/** + * BFI_IOC_I2H_GETATTR_REPLY message + */ +struct bfi_ioc_getattr_reply { + struct bfi_mhdr mh; /*!< Common msg header */ + u8 status; /*!< cfg reply status */ + u8 rsvd[3]; +}; + +/** + * Firmware memory page offsets + */ +#define BFI_IOC_SMEM_PG0_CB (0x40) +#define BFI_IOC_SMEM_PG0_CT (0x180) + +/** + * Firmware statistic offset + */ +#define BFI_IOC_FWSTATS_OFF (0x6B40) +#define BFI_IOC_FWSTATS_SZ (4096) + +/** + * Firmware trace offset + */ +#define BFI_IOC_TRC_OFF (0x4b00) +#define BFI_IOC_TRC_ENTS 256 + +#define BFI_IOC_FW_SIGNATURE (0xbfadbfad) +#define BFI_IOC_MD5SUM_SZ 4 +struct bfi_ioc_image_hdr { + u32 signature; /*!< constant signature */ + u32 rsvd_a; + u32 exec; /*!< exec vector */ + u32 param; /*!< parameters */ + u32 rsvd_b[4]; + u32 md5sum[BFI_IOC_MD5SUM_SZ]; +}; + +/** + * BFI_IOC_I2H_READY_EVENT message + */ +struct bfi_ioc_rdy_event { + struct bfi_mhdr mh; /*!< common msg header */ + u8 init_status; /*!< init event status */ + u8 rsvd[3]; +}; + +struct bfi_ioc_hbeat { + struct bfi_mhdr mh; /*!< common msg header */ + u32 hb_count; /*!< current heart beat count */ +}; + +/** + * IOC hardware/firmware state + */ +enum bfi_ioc_state { + BFI_IOC_UNINIT = 0, /*!< not initialized */ + BFI_IOC_INITING = 1, /*!< h/w is being initialized */ + BFI_IOC_HWINIT = 2, /*!< h/w is initialized */ + BFI_IOC_CFG = 3, /*!< IOC configuration in progress */ + BFI_IOC_OP = 4, /*!< IOC is operational */ + BFI_IOC_DISABLING = 5, /*!< IOC is being disabled */ + BFI_IOC_DISABLED = 6, /*!< IOC is disabled */ + BFI_IOC_CFG_DISABLED = 7, /*!< IOC is being disabled;transient */ + BFI_IOC_FAIL = 8, /*!< IOC heart-beat failure */ + BFI_IOC_MEMTEST = 9, /*!< IOC is doing memtest */ +}; + +#define BFI_IOC_ENDIAN_SIG 0x12345678 + +enum { + BFI_ADAPTER_TYPE_FC = 0x01, /*!< FC adapters */ + BFI_ADAPTER_TYPE_MK = 0x0f0000, /*!< adapter type mask */ + BFI_ADAPTER_TYPE_SH = 16, /*!< adapter type shift */ + BFI_ADAPTER_NPORTS_MK = 0xff00, /*!< number of ports mask */ + BFI_ADAPTER_NPORTS_SH = 8, /*!< number of ports shift */ + BFI_ADAPTER_SPEED_MK = 0xff, /*!< adapter speed mask */ + BFI_ADAPTER_SPEED_SH = 0, /*!< adapter speed shift */ + BFI_ADAPTER_PROTO = 0x100000, /*!< prototype adapaters */ + BFI_ADAPTER_TTV = 0x200000, /*!< TTV debug capable */ + BFI_ADAPTER_UNSUPP = 0x400000, /*!< unknown adapter type */ +}; + +#define BFI_ADAPTER_GETP(__prop, __adap_prop) \ + (((__adap_prop) & BFI_ADAPTER_ ## __prop ## _MK) >> \ + BFI_ADAPTER_ ## __prop ## _SH) +#define BFI_ADAPTER_SETP(__prop, __val) \ + ((__val) << BFI_ADAPTER_ ## __prop ## _SH) +#define BFI_ADAPTER_IS_PROTO(__adap_type) \ + ((__adap_type) & BFI_ADAPTER_PROTO) +#define BFI_ADAPTER_IS_TTV(__adap_type) \ + ((__adap_type) & BFI_ADAPTER_TTV) +#define BFI_ADAPTER_IS_UNSUPP(__adap_type) \ + ((__adap_type) & BFI_ADAPTER_UNSUPP) +#define BFI_ADAPTER_IS_SPECIAL(__adap_type) \ + ((__adap_type) & (BFI_ADAPTER_TTV | BFI_ADAPTER_PROTO | \ + BFI_ADAPTER_UNSUPP)) + +/** + * BFI_IOC_H2I_ENABLE_REQ & BFI_IOC_H2I_DISABLE_REQ messages + */ +struct bfi_ioc_ctrl_req { + struct bfi_mhdr mh; + u8 ioc_class; + u8 rsvd[3]; + u32 tv_sec; +}; + +/** + * BFI_IOC_I2H_ENABLE_REPLY & BFI_IOC_I2H_DISABLE_REPLY messages + */ +struct bfi_ioc_ctrl_reply { + struct bfi_mhdr mh; /*!< Common msg header */ + u8 status; /*!< enable/disable status */ + u8 rsvd[3]; +}; + +#define BFI_IOC_MSGSZ 8 +/** + * H2I Messages + */ +union bfi_ioc_h2i_msg_u { + struct bfi_mhdr mh; + struct bfi_ioc_ctrl_req enable_req; + struct bfi_ioc_ctrl_req disable_req; + struct bfi_ioc_getattr_req getattr_req; + u32 mboxmsg[BFI_IOC_MSGSZ]; +}; + +/** + * I2H Messages + */ +union bfi_ioc_i2h_msg_u { + struct bfi_mhdr mh; + struct bfi_ioc_rdy_event rdy_event; + u32 mboxmsg[BFI_IOC_MSGSZ]; +}; + +#pragma pack() + +#endif /* __BFI_H__ */ diff --git a/drivers/net/bna/bfi_cna.h b/drivers/net/bna/bfi_cna.h new file mode 100644 index 000000000000..4eecabea397b --- /dev/null +++ b/drivers/net/bna/bfi_cna.h @@ -0,0 +1,199 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#ifndef __BFI_CNA_H__ +#define __BFI_CNA_H__ + +#include "bfi.h" +#include "bfa_defs_cna.h" + +#pragma pack(1) + +enum bfi_port_h2i { + BFI_PORT_H2I_ENABLE_REQ = (1), + BFI_PORT_H2I_DISABLE_REQ = (2), + BFI_PORT_H2I_GET_STATS_REQ = (3), + BFI_PORT_H2I_CLEAR_STATS_REQ = (4), +}; + +enum bfi_port_i2h { + BFI_PORT_I2H_ENABLE_RSP = BFA_I2HM(1), + BFI_PORT_I2H_DISABLE_RSP = BFA_I2HM(2), + BFI_PORT_I2H_GET_STATS_RSP = BFA_I2HM(3), + BFI_PORT_I2H_CLEAR_STATS_RSP = BFA_I2HM(4), +}; + +/** + * Generic REQ type + */ +struct bfi_port_generic_req { + struct bfi_mhdr mh; /*!< msg header */ + u32 msgtag; /*!< msgtag for reply */ + u32 rsvd; +}; + +/** + * Generic RSP type + */ +struct bfi_port_generic_rsp { + struct bfi_mhdr mh; /*!< common msg header */ + u8 status; /*!< port enable status */ + u8 rsvd[3]; + u32 msgtag; /*!< msgtag for reply */ +}; + +/** + * @todo + * BFI_PORT_H2I_ENABLE_REQ + */ + +/** + * @todo + * BFI_PORT_I2H_ENABLE_RSP + */ + +/** + * BFI_PORT_H2I_DISABLE_REQ + */ + +/** + * BFI_PORT_I2H_DISABLE_RSP + */ + +/** + * BFI_PORT_H2I_GET_STATS_REQ + */ +struct bfi_port_get_stats_req { + struct bfi_mhdr mh; /*!< common msg header */ + union bfi_addr_u dma_addr; +}; + +/** + * BFI_PORT_I2H_GET_STATS_RSP + */ + +/** + * BFI_PORT_H2I_CLEAR_STATS_REQ + */ + +/** + * BFI_PORT_I2H_CLEAR_STATS_RSP + */ + +union bfi_port_h2i_msg_u { + struct bfi_mhdr mh; + struct bfi_port_generic_req enable_req; + struct bfi_port_generic_req disable_req; + struct bfi_port_get_stats_req getstats_req; + struct bfi_port_generic_req clearstats_req; +}; + +union bfi_port_i2h_msg_u { + struct bfi_mhdr mh; + struct bfi_port_generic_rsp enable_rsp; + struct bfi_port_generic_rsp disable_rsp; + struct bfi_port_generic_rsp getstats_rsp; + struct bfi_port_generic_rsp clearstats_rsp; +}; + +/* @brief Mailbox commands from host to (DCBX/LLDP) firmware */ +enum bfi_cee_h2i_msgs { + BFI_CEE_H2I_GET_CFG_REQ = 1, + BFI_CEE_H2I_RESET_STATS = 2, + BFI_CEE_H2I_GET_STATS_REQ = 3, +}; + +/* @brief Mailbox reply and AEN messages from DCBX/LLDP firmware to host */ +enum bfi_cee_i2h_msgs { + BFI_CEE_I2H_GET_CFG_RSP = BFA_I2HM(1), + BFI_CEE_I2H_RESET_STATS_RSP = BFA_I2HM(2), + BFI_CEE_I2H_GET_STATS_RSP = BFA_I2HM(3), +}; + +/* Data structures */ + +/* + * @brief H2I command structure for resetting the stats. + * BFI_CEE_H2I_RESET_STATS + */ +struct bfi_lldp_reset_stats { + struct bfi_mhdr mh; +}; + +/* + * @brief H2I command structure for resetting the stats. + * BFI_CEE_H2I_RESET_STATS + */ +struct bfi_cee_reset_stats { + struct bfi_mhdr mh; +}; + +/* + * @brief get configuration command from host + * BFI_CEE_H2I_GET_CFG_REQ + */ +struct bfi_cee_get_req { + struct bfi_mhdr mh; + union bfi_addr_u dma_addr; +}; + +/* + * @brief reply message from firmware + * BFI_CEE_I2H_GET_CFG_RSP + */ +struct bfi_cee_get_rsp { + struct bfi_mhdr mh; + u8 cmd_status; + u8 rsvd[3]; +}; + +/* + * @brief get configuration command from host + * BFI_CEE_H2I_GET_STATS_REQ + */ +struct bfi_cee_stats_req { + struct bfi_mhdr mh; + union bfi_addr_u dma_addr; +}; + +/* + * @brief reply message from firmware + * BFI_CEE_I2H_GET_STATS_RSP + */ +struct bfi_cee_stats_rsp { + struct bfi_mhdr mh; + u8 cmd_status; + u8 rsvd[3]; +}; + +/* @brief mailbox command structures from host to firmware */ +union bfi_cee_h2i_msg_u { + struct bfi_mhdr mh; + struct bfi_cee_get_req get_req; + struct bfi_cee_stats_req stats_req; +}; + +/* @brief mailbox message structures from firmware to host */ +union bfi_cee_i2h_msg_u { + struct bfi_mhdr mh; + struct bfi_cee_get_rsp get_rsp; + struct bfi_cee_stats_rsp stats_rsp; +}; + +#pragma pack() + +#endif /* __BFI_CNA_H__ */ diff --git a/drivers/net/bna/bfi_ctreg.h b/drivers/net/bna/bfi_ctreg.h new file mode 100644 index 000000000000..404ea351d4a1 --- /dev/null +++ b/drivers/net/bna/bfi_ctreg.h @@ -0,0 +1,637 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +/* + * bfi_ctreg.h catapult host block register definitions + * + * !!! Do not edit. Auto generated. !!! + */ + +#ifndef __BFI_CTREG_H__ +#define __BFI_CTREG_H__ + +#define HOSTFN0_LPU_MBOX0_0 0x00019200 +#define HOSTFN1_LPU_MBOX0_8 0x00019260 +#define LPU_HOSTFN0_MBOX0_0 0x00019280 +#define LPU_HOSTFN1_MBOX0_8 0x000192e0 +#define HOSTFN2_LPU_MBOX0_0 0x00019400 +#define HOSTFN3_LPU_MBOX0_8 0x00019460 +#define LPU_HOSTFN2_MBOX0_0 0x00019480 +#define LPU_HOSTFN3_MBOX0_8 0x000194e0 +#define HOSTFN0_INT_STATUS 0x00014000 +#define __HOSTFN0_HALT_OCCURRED 0x01000000 +#define __HOSTFN0_INT_STATUS_LVL_MK 0x00f00000 +#define __HOSTFN0_INT_STATUS_LVL_SH 20 +#define __HOSTFN0_INT_STATUS_LVL(_v) ((_v) << __HOSTFN0_INT_STATUS_LVL_SH) +#define __HOSTFN0_INT_STATUS_P_MK 0x000f0000 +#define __HOSTFN0_INT_STATUS_P_SH 16 +#define __HOSTFN0_INT_STATUS_P(_v) ((_v) << __HOSTFN0_INT_STATUS_P_SH) +#define __HOSTFN0_INT_STATUS_F 0x0000ffff +#define HOSTFN0_INT_MSK 0x00014004 +#define HOST_PAGE_NUM_FN0 0x00014008 +#define __HOST_PAGE_NUM_FN 0x000001ff +#define HOST_MSIX_ERR_INDEX_FN0 0x0001400c +#define __MSIX_ERR_INDEX_FN 0x000001ff +#define HOSTFN1_INT_STATUS 0x00014100 +#define __HOSTFN1_HALT_OCCURRED 0x01000000 +#define __HOSTFN1_INT_STATUS_LVL_MK 0x00f00000 +#define __HOSTFN1_INT_STATUS_LVL_SH 20 +#define __HOSTFN1_INT_STATUS_LVL(_v) ((_v) << __HOSTFN1_INT_STATUS_LVL_SH) +#define __HOSTFN1_INT_STATUS_P_MK 0x000f0000 +#define __HOSTFN1_INT_STATUS_P_SH 16 +#define __HOSTFN1_INT_STATUS_P(_v) ((_v) << __HOSTFN1_INT_STATUS_P_SH) +#define __HOSTFN1_INT_STATUS_F 0x0000ffff +#define HOSTFN1_INT_MSK 0x00014104 +#define HOST_PAGE_NUM_FN1 0x00014108 +#define HOST_MSIX_ERR_INDEX_FN1 0x0001410c +#define APP_PLL_425_CTL_REG 0x00014204 +#define __P_425_PLL_LOCK 0x80000000 +#define __APP_PLL_425_SRAM_USE_100MHZ 0x00100000 +#define __APP_PLL_425_RESET_TIMER_MK 0x000e0000 +#define __APP_PLL_425_RESET_TIMER_SH 17 +#define __APP_PLL_425_RESET_TIMER(_v) ((_v) << __APP_PLL_425_RESET_TIMER_SH) +#define __APP_PLL_425_LOGIC_SOFT_RESET 0x00010000 +#define __APP_PLL_425_CNTLMT0_1_MK 0x0000c000 +#define __APP_PLL_425_CNTLMT0_1_SH 14 +#define __APP_PLL_425_CNTLMT0_1(_v) ((_v) << __APP_PLL_425_CNTLMT0_1_SH) +#define __APP_PLL_425_JITLMT0_1_MK 0x00003000 +#define __APP_PLL_425_JITLMT0_1_SH 12 +#define __APP_PLL_425_JITLMT0_1(_v) ((_v) << __APP_PLL_425_JITLMT0_1_SH) +#define __APP_PLL_425_HREF 0x00000800 +#define __APP_PLL_425_HDIV 0x00000400 +#define __APP_PLL_425_P0_1_MK 0x00000300 +#define __APP_PLL_425_P0_1_SH 8 +#define __APP_PLL_425_P0_1(_v) ((_v) << __APP_PLL_425_P0_1_SH) +#define __APP_PLL_425_Z0_2_MK 0x000000e0 +#define __APP_PLL_425_Z0_2_SH 5 +#define __APP_PLL_425_Z0_2(_v) ((_v) << __APP_PLL_425_Z0_2_SH) +#define __APP_PLL_425_RSEL200500 0x00000010 +#define __APP_PLL_425_ENARST 0x00000008 +#define __APP_PLL_425_BYPASS 0x00000004 +#define __APP_PLL_425_LRESETN 0x00000002 +#define __APP_PLL_425_ENABLE 0x00000001 +#define APP_PLL_312_CTL_REG 0x00014208 +#define __P_312_PLL_LOCK 0x80000000 +#define __ENABLE_MAC_AHB_1 0x00800000 +#define __ENABLE_MAC_AHB_0 0x00400000 +#define __ENABLE_MAC_1 0x00200000 +#define __ENABLE_MAC_0 0x00100000 +#define __APP_PLL_312_RESET_TIMER_MK 0x000e0000 +#define __APP_PLL_312_RESET_TIMER_SH 17 +#define __APP_PLL_312_RESET_TIMER(_v) ((_v) << __APP_PLL_312_RESET_TIMER_SH) +#define __APP_PLL_312_LOGIC_SOFT_RESET 0x00010000 +#define __APP_PLL_312_CNTLMT0_1_MK 0x0000c000 +#define __APP_PLL_312_CNTLMT0_1_SH 14 +#define __APP_PLL_312_CNTLMT0_1(_v) ((_v) << __APP_PLL_312_CNTLMT0_1_SH) +#define __APP_PLL_312_JITLMT0_1_MK 0x00003000 +#define __APP_PLL_312_JITLMT0_1_SH 12 +#define __APP_PLL_312_JITLMT0_1(_v) ((_v) << __APP_PLL_312_JITLMT0_1_SH) +#define __APP_PLL_312_HREF 0x00000800 +#define __APP_PLL_312_HDIV 0x00000400 +#define __APP_PLL_312_P0_1_MK 0x00000300 +#define __APP_PLL_312_P0_1_SH 8 +#define __APP_PLL_312_P0_1(_v) ((_v) << __APP_PLL_312_P0_1_SH) +#define __APP_PLL_312_Z0_2_MK 0x000000e0 +#define __APP_PLL_312_Z0_2_SH 5 +#define __APP_PLL_312_Z0_2(_v) ((_v) << __APP_PLL_312_Z0_2_SH) +#define __APP_PLL_312_RSEL200500 0x00000010 +#define __APP_PLL_312_ENARST 0x00000008 +#define __APP_PLL_312_BYPASS 0x00000004 +#define __APP_PLL_312_LRESETN 0x00000002 +#define __APP_PLL_312_ENABLE 0x00000001 +#define MBIST_CTL_REG 0x00014220 +#define __EDRAM_BISTR_START 0x00000004 +#define __MBIST_RESET 0x00000002 +#define __MBIST_START 0x00000001 +#define MBIST_STAT_REG 0x00014224 +#define __EDRAM_BISTR_STATUS 0x00000008 +#define __EDRAM_BISTR_DONE 0x00000004 +#define __MEM_BIT_STATUS 0x00000002 +#define __MBIST_DONE 0x00000001 +#define HOST_SEM0_REG 0x00014230 +#define __HOST_SEMAPHORE 0x00000001 +#define HOST_SEM1_REG 0x00014234 +#define HOST_SEM2_REG 0x00014238 +#define HOST_SEM3_REG 0x0001423c +#define HOST_SEM0_INFO_REG 0x00014240 +#define HOST_SEM1_INFO_REG 0x00014244 +#define HOST_SEM2_INFO_REG 0x00014248 +#define HOST_SEM3_INFO_REG 0x0001424c +#define ETH_MAC_SER_REG 0x00014288 +#define __APP_EMS_CKBUFAMPIN 0x00000020 +#define __APP_EMS_REFCLKSEL 0x00000010 +#define __APP_EMS_CMLCKSEL 0x00000008 +#define __APP_EMS_REFCKBUFEN2 0x00000004 +#define __APP_EMS_REFCKBUFEN1 0x00000002 +#define __APP_EMS_CHANNEL_SEL 0x00000001 +#define HOSTFN2_INT_STATUS 0x00014300 +#define __HOSTFN2_HALT_OCCURRED 0x01000000 +#define __HOSTFN2_INT_STATUS_LVL_MK 0x00f00000 +#define __HOSTFN2_INT_STATUS_LVL_SH 20 +#define __HOSTFN2_INT_STATUS_LVL(_v) ((_v) << __HOSTFN2_INT_STATUS_LVL_SH) +#define __HOSTFN2_INT_STATUS_P_MK 0x000f0000 +#define __HOSTFN2_INT_STATUS_P_SH 16 +#define __HOSTFN2_INT_STATUS_P(_v) ((_v) << __HOSTFN2_INT_STATUS_P_SH) +#define __HOSTFN2_INT_STATUS_F 0x0000ffff +#define HOSTFN2_INT_MSK 0x00014304 +#define HOST_PAGE_NUM_FN2 0x00014308 +#define HOST_MSIX_ERR_INDEX_FN2 0x0001430c +#define HOSTFN3_INT_STATUS 0x00014400 +#define __HALT_OCCURRED 0x01000000 +#define __HOSTFN3_INT_STATUS_LVL_MK 0x00f00000 +#define __HOSTFN3_INT_STATUS_LVL_SH 20 +#define __HOSTFN3_INT_STATUS_LVL(_v) ((_v) << __HOSTFN3_INT_STATUS_LVL_SH) +#define __HOSTFN3_INT_STATUS_P_MK 0x000f0000 +#define __HOSTFN3_INT_STATUS_P_SH 16 +#define __HOSTFN3_INT_STATUS_P(_v) ((_v) << __HOSTFN3_INT_STATUS_P_SH) +#define __HOSTFN3_INT_STATUS_F 0x0000ffff +#define HOSTFN3_INT_MSK 0x00014404 +#define HOST_PAGE_NUM_FN3 0x00014408 +#define HOST_MSIX_ERR_INDEX_FN3 0x0001440c +#define FNC_ID_REG 0x00014600 +#define __FUNCTION_NUMBER 0x00000007 +#define FNC_PERS_REG 0x00014604 +#define __F3_FUNCTION_ACTIVE 0x80000000 +#define __F3_FUNCTION_MODE 0x40000000 +#define __F3_PORT_MAP_MK 0x30000000 +#define __F3_PORT_MAP_SH 28 +#define __F3_PORT_MAP(_v) ((_v) << __F3_PORT_MAP_SH) +#define __F3_VM_MODE 0x08000000 +#define __F3_INTX_STATUS_MK 0x07000000 +#define __F3_INTX_STATUS_SH 24 +#define __F3_INTX_STATUS(_v) ((_v) << __F3_INTX_STATUS_SH) +#define __F2_FUNCTION_ACTIVE 0x00800000 +#define __F2_FUNCTION_MODE 0x00400000 +#define __F2_PORT_MAP_MK 0x00300000 +#define __F2_PORT_MAP_SH 20 +#define __F2_PORT_MAP(_v) ((_v) << __F2_PORT_MAP_SH) +#define __F2_VM_MODE 0x00080000 +#define __F2_INTX_STATUS_MK 0x00070000 +#define __F2_INTX_STATUS_SH 16 +#define __F2_INTX_STATUS(_v) ((_v) << __F2_INTX_STATUS_SH) +#define __F1_FUNCTION_ACTIVE 0x00008000 +#define __F1_FUNCTION_MODE 0x00004000 +#define __F1_PORT_MAP_MK 0x00003000 +#define __F1_PORT_MAP_SH 12 +#define __F1_PORT_MAP(_v) ((_v) << __F1_PORT_MAP_SH) +#define __F1_VM_MODE 0x00000800 +#define __F1_INTX_STATUS_MK 0x00000700 +#define __F1_INTX_STATUS_SH 8 +#define __F1_INTX_STATUS(_v) ((_v) << __F1_INTX_STATUS_SH) +#define __F0_FUNCTION_ACTIVE 0x00000080 +#define __F0_FUNCTION_MODE 0x00000040 +#define __F0_PORT_MAP_MK 0x00000030 +#define __F0_PORT_MAP_SH 4 +#define __F0_PORT_MAP(_v) ((_v) << __F0_PORT_MAP_SH) +#define __F0_VM_MODE 0x00000008 +#define __F0_INTX_STATUS 0x00000007 +enum { + __F0_INTX_STATUS_MSIX = 0x0, + __F0_INTX_STATUS_INTA = 0x1, + __F0_INTX_STATUS_INTB = 0x2, + __F0_INTX_STATUS_INTC = 0x3, + __F0_INTX_STATUS_INTD = 0x4, +}; +#define OP_MODE 0x0001460c +#define __APP_ETH_CLK_LOWSPEED 0x00000004 +#define __GLOBAL_CORECLK_HALFSPEED 0x00000002 +#define __GLOBAL_FCOE_MODE 0x00000001 +#define HOST_SEM4_REG 0x00014610 +#define HOST_SEM5_REG 0x00014614 +#define HOST_SEM6_REG 0x00014618 +#define HOST_SEM7_REG 0x0001461c +#define HOST_SEM4_INFO_REG 0x00014620 +#define HOST_SEM5_INFO_REG 0x00014624 +#define HOST_SEM6_INFO_REG 0x00014628 +#define HOST_SEM7_INFO_REG 0x0001462c +#define HOSTFN0_LPU0_MBOX0_CMD_STAT 0x00019000 +#define __HOSTFN0_LPU0_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN0_LPU0_MBOX0_INFO_SH 1 +#define __HOSTFN0_LPU0_MBOX0_INFO(_v) ((_v) << __HOSTFN0_LPU0_MBOX0_INFO_SH) +#define __HOSTFN0_LPU0_MBOX0_CMD_STATUS 0x00000001 +#define HOSTFN0_LPU1_MBOX0_CMD_STAT 0x00019004 +#define __HOSTFN0_LPU1_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN0_LPU1_MBOX0_INFO_SH 1 +#define __HOSTFN0_LPU1_MBOX0_INFO(_v) ((_v) << __HOSTFN0_LPU1_MBOX0_INFO_SH) +#define __HOSTFN0_LPU1_MBOX0_CMD_STATUS 0x00000001 +#define LPU0_HOSTFN0_MBOX0_CMD_STAT 0x00019008 +#define __LPU0_HOSTFN0_MBOX0_INFO_MK 0xfffffffe +#define __LPU0_HOSTFN0_MBOX0_INFO_SH 1 +#define __LPU0_HOSTFN0_MBOX0_INFO(_v) ((_v) << __LPU0_HOSTFN0_MBOX0_INFO_SH) +#define __LPU0_HOSTFN0_MBOX0_CMD_STATUS 0x00000001 +#define LPU1_HOSTFN0_MBOX0_CMD_STAT 0x0001900c +#define __LPU1_HOSTFN0_MBOX0_INFO_MK 0xfffffffe +#define __LPU1_HOSTFN0_MBOX0_INFO_SH 1 +#define __LPU1_HOSTFN0_MBOX0_INFO(_v) ((_v) << __LPU1_HOSTFN0_MBOX0_INFO_SH) +#define __LPU1_HOSTFN0_MBOX0_CMD_STATUS 0x00000001 +#define HOSTFN1_LPU0_MBOX0_CMD_STAT 0x00019010 +#define __HOSTFN1_LPU0_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN1_LPU0_MBOX0_INFO_SH 1 +#define __HOSTFN1_LPU0_MBOX0_INFO(_v) ((_v) << __HOSTFN1_LPU0_MBOX0_INFO_SH) +#define __HOSTFN1_LPU0_MBOX0_CMD_STATUS 0x00000001 +#define HOSTFN1_LPU1_MBOX0_CMD_STAT 0x00019014 +#define __HOSTFN1_LPU1_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN1_LPU1_MBOX0_INFO_SH 1 +#define __HOSTFN1_LPU1_MBOX0_INFO(_v) ((_v) << __HOSTFN1_LPU1_MBOX0_INFO_SH) +#define __HOSTFN1_LPU1_MBOX0_CMD_STATUS 0x00000001 +#define LPU0_HOSTFN1_MBOX0_CMD_STAT 0x00019018 +#define __LPU0_HOSTFN1_MBOX0_INFO_MK 0xfffffffe +#define __LPU0_HOSTFN1_MBOX0_INFO_SH 1 +#define __LPU0_HOSTFN1_MBOX0_INFO(_v) ((_v) << __LPU0_HOSTFN1_MBOX0_INFO_SH) +#define __LPU0_HOSTFN1_MBOX0_CMD_STATUS 0x00000001 +#define LPU1_HOSTFN1_MBOX0_CMD_STAT 0x0001901c +#define __LPU1_HOSTFN1_MBOX0_INFO_MK 0xfffffffe +#define __LPU1_HOSTFN1_MBOX0_INFO_SH 1 +#define __LPU1_HOSTFN1_MBOX0_INFO(_v) ((_v) << __LPU1_HOSTFN1_MBOX0_INFO_SH) +#define __LPU1_HOSTFN1_MBOX0_CMD_STATUS 0x00000001 +#define HOSTFN2_LPU0_MBOX0_CMD_STAT 0x00019150 +#define __HOSTFN2_LPU0_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN2_LPU0_MBOX0_INFO_SH 1 +#define __HOSTFN2_LPU0_MBOX0_INFO(_v) ((_v) << __HOSTFN2_LPU0_MBOX0_INFO_SH) +#define __HOSTFN2_LPU0_MBOX0_CMD_STATUS 0x00000001 +#define HOSTFN2_LPU1_MBOX0_CMD_STAT 0x00019154 +#define __HOSTFN2_LPU1_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN2_LPU1_MBOX0_INFO_SH 1 +#define __HOSTFN2_LPU1_MBOX0_INFO(_v) ((_v) << __HOSTFN2_LPU1_MBOX0_INFO_SH) +#define __HOSTFN2_LPU1_MBOX0BOX0_CMD_STATUS 0x00000001 +#define LPU0_HOSTFN2_MBOX0_CMD_STAT 0x00019158 +#define __LPU0_HOSTFN2_MBOX0_INFO_MK 0xfffffffe +#define __LPU0_HOSTFN2_MBOX0_INFO_SH 1 +#define __LPU0_HOSTFN2_MBOX0_INFO(_v) ((_v) << __LPU0_HOSTFN2_MBOX0_INFO_SH) +#define __LPU0_HOSTFN2_MBOX0_CMD_STATUS 0x00000001 +#define LPU1_HOSTFN2_MBOX0_CMD_STAT 0x0001915c +#define __LPU1_HOSTFN2_MBOX0_INFO_MK 0xfffffffe +#define __LPU1_HOSTFN2_MBOX0_INFO_SH 1 +#define __LPU1_HOSTFN2_MBOX0_INFO(_v) ((_v) << __LPU1_HOSTFN2_MBOX0_INFO_SH) +#define __LPU1_HOSTFN2_MBOX0_CMD_STATUS 0x00000001 +#define HOSTFN3_LPU0_MBOX0_CMD_STAT 0x00019160 +#define __HOSTFN3_LPU0_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN3_LPU0_MBOX0_INFO_SH 1 +#define __HOSTFN3_LPU0_MBOX0_INFO(_v) ((_v) << __HOSTFN3_LPU0_MBOX0_INFO_SH) +#define __HOSTFN3_LPU0_MBOX0_CMD_STATUS 0x00000001 +#define HOSTFN3_LPU1_MBOX0_CMD_STAT 0x00019164 +#define __HOSTFN3_LPU1_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN3_LPU1_MBOX0_INFO_SH 1 +#define __HOSTFN3_LPU1_MBOX0_INFO(_v) ((_v) << __HOSTFN3_LPU1_MBOX0_INFO_SH) +#define __HOSTFN3_LPU1_MBOX0_CMD_STATUS 0x00000001 +#define LPU0_HOSTFN3_MBOX0_CMD_STAT 0x00019168 +#define __LPU0_HOSTFN3_MBOX0_INFO_MK 0xfffffffe +#define __LPU0_HOSTFN3_MBOX0_INFO_SH 1 +#define __LPU0_HOSTFN3_MBOX0_INFO(_v) ((_v) << __LPU0_HOSTFN3_MBOX0_INFO_SH) +#define __LPU0_HOSTFN3_MBOX0_CMD_STATUS 0x00000001 +#define LPU1_HOSTFN3_MBOX0_CMD_STAT 0x0001916c +#define __LPU1_HOSTFN3_MBOX0_INFO_MK 0xfffffffe +#define __LPU1_HOSTFN3_MBOX0_INFO_SH 1 +#define __LPU1_HOSTFN3_MBOX0_INFO(_v) ((_v) << __LPU1_HOSTFN3_MBOX0_INFO_SH) +#define __LPU1_HOSTFN3_MBOX0_CMD_STATUS 0x00000001 +#define FW_INIT_HALT_P0 0x000191ac +#define __FW_INIT_HALT_P 0x00000001 +#define FW_INIT_HALT_P1 0x000191bc +#define CPE_PI_PTR_Q0 0x00038000 +#define __CPE_PI_UNUSED_MK 0xffff0000 +#define __CPE_PI_UNUSED_SH 16 +#define __CPE_PI_UNUSED(_v) ((_v) << __CPE_PI_UNUSED_SH) +#define __CPE_PI_PTR 0x0000ffff +#define CPE_PI_PTR_Q1 0x00038040 +#define CPE_CI_PTR_Q0 0x00038004 +#define __CPE_CI_UNUSED_MK 0xffff0000 +#define __CPE_CI_UNUSED_SH 16 +#define __CPE_CI_UNUSED(_v) ((_v) << __CPE_CI_UNUSED_SH) +#define __CPE_CI_PTR 0x0000ffff +#define CPE_CI_PTR_Q1 0x00038044 +#define CPE_DEPTH_Q0 0x00038008 +#define __CPE_DEPTH_UNUSED_MK 0xf8000000 +#define __CPE_DEPTH_UNUSED_SH 27 +#define __CPE_DEPTH_UNUSED(_v) ((_v) << __CPE_DEPTH_UNUSED_SH) +#define __CPE_MSIX_VEC_INDEX_MK 0x07ff0000 +#define __CPE_MSIX_VEC_INDEX_SH 16 +#define __CPE_MSIX_VEC_INDEX(_v) ((_v) << __CPE_MSIX_VEC_INDEX_SH) +#define __CPE_DEPTH 0x0000ffff +#define CPE_DEPTH_Q1 0x00038048 +#define CPE_QCTRL_Q0 0x0003800c +#define __CPE_CTRL_UNUSED30_MK 0xfc000000 +#define __CPE_CTRL_UNUSED30_SH 26 +#define __CPE_CTRL_UNUSED30(_v) ((_v) << __CPE_CTRL_UNUSED30_SH) +#define __CPE_FUNC_INT_CTRL_MK 0x03000000 +#define __CPE_FUNC_INT_CTRL_SH 24 +#define __CPE_FUNC_INT_CTRL(_v) ((_v) << __CPE_FUNC_INT_CTRL_SH) +enum { + __CPE_FUNC_INT_CTRL_DISABLE = 0x0, + __CPE_FUNC_INT_CTRL_F2NF = 0x1, + __CPE_FUNC_INT_CTRL_3QUART = 0x2, + __CPE_FUNC_INT_CTRL_HALF = 0x3, +}; +#define __CPE_CTRL_UNUSED20_MK 0x00f00000 +#define __CPE_CTRL_UNUSED20_SH 20 +#define __CPE_CTRL_UNUSED20(_v) ((_v) << __CPE_CTRL_UNUSED20_SH) +#define __CPE_SCI_TH_MK 0x000f0000 +#define __CPE_SCI_TH_SH 16 +#define __CPE_SCI_TH(_v) ((_v) << __CPE_SCI_TH_SH) +#define __CPE_CTRL_UNUSED10_MK 0x0000c000 +#define __CPE_CTRL_UNUSED10_SH 14 +#define __CPE_CTRL_UNUSED10(_v) ((_v) << __CPE_CTRL_UNUSED10_SH) +#define __CPE_ACK_PENDING 0x00002000 +#define __CPE_CTRL_UNUSED40_MK 0x00001c00 +#define __CPE_CTRL_UNUSED40_SH 10 +#define __CPE_CTRL_UNUSED40(_v) ((_v) << __CPE_CTRL_UNUSED40_SH) +#define __CPE_PCIEID_MK 0x00000300 +#define __CPE_PCIEID_SH 8 +#define __CPE_PCIEID(_v) ((_v) << __CPE_PCIEID_SH) +#define __CPE_CTRL_UNUSED00_MK 0x000000fe +#define __CPE_CTRL_UNUSED00_SH 1 +#define __CPE_CTRL_UNUSED00(_v) ((_v) << __CPE_CTRL_UNUSED00_SH) +#define __CPE_ESIZE 0x00000001 +#define CPE_QCTRL_Q1 0x0003804c +#define __CPE_CTRL_UNUSED31_MK 0xfc000000 +#define __CPE_CTRL_UNUSED31_SH 26 +#define __CPE_CTRL_UNUSED31(_v) ((_v) << __CPE_CTRL_UNUSED31_SH) +#define __CPE_CTRL_UNUSED21_MK 0x00f00000 +#define __CPE_CTRL_UNUSED21_SH 20 +#define __CPE_CTRL_UNUSED21(_v) ((_v) << __CPE_CTRL_UNUSED21_SH) +#define __CPE_CTRL_UNUSED11_MK 0x0000c000 +#define __CPE_CTRL_UNUSED11_SH 14 +#define __CPE_CTRL_UNUSED11(_v) ((_v) << __CPE_CTRL_UNUSED11_SH) +#define __CPE_CTRL_UNUSED41_MK 0x00001c00 +#define __CPE_CTRL_UNUSED41_SH 10 +#define __CPE_CTRL_UNUSED41(_v) ((_v) << __CPE_CTRL_UNUSED41_SH) +#define __CPE_CTRL_UNUSED01_MK 0x000000fe +#define __CPE_CTRL_UNUSED01_SH 1 +#define __CPE_CTRL_UNUSED01(_v) ((_v) << __CPE_CTRL_UNUSED01_SH) +#define RME_PI_PTR_Q0 0x00038020 +#define __LATENCY_TIME_STAMP_MK 0xffff0000 +#define __LATENCY_TIME_STAMP_SH 16 +#define __LATENCY_TIME_STAMP(_v) ((_v) << __LATENCY_TIME_STAMP_SH) +#define __RME_PI_PTR 0x0000ffff +#define RME_PI_PTR_Q1 0x00038060 +#define RME_CI_PTR_Q0 0x00038024 +#define __DELAY_TIME_STAMP_MK 0xffff0000 +#define __DELAY_TIME_STAMP_SH 16 +#define __DELAY_TIME_STAMP(_v) ((_v) << __DELAY_TIME_STAMP_SH) +#define __RME_CI_PTR 0x0000ffff +#define RME_CI_PTR_Q1 0x00038064 +#define RME_DEPTH_Q0 0x00038028 +#define __RME_DEPTH_UNUSED_MK 0xf8000000 +#define __RME_DEPTH_UNUSED_SH 27 +#define __RME_DEPTH_UNUSED(_v) ((_v) << __RME_DEPTH_UNUSED_SH) +#define __RME_MSIX_VEC_INDEX_MK 0x07ff0000 +#define __RME_MSIX_VEC_INDEX_SH 16 +#define __RME_MSIX_VEC_INDEX(_v) ((_v) << __RME_MSIX_VEC_INDEX_SH) +#define __RME_DEPTH 0x0000ffff +#define RME_DEPTH_Q1 0x00038068 +#define RME_QCTRL_Q0 0x0003802c +#define __RME_INT_LATENCY_TIMER_MK 0xff000000 +#define __RME_INT_LATENCY_TIMER_SH 24 +#define __RME_INT_LATENCY_TIMER(_v) ((_v) << __RME_INT_LATENCY_TIMER_SH) +#define __RME_INT_DELAY_TIMER_MK 0x00ff0000 +#define __RME_INT_DELAY_TIMER_SH 16 +#define __RME_INT_DELAY_TIMER(_v) ((_v) << __RME_INT_DELAY_TIMER_SH) +#define __RME_INT_DELAY_DISABLE 0x00008000 +#define __RME_DLY_DELAY_DISABLE 0x00004000 +#define __RME_ACK_PENDING 0x00002000 +#define __RME_FULL_INTERRUPT_DISABLE 0x00001000 +#define __RME_CTRL_UNUSED10_MK 0x00000c00 +#define __RME_CTRL_UNUSED10_SH 10 +#define __RME_CTRL_UNUSED10(_v) ((_v) << __RME_CTRL_UNUSED10_SH) +#define __RME_PCIEID_MK 0x00000300 +#define __RME_PCIEID_SH 8 +#define __RME_PCIEID(_v) ((_v) << __RME_PCIEID_SH) +#define __RME_CTRL_UNUSED00_MK 0x000000fe +#define __RME_CTRL_UNUSED00_SH 1 +#define __RME_CTRL_UNUSED00(_v) ((_v) << __RME_CTRL_UNUSED00_SH) +#define __RME_ESIZE 0x00000001 +#define RME_QCTRL_Q1 0x0003806c +#define __RME_CTRL_UNUSED11_MK 0x00000c00 +#define __RME_CTRL_UNUSED11_SH 10 +#define __RME_CTRL_UNUSED11(_v) ((_v) << __RME_CTRL_UNUSED11_SH) +#define __RME_CTRL_UNUSED01_MK 0x000000fe +#define __RME_CTRL_UNUSED01_SH 1 +#define __RME_CTRL_UNUSED01(_v) ((_v) << __RME_CTRL_UNUSED01_SH) +#define PSS_CTL_REG 0x00018800 +#define __PSS_I2C_CLK_DIV_MK 0x007f0000 +#define __PSS_I2C_CLK_DIV_SH 16 +#define __PSS_I2C_CLK_DIV(_v) ((_v) << __PSS_I2C_CLK_DIV_SH) +#define __PSS_LMEM_INIT_DONE 0x00001000 +#define __PSS_LMEM_RESET 0x00000200 +#define __PSS_LMEM_INIT_EN 0x00000100 +#define __PSS_LPU1_RESET 0x00000002 +#define __PSS_LPU0_RESET 0x00000001 +#define PSS_ERR_STATUS_REG 0x00018810 +#define __PSS_LPU1_TCM_READ_ERR 0x00200000 +#define __PSS_LPU0_TCM_READ_ERR 0x00100000 +#define __PSS_LMEM5_CORR_ERR 0x00080000 +#define __PSS_LMEM4_CORR_ERR 0x00040000 +#define __PSS_LMEM3_CORR_ERR 0x00020000 +#define __PSS_LMEM2_CORR_ERR 0x00010000 +#define __PSS_LMEM1_CORR_ERR 0x00008000 +#define __PSS_LMEM0_CORR_ERR 0x00004000 +#define __PSS_LMEM5_UNCORR_ERR 0x00002000 +#define __PSS_LMEM4_UNCORR_ERR 0x00001000 +#define __PSS_LMEM3_UNCORR_ERR 0x00000800 +#define __PSS_LMEM2_UNCORR_ERR 0x00000400 +#define __PSS_LMEM1_UNCORR_ERR 0x00000200 +#define __PSS_LMEM0_UNCORR_ERR 0x00000100 +#define __PSS_BAL_PERR 0x00000080 +#define __PSS_DIP_IF_ERR 0x00000040 +#define __PSS_IOH_IF_ERR 0x00000020 +#define __PSS_TDS_IF_ERR 0x00000010 +#define __PSS_RDS_IF_ERR 0x00000008 +#define __PSS_SGM_IF_ERR 0x00000004 +#define __PSS_LPU1_RAM_ERR 0x00000002 +#define __PSS_LPU0_RAM_ERR 0x00000001 +#define ERR_SET_REG 0x00018818 +#define __PSS_ERR_STATUS_SET 0x003fffff +#define PMM_1T_RESET_REG_P0 0x0002381c +#define __PMM_1T_RESET_P 0x00000001 +#define PMM_1T_RESET_REG_P1 0x00023c1c +#define HQM_QSET0_RXQ_DRBL_P0 0x00038000 +#define __RXQ0_ADD_VECTORS_P 0x80000000 +#define __RXQ0_STOP_P 0x40000000 +#define __RXQ0_PRD_PTR_P 0x0000ffff +#define HQM_QSET1_RXQ_DRBL_P0 0x00038080 +#define __RXQ1_ADD_VECTORS_P 0x80000000 +#define __RXQ1_STOP_P 0x40000000 +#define __RXQ1_PRD_PTR_P 0x0000ffff +#define HQM_QSET0_RXQ_DRBL_P1 0x0003c000 +#define HQM_QSET1_RXQ_DRBL_P1 0x0003c080 +#define HQM_QSET0_TXQ_DRBL_P0 0x00038020 +#define __TXQ0_ADD_VECTORS_P 0x80000000 +#define __TXQ0_STOP_P 0x40000000 +#define __TXQ0_PRD_PTR_P 0x0000ffff +#define HQM_QSET1_TXQ_DRBL_P0 0x000380a0 +#define __TXQ1_ADD_VECTORS_P 0x80000000 +#define __TXQ1_STOP_P 0x40000000 +#define __TXQ1_PRD_PTR_P 0x0000ffff +#define HQM_QSET0_TXQ_DRBL_P1 0x0003c020 +#define HQM_QSET1_TXQ_DRBL_P1 0x0003c0a0 +#define HQM_QSET0_IB_DRBL_1_P0 0x00038040 +#define __IB1_0_ACK_P 0x80000000 +#define __IB1_0_DISABLE_P 0x40000000 +#define __IB1_0_COALESCING_CFG_P_MK 0x00ff0000 +#define __IB1_0_COALESCING_CFG_P_SH 16 +#define __IB1_0_COALESCING_CFG_P(_v) ((_v) << __IB1_0_COALESCING_CFG_P_SH) +#define __IB1_0_NUM_OF_ACKED_EVENTS_P 0x0000ffff +#define HQM_QSET1_IB_DRBL_1_P0 0x000380c0 +#define __IB1_1_ACK_P 0x80000000 +#define __IB1_1_DISABLE_P 0x40000000 +#define __IB1_1_COALESCING_CFG_P_MK 0x00ff0000 +#define __IB1_1_COALESCING_CFG_P_SH 16 +#define __IB1_1_COALESCING_CFG_P(_v) ((_v) << __IB1_1_COALESCING_CFG_P_SH) +#define __IB1_1_NUM_OF_ACKED_EVENTS_P 0x0000ffff +#define HQM_QSET0_IB_DRBL_1_P1 0x0003c040 +#define HQM_QSET1_IB_DRBL_1_P1 0x0003c0c0 +#define HQM_QSET0_IB_DRBL_2_P0 0x00038060 +#define __IB2_0_ACK_P 0x80000000 +#define __IB2_0_DISABLE_P 0x40000000 +#define __IB2_0_COALESCING_CFG_P_MK 0x00ff0000 +#define __IB2_0_COALESCING_CFG_P_SH 16 +#define __IB2_0_COALESCING_CFG_P(_v) ((_v) << __IB2_0_COALESCING_CFG_P_SH) +#define __IB2_0_NUM_OF_ACKED_EVENTS_P 0x0000ffff +#define HQM_QSET1_IB_DRBL_2_P0 0x000380e0 +#define __IB2_1_ACK_P 0x80000000 +#define __IB2_1_DISABLE_P 0x40000000 +#define __IB2_1_COALESCING_CFG_P_MK 0x00ff0000 +#define __IB2_1_COALESCING_CFG_P_SH 16 +#define __IB2_1_COALESCING_CFG_P(_v) ((_v) << __IB2_1_COALESCING_CFG_P_SH) +#define __IB2_1_NUM_OF_ACKED_EVENTS_P 0x0000ffff +#define HQM_QSET0_IB_DRBL_2_P1 0x0003c060 +#define HQM_QSET1_IB_DRBL_2_P1 0x0003c0e0 + +/* + * These definitions are either in error/missing in spec. Its auto-generated + * from hard coded values in regparse.pl. + */ +#define __EMPHPOST_AT_4G_MK_FIX 0x0000001c +#define __EMPHPOST_AT_4G_SH_FIX 0x00000002 +#define __EMPHPRE_AT_4G_FIX 0x00000003 +#define __SFP_TXRATE_EN_FIX 0x00000100 +#define __SFP_RXRATE_EN_FIX 0x00000080 + +/* + * These register definitions are auto-generated from hard coded values + * in regparse.pl. + */ + +/* + * These register mapping definitions are auto-generated from mapping tables + * in regparse.pl. + */ +#define BFA_IOC0_HBEAT_REG HOST_SEM0_INFO_REG +#define BFA_IOC0_STATE_REG HOST_SEM1_INFO_REG +#define BFA_IOC1_HBEAT_REG HOST_SEM2_INFO_REG +#define BFA_IOC1_STATE_REG HOST_SEM3_INFO_REG +#define BFA_FW_USE_COUNT HOST_SEM4_INFO_REG + +#define CPE_DEPTH_Q(__n) \ + (CPE_DEPTH_Q0 + (__n) * (CPE_DEPTH_Q1 - CPE_DEPTH_Q0)) +#define CPE_QCTRL_Q(__n) \ + (CPE_QCTRL_Q0 + (__n) * (CPE_QCTRL_Q1 - CPE_QCTRL_Q0)) +#define CPE_PI_PTR_Q(__n) \ + (CPE_PI_PTR_Q0 + (__n) * (CPE_PI_PTR_Q1 - CPE_PI_PTR_Q0)) +#define CPE_CI_PTR_Q(__n) \ + (CPE_CI_PTR_Q0 + (__n) * (CPE_CI_PTR_Q1 - CPE_CI_PTR_Q0)) +#define RME_DEPTH_Q(__n) \ + (RME_DEPTH_Q0 + (__n) * (RME_DEPTH_Q1 - RME_DEPTH_Q0)) +#define RME_QCTRL_Q(__n) \ + (RME_QCTRL_Q0 + (__n) * (RME_QCTRL_Q1 - RME_QCTRL_Q0)) +#define RME_PI_PTR_Q(__n) \ + (RME_PI_PTR_Q0 + (__n) * (RME_PI_PTR_Q1 - RME_PI_PTR_Q0)) +#define RME_CI_PTR_Q(__n) \ + (RME_CI_PTR_Q0 + (__n) * (RME_CI_PTR_Q1 - RME_CI_PTR_Q0)) +#define HQM_QSET_RXQ_DRBL_P0(__n) (HQM_QSET0_RXQ_DRBL_P0 + (__n) \ + * (HQM_QSET1_RXQ_DRBL_P0 - HQM_QSET0_RXQ_DRBL_P0)) +#define HQM_QSET_TXQ_DRBL_P0(__n) (HQM_QSET0_TXQ_DRBL_P0 + (__n) \ + * (HQM_QSET1_TXQ_DRBL_P0 - HQM_QSET0_TXQ_DRBL_P0)) +#define HQM_QSET_IB_DRBL_1_P0(__n) (HQM_QSET0_IB_DRBL_1_P0 + (__n) \ + * (HQM_QSET1_IB_DRBL_1_P0 - HQM_QSET0_IB_DRBL_1_P0)) +#define HQM_QSET_IB_DRBL_2_P0(__n) (HQM_QSET0_IB_DRBL_2_P0 + (__n) \ + * (HQM_QSET1_IB_DRBL_2_P0 - HQM_QSET0_IB_DRBL_2_P0)) +#define HQM_QSET_RXQ_DRBL_P1(__n) (HQM_QSET0_RXQ_DRBL_P1 + (__n) \ + * (HQM_QSET1_RXQ_DRBL_P1 - HQM_QSET0_RXQ_DRBL_P1)) +#define HQM_QSET_TXQ_DRBL_P1(__n) (HQM_QSET0_TXQ_DRBL_P1 + (__n) \ + * (HQM_QSET1_TXQ_DRBL_P1 - HQM_QSET0_TXQ_DRBL_P1)) +#define HQM_QSET_IB_DRBL_1_P1(__n) (HQM_QSET0_IB_DRBL_1_P1 + (__n) \ + * (HQM_QSET1_IB_DRBL_1_P1 - HQM_QSET0_IB_DRBL_1_P1)) +#define HQM_QSET_IB_DRBL_2_P1(__n) (HQM_QSET0_IB_DRBL_2_P1 + (__n) \ + * (HQM_QSET1_IB_DRBL_2_P1 - HQM_QSET0_IB_DRBL_2_P1)) + +#define CPE_Q_NUM(__fn, __q) (((__fn) << 2) + (__q)) +#define RME_Q_NUM(__fn, __q) (((__fn) << 2) + (__q)) +#define CPE_Q_MASK(__q) ((__q) & 0x3) +#define RME_Q_MASK(__q) ((__q) & 0x3) + +/* + * PCI MSI-X vector defines + */ +enum { + BFA_MSIX_CPE_Q0 = 0, + BFA_MSIX_CPE_Q1 = 1, + BFA_MSIX_CPE_Q2 = 2, + BFA_MSIX_CPE_Q3 = 3, + BFA_MSIX_RME_Q0 = 4, + BFA_MSIX_RME_Q1 = 5, + BFA_MSIX_RME_Q2 = 6, + BFA_MSIX_RME_Q3 = 7, + BFA_MSIX_LPU_ERR = 8, + BFA_MSIX_CT_MAX = 9, +}; + +/* + * And corresponding host interrupt status bit field defines + */ +#define __HFN_INT_CPE_Q0 0x00000001U +#define __HFN_INT_CPE_Q1 0x00000002U +#define __HFN_INT_CPE_Q2 0x00000004U +#define __HFN_INT_CPE_Q3 0x00000008U +#define __HFN_INT_CPE_Q4 0x00000010U +#define __HFN_INT_CPE_Q5 0x00000020U +#define __HFN_INT_CPE_Q6 0x00000040U +#define __HFN_INT_CPE_Q7 0x00000080U +#define __HFN_INT_RME_Q0 0x00000100U +#define __HFN_INT_RME_Q1 0x00000200U +#define __HFN_INT_RME_Q2 0x00000400U +#define __HFN_INT_RME_Q3 0x00000800U +#define __HFN_INT_RME_Q4 0x00001000U +#define __HFN_INT_RME_Q5 0x00002000U +#define __HFN_INT_RME_Q6 0x00004000U +#define __HFN_INT_RME_Q7 0x00008000U +#define __HFN_INT_ERR_EMC 0x00010000U +#define __HFN_INT_ERR_LPU0 0x00020000U +#define __HFN_INT_ERR_LPU1 0x00040000U +#define __HFN_INT_ERR_PSS 0x00080000U +#define __HFN_INT_MBOX_LPU0 0x00100000U +#define __HFN_INT_MBOX_LPU1 0x00200000U +#define __HFN_INT_MBOX1_LPU0 0x00400000U +#define __HFN_INT_MBOX1_LPU1 0x00800000U +#define __HFN_INT_LL_HALT 0x01000000U +#define __HFN_INT_CPE_MASK 0x000000ffU +#define __HFN_INT_RME_MASK 0x0000ff00U + +/* + * catapult memory map. + */ +#define LL_PGN_HQM0 0x0096 +#define LL_PGN_HQM1 0x0097 +#define PSS_SMEM_PAGE_START 0x8000 +#define PSS_SMEM_PGNUM(_pg0, _ma) ((_pg0) + ((_ma) >> 15)) +#define PSS_SMEM_PGOFF(_ma) ((_ma) & 0x7fff) + +/* + * End of catapult memory map + */ + +#endif /* __BFI_CTREG_H__ */ diff --git a/drivers/net/bna/bfi_ll.h b/drivers/net/bna/bfi_ll.h new file mode 100644 index 000000000000..bee4d054066a --- /dev/null +++ b/drivers/net/bna/bfi_ll.h @@ -0,0 +1,438 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#ifndef __BFI_LL_H__ +#define __BFI_LL_H__ + +#include "bfi.h" + +#pragma pack(1) + +/** + * @brief + * "enums" for all LL mailbox messages other than IOC + */ +enum { + BFI_LL_H2I_MAC_UCAST_SET_REQ = 1, + BFI_LL_H2I_MAC_UCAST_ADD_REQ = 2, + BFI_LL_H2I_MAC_UCAST_DEL_REQ = 3, + + BFI_LL_H2I_MAC_MCAST_ADD_REQ = 4, + BFI_LL_H2I_MAC_MCAST_DEL_REQ = 5, + BFI_LL_H2I_MAC_MCAST_FILTER_REQ = 6, + BFI_LL_H2I_MAC_MCAST_DEL_ALL_REQ = 7, + + BFI_LL_H2I_PORT_ADMIN_REQ = 8, + BFI_LL_H2I_STATS_GET_REQ = 9, + BFI_LL_H2I_STATS_CLEAR_REQ = 10, + + BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ = 11, + BFI_LL_H2I_RXF_DEFAULT_SET_REQ = 12, + + BFI_LL_H2I_TXQ_STOP_REQ = 13, + BFI_LL_H2I_RXQ_STOP_REQ = 14, + + BFI_LL_H2I_DIAG_LOOPBACK_REQ = 15, + + BFI_LL_H2I_SET_PAUSE_REQ = 16, + BFI_LL_H2I_MTU_INFO_REQ = 17, + + BFI_LL_H2I_RX_REQ = 18, +} ; + +enum { + BFI_LL_I2H_MAC_UCAST_SET_RSP = BFA_I2HM(1), + BFI_LL_I2H_MAC_UCAST_ADD_RSP = BFA_I2HM(2), + BFI_LL_I2H_MAC_UCAST_DEL_RSP = BFA_I2HM(3), + + BFI_LL_I2H_MAC_MCAST_ADD_RSP = BFA_I2HM(4), + BFI_LL_I2H_MAC_MCAST_DEL_RSP = BFA_I2HM(5), + BFI_LL_I2H_MAC_MCAST_FILTER_RSP = BFA_I2HM(6), + BFI_LL_I2H_MAC_MCAST_DEL_ALL_RSP = BFA_I2HM(7), + + BFI_LL_I2H_PORT_ADMIN_RSP = BFA_I2HM(8), + BFI_LL_I2H_STATS_GET_RSP = BFA_I2HM(9), + BFI_LL_I2H_STATS_CLEAR_RSP = BFA_I2HM(10), + + BFI_LL_I2H_RXF_PROMISCUOUS_SET_RSP = BFA_I2HM(11), + BFI_LL_I2H_RXF_DEFAULT_SET_RSP = BFA_I2HM(12), + + BFI_LL_I2H_TXQ_STOP_RSP = BFA_I2HM(13), + BFI_LL_I2H_RXQ_STOP_RSP = BFA_I2HM(14), + + BFI_LL_I2H_DIAG_LOOPBACK_RSP = BFA_I2HM(15), + + BFI_LL_I2H_SET_PAUSE_RSP = BFA_I2HM(16), + + BFI_LL_I2H_MTU_INFO_RSP = BFA_I2HM(17), + BFI_LL_I2H_RX_RSP = BFA_I2HM(18), + + BFI_LL_I2H_LINK_DOWN_AEN = BFA_I2HM(19), + BFI_LL_I2H_LINK_UP_AEN = BFA_I2HM(20), + + BFI_LL_I2H_PORT_ENABLE_AEN = BFA_I2HM(21), + BFI_LL_I2H_PORT_DISABLE_AEN = BFA_I2HM(22), +} ; + +/** + * @brief bfi_ll_mac_addr_req is used by: + * BFI_LL_H2I_MAC_UCAST_SET_REQ + * BFI_LL_H2I_MAC_UCAST_ADD_REQ + * BFI_LL_H2I_MAC_UCAST_DEL_REQ + * BFI_LL_H2I_MAC_MCAST_ADD_REQ + * BFI_LL_H2I_MAC_MCAST_DEL_REQ + */ +struct bfi_ll_mac_addr_req { + struct bfi_mhdr mh; /*!< common msg header */ + u8 rxf_id; + u8 rsvd1[3]; + mac_t mac_addr; + u8 rsvd2[2]; +}; + +/** + * @brief bfi_ll_mcast_filter_req is used by: + * BFI_LL_H2I_MAC_MCAST_FILTER_REQ + */ +struct bfi_ll_mcast_filter_req { + struct bfi_mhdr mh; /*!< common msg header */ + u8 rxf_id; + u8 enable; + u8 rsvd[2]; +}; + +/** + * @brief bfi_ll_mcast_del_all is used by: + * BFI_LL_H2I_MAC_MCAST_DEL_ALL_REQ + */ +struct bfi_ll_mcast_del_all_req { + struct bfi_mhdr mh; /*!< common msg header */ + u8 rxf_id; + u8 rsvd[3]; +}; + +/** + * @brief bfi_ll_q_stop_req is used by: + * BFI_LL_H2I_TXQ_STOP_REQ + * BFI_LL_H2I_RXQ_STOP_REQ + */ +struct bfi_ll_q_stop_req { + struct bfi_mhdr mh; /*!< common msg header */ + u32 q_id_mask[2]; /* !< bit-mask for queue ids */ +}; + +/** + * @brief bfi_ll_stats_req is used by: + * BFI_LL_I2H_STATS_GET_REQ + * BFI_LL_I2H_STATS_CLEAR_REQ + */ +struct bfi_ll_stats_req { + struct bfi_mhdr mh; /*!< common msg header */ + u16 stats_mask; /* !< bit-mask for non-function statistics */ + u8 rsvd[2]; + u32 rxf_id_mask[2]; /* !< bit-mask for RxF Statistics */ + u32 txf_id_mask[2]; /* !< bit-mask for TxF Statistics */ + union bfi_addr_u host_buffer; /* !< where statistics are returned */ +}; + +/** + * @brief defines for "stats_mask" above. + */ +#define BFI_LL_STATS_MAC (1 << 0) /* !< MAC Statistics */ +#define BFI_LL_STATS_BPC (1 << 1) /* !< Pause Stats from BPC */ +#define BFI_LL_STATS_RAD (1 << 2) /* !< Rx Admission Statistics */ +#define BFI_LL_STATS_RX_FC (1 << 3) /* !< Rx FC Stats from RxA */ +#define BFI_LL_STATS_TX_FC (1 << 4) /* !< Tx FC Stats from TxA */ + +#define BFI_LL_STATS_ALL 0x1f + +/** + * @brief bfi_ll_port_admin_req + */ +struct bfi_ll_port_admin_req { + struct bfi_mhdr mh; /*!< common msg header */ + u8 up; + u8 rsvd[3]; +}; + +/** + * @brief bfi_ll_rxf_req is used by: + * BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ + * BFI_LL_H2I_RXF_DEFAULT_SET_REQ + */ +struct bfi_ll_rxf_req { + struct bfi_mhdr mh; /*!< common msg header */ + u8 rxf_id; + u8 enable; + u8 rsvd[2]; +}; + +/** + * @brief bfi_ll_rxf_multi_req is used by: + * BFI_LL_H2I_RX_REQ + */ +struct bfi_ll_rxf_multi_req { + struct bfi_mhdr mh; /*!< common msg header */ + u32 rxf_id_mask[2]; + u8 enable; + u8 rsvd[3]; +}; + +/** + * @brief enum for Loopback opmodes + */ +enum { + BFI_LL_DIAG_LB_OPMODE_EXT = 0, + BFI_LL_DIAG_LB_OPMODE_CBL = 1, +}; + +/** + * @brief bfi_ll_set_pause_req is used by: + * BFI_LL_H2I_SET_PAUSE_REQ + */ +struct bfi_ll_set_pause_req { + struct bfi_mhdr mh; + u8 tx_pause; /* 1 = enable, 0 = disable */ + u8 rx_pause; /* 1 = enable, 0 = disable */ + u8 rsvd[2]; +}; + +/** + * @brief bfi_ll_mtu_info_req is used by: + * BFI_LL_H2I_MTU_INFO_REQ + */ +struct bfi_ll_mtu_info_req { + struct bfi_mhdr mh; + u16 mtu; + u8 rsvd[2]; +}; + +/** + * @brief + * Response header format used by all responses + * For both responses and asynchronous notifications + */ +struct bfi_ll_rsp { + struct bfi_mhdr mh; /*!< common msg header */ + u8 error; + u8 rsvd[3]; +}; + +/** + * @brief bfi_ll_cee_aen is used by: + * BFI_LL_I2H_LINK_DOWN_AEN + * BFI_LL_I2H_LINK_UP_AEN + */ +struct bfi_ll_aen { + struct bfi_mhdr mh; /*!< common msg header */ + u32 reason; + u8 cee_linkup; + u8 prio_map; /*!< LL priority bit-map */ + u8 rsvd[2]; +}; + +/** + * @brief + * The following error codes can be returned + * by the mbox commands + */ +enum { + BFI_LL_CMD_OK = 0, + BFI_LL_CMD_FAIL = 1, + BFI_LL_CMD_DUP_ENTRY = 2, /* !< Duplicate entry in CAM */ + BFI_LL_CMD_CAM_FULL = 3, /* !< CAM is full */ + BFI_LL_CMD_NOT_OWNER = 4, /* !< Not permitted, b'cos not owner */ + BFI_LL_CMD_NOT_EXEC = 5, /* !< Was not sent to f/w at all */ + BFI_LL_CMD_WAITING = 6, /* !< Waiting for completion (VMware) */ + BFI_LL_CMD_PORT_DISABLED = 7, /* !< port in disabled state */ +} ; + +/* Statistics */ +#define BFI_LL_TXF_ID_MAX 64 +#define BFI_LL_RXF_ID_MAX 64 + +/* TxF Frame Statistics */ +struct bfi_ll_stats_txf { + u64 ucast_octets; + u64 ucast; + u64 ucast_vlan; + + u64 mcast_octets; + u64 mcast; + u64 mcast_vlan; + + u64 bcast_octets; + u64 bcast; + u64 bcast_vlan; + + u64 errors; + u64 filter_vlan; /* frames filtered due to VLAN */ + u64 filter_mac_sa; /* frames filtered due to SA check */ +}; + +/* RxF Frame Statistics */ +struct bfi_ll_stats_rxf { + u64 ucast_octets; + u64 ucast; + u64 ucast_vlan; + + u64 mcast_octets; + u64 mcast; + u64 mcast_vlan; + + u64 bcast_octets; + u64 bcast; + u64 bcast_vlan; + u64 frame_drops; +}; + +/* FC Tx Frame Statistics */ +struct bfi_ll_stats_fc_tx { + u64 txf_ucast_octets; + u64 txf_ucast; + u64 txf_ucast_vlan; + + u64 txf_mcast_octets; + u64 txf_mcast; + u64 txf_mcast_vlan; + + u64 txf_bcast_octets; + u64 txf_bcast; + u64 txf_bcast_vlan; + + u64 txf_parity_errors; + u64 txf_timeout; + u64 txf_fid_parity_errors; +}; + +/* FC Rx Frame Statistics */ +struct bfi_ll_stats_fc_rx { + u64 rxf_ucast_octets; + u64 rxf_ucast; + u64 rxf_ucast_vlan; + + u64 rxf_mcast_octets; + u64 rxf_mcast; + u64 rxf_mcast_vlan; + + u64 rxf_bcast_octets; + u64 rxf_bcast; + u64 rxf_bcast_vlan; +}; + +/* RAD Frame Statistics */ +struct bfi_ll_stats_rad { + u64 rx_frames; + u64 rx_octets; + u64 rx_vlan_frames; + + u64 rx_ucast; + u64 rx_ucast_octets; + u64 rx_ucast_vlan; + + u64 rx_mcast; + u64 rx_mcast_octets; + u64 rx_mcast_vlan; + + u64 rx_bcast; + u64 rx_bcast_octets; + u64 rx_bcast_vlan; + + u64 rx_drops; +}; + +/* BPC Tx Registers */ +struct bfi_ll_stats_bpc { + /* transmit stats */ + u64 tx_pause[8]; + u64 tx_zero_pause[8]; /*!< Pause cancellation */ + /*!timer_mod)) + +/* MBOX API for PORT, TX, RX */ +#define bna_mbox_qe_fill(_qe, _cmd, _cmd_len, _cbfn, _cbarg) \ +do { \ + memcpy(&((_qe)->cmd.msg[0]), (_cmd), (_cmd_len)); \ + (_qe)->cbfn = (_cbfn); \ + (_qe)->cbarg = (_cbarg); \ +} while (0) + +#define bna_is_small_rxq(rcb) ((rcb)->id == 1) + +#define BNA_MAC_IS_EQUAL(_mac1, _mac2) \ + (!memcmp((_mac1), (_mac2), sizeof(mac_t))) + +#define BNA_POWER_OF_2(x) (((x) & ((x) - 1)) == 0) + +#define BNA_TO_POWER_OF_2(x) \ +do { \ + int _shift = 0; \ + while ((x) && (x) != 1) { \ + (x) >>= 1; \ + _shift++; \ + } \ + (x) <<= _shift; \ +} while (0) + +#define BNA_TO_POWER_OF_2_HIGH(x) \ +do { \ + int n = 1; \ + while (n < (x)) \ + n <<= 1; \ + (x) = n; \ +} while (0) + +/* + * input : _addr-> os dma addr in host endian format, + * output : _bna_dma_addr-> pointer to hw dma addr + */ +#define BNA_SET_DMA_ADDR(_addr, _bna_dma_addr) \ +do { \ + u64 tmp_addr = \ + cpu_to_be64((u64)(_addr)); \ + (_bna_dma_addr)->msb = ((struct bna_dma_addr *)&tmp_addr)->msb; \ + (_bna_dma_addr)->lsb = ((struct bna_dma_addr *)&tmp_addr)->lsb; \ +} while (0) + +/* + * input : _bna_dma_addr-> pointer to hw dma addr + * output : _addr-> os dma addr in host endian format + */ +#define BNA_GET_DMA_ADDR(_bna_dma_addr, _addr) \ +do { \ + (_addr) = ((((u64)ntohl((_bna_dma_addr)->msb))) << 32) \ + | ((ntohl((_bna_dma_addr)->lsb) & 0xffffffff)); \ +} while (0) + +#define containing_rec(addr, type, field) \ + ((type *)((unsigned char *)(addr) - \ + (unsigned char *)(&((type *)0)->field))) + +#define BNA_TXQ_WI_NEEDED(_vectors) (((_vectors) + 3) >> 2) + +/* TxQ element is 64 bytes */ +#define BNA_TXQ_PAGE_INDEX_MAX (PAGE_SIZE >> 6) +#define BNA_TXQ_PAGE_INDEX_MAX_SHIFT (PAGE_SHIFT - 6) + +#define BNA_TXQ_QPGE_PTR_GET(_qe_idx, _qpt_ptr, _qe_ptr, _qe_ptr_range) \ +{ \ + unsigned int page_index; /* index within a page */ \ + void *page_addr; \ + page_index = (_qe_idx) & (BNA_TXQ_PAGE_INDEX_MAX - 1); \ + (_qe_ptr_range) = (BNA_TXQ_PAGE_INDEX_MAX - page_index); \ + page_addr = (_qpt_ptr)[((_qe_idx) >> BNA_TXQ_PAGE_INDEX_MAX_SHIFT)];\ + (_qe_ptr) = &((struct bna_txq_entry *)(page_addr))[page_index]; \ +} + +/* RxQ element is 8 bytes */ +#define BNA_RXQ_PAGE_INDEX_MAX (PAGE_SIZE >> 3) +#define BNA_RXQ_PAGE_INDEX_MAX_SHIFT (PAGE_SHIFT - 3) + +#define BNA_RXQ_QPGE_PTR_GET(_qe_idx, _qpt_ptr, _qe_ptr, _qe_ptr_range) \ +{ \ + unsigned int page_index; /* index within a page */ \ + void *page_addr; \ + page_index = (_qe_idx) & (BNA_RXQ_PAGE_INDEX_MAX - 1); \ + (_qe_ptr_range) = (BNA_RXQ_PAGE_INDEX_MAX - page_index); \ + page_addr = (_qpt_ptr)[((_qe_idx) >> \ + BNA_RXQ_PAGE_INDEX_MAX_SHIFT)]; \ + (_qe_ptr) = &((struct bna_rxq_entry *)(page_addr))[page_index]; \ +} + +/* CQ element is 16 bytes */ +#define BNA_CQ_PAGE_INDEX_MAX (PAGE_SIZE >> 4) +#define BNA_CQ_PAGE_INDEX_MAX_SHIFT (PAGE_SHIFT - 4) + +#define BNA_CQ_QPGE_PTR_GET(_qe_idx, _qpt_ptr, _qe_ptr, _qe_ptr_range) \ +{ \ + unsigned int page_index; /* index within a page */ \ + void *page_addr; \ + \ + page_index = (_qe_idx) & (BNA_CQ_PAGE_INDEX_MAX - 1); \ + (_qe_ptr_range) = (BNA_CQ_PAGE_INDEX_MAX - page_index); \ + page_addr = (_qpt_ptr)[((_qe_idx) >> \ + BNA_CQ_PAGE_INDEX_MAX_SHIFT)]; \ + (_qe_ptr) = &((struct bna_cq_entry *)(page_addr))[page_index];\ +} + +#define BNA_QE_INDX_2_PTR(_cast, _qe_idx, _q_base) \ + (&((_cast *)(_q_base))[(_qe_idx)]) + +#define BNA_QE_INDX_RANGE(_qe_idx, _q_depth) ((_q_depth) - (_qe_idx)) + +#define BNA_QE_INDX_ADD(_qe_idx, _qe_num, _q_depth) \ + ((_qe_idx) = ((_qe_idx) + (_qe_num)) & ((_q_depth) - 1)) + +#define BNA_Q_INDEX_CHANGE(_old_idx, _updated_idx, _q_depth) \ + (((_updated_idx) - (_old_idx)) & ((_q_depth) - 1)) + +#define BNA_QE_FREE_CNT(_q_ptr, _q_depth) \ + (((_q_ptr)->consumer_index - (_q_ptr)->producer_index - 1) & \ + ((_q_depth) - 1)) + +#define BNA_QE_IN_USE_CNT(_q_ptr, _q_depth) \ + ((((_q_ptr)->producer_index - (_q_ptr)->consumer_index)) & \ + (_q_depth - 1)) + +#define BNA_Q_GET_CI(_q_ptr) ((_q_ptr)->q.consumer_index) + +#define BNA_Q_GET_PI(_q_ptr) ((_q_ptr)->q.producer_index) + +#define BNA_Q_PI_ADD(_q_ptr, _num) \ + (_q_ptr)->q.producer_index = \ + (((_q_ptr)->q.producer_index + (_num)) & \ + ((_q_ptr)->q.q_depth - 1)) + +#define BNA_Q_CI_ADD(_q_ptr, _num) \ + (_q_ptr)->q.consumer_index = \ + (((_q_ptr)->q.consumer_index + (_num)) \ + & ((_q_ptr)->q.q_depth - 1)) + +#define BNA_Q_FREE_COUNT(_q_ptr) \ + (BNA_QE_FREE_CNT(&((_q_ptr)->q), (_q_ptr)->q.q_depth)) + +#define BNA_Q_IN_USE_COUNT(_q_ptr) \ + (BNA_QE_IN_USE_CNT(&(_q_ptr)->q, (_q_ptr)->q.q_depth)) + +/* These macros build the data portion of the TxQ/RxQ doorbell */ +#define BNA_DOORBELL_Q_PRD_IDX(_pi) (0x80000000 | (_pi)) +#define BNA_DOORBELL_Q_STOP (0x40000000) + +/* These macros build the data portion of the IB doorbell */ +#define BNA_DOORBELL_IB_INT_ACK(_timeout, _events) \ + (0x80000000 | ((_timeout) << 16) | (_events)) +#define BNA_DOORBELL_IB_INT_DISABLE (0x40000000) + +/* Set the coalescing timer for the given ib */ +#define bna_ib_coalescing_timer_set(_i_dbell, _cls_timer) \ + ((_i_dbell)->doorbell_ack = BNA_DOORBELL_IB_INT_ACK((_cls_timer), 0)); + +/* Acks 'events' # of events for a given ib */ +#define bna_ib_ack(_i_dbell, _events) \ + (writel(((_i_dbell)->doorbell_ack | (_events)), \ + (_i_dbell)->doorbell_addr)); + +#define bna_txq_prod_indx_doorbell(_tcb) \ + (writel(BNA_DOORBELL_Q_PRD_IDX((_tcb)->producer_index), \ + (_tcb)->q_dbell)); + +#define bna_rxq_prod_indx_doorbell(_rcb) \ + (writel(BNA_DOORBELL_Q_PRD_IDX((_rcb)->producer_index), \ + (_rcb)->q_dbell)); + +#define BNA_LARGE_PKT_SIZE 1000 + +#define BNA_UPDATE_PKT_CNT(_pkt, _len) \ +do { \ + if ((_len) > BNA_LARGE_PKT_SIZE) { \ + (_pkt)->large_pkt_cnt++; \ + } else { \ + (_pkt)->small_pkt_cnt++; \ + } \ +} while (0) + +#define call_rxf_stop_cbfn(rxf, status) \ + if ((rxf)->stop_cbfn) { \ + (*(rxf)->stop_cbfn)((rxf)->stop_cbarg, (status)); \ + (rxf)->stop_cbfn = NULL; \ + (rxf)->stop_cbarg = NULL; \ + } + +#define call_rxf_start_cbfn(rxf, status) \ + if ((rxf)->start_cbfn) { \ + (*(rxf)->start_cbfn)((rxf)->start_cbarg, (status)); \ + (rxf)->start_cbfn = NULL; \ + (rxf)->start_cbarg = NULL; \ + } + +#define call_rxf_cam_fltr_cbfn(rxf, status) \ + if ((rxf)->cam_fltr_cbfn) { \ + (*(rxf)->cam_fltr_cbfn)((rxf)->cam_fltr_cbarg, rxf->rx, \ + (status)); \ + (rxf)->cam_fltr_cbfn = NULL; \ + (rxf)->cam_fltr_cbarg = NULL; \ + } + +#define call_rxf_pause_cbfn(rxf, status) \ + if ((rxf)->oper_state_cbfn) { \ + (*(rxf)->oper_state_cbfn)((rxf)->oper_state_cbarg, rxf->rx,\ + (status)); \ + (rxf)->rxf_flags &= ~BNA_RXF_FL_OPERSTATE_CHANGED; \ + (rxf)->oper_state_cbfn = NULL; \ + (rxf)->oper_state_cbarg = NULL; \ + } + +#define call_rxf_resume_cbfn(rxf, status) call_rxf_pause_cbfn(rxf, status) + +#define is_xxx_enable(mode, bitmask, xxx) ((bitmask & xxx) && (mode & xxx)) + +#define is_xxx_disable(mode, bitmask, xxx) ((bitmask & xxx) && !(mode & xxx)) + +#define xxx_enable(mode, bitmask, xxx) \ +do { \ + bitmask |= xxx; \ + mode |= xxx; \ +} while (0) + +#define xxx_disable(mode, bitmask, xxx) \ +do { \ + bitmask |= xxx; \ + mode &= ~xxx; \ +} while (0) + +#define xxx_inactive(mode, bitmask, xxx) \ +do { \ + bitmask &= ~xxx; \ + mode &= ~xxx; \ +} while (0) + +#define is_promisc_enable(mode, bitmask) \ + is_xxx_enable(mode, bitmask, BNA_RXMODE_PROMISC) + +#define is_promisc_disable(mode, bitmask) \ + is_xxx_disable(mode, bitmask, BNA_RXMODE_PROMISC) + +#define promisc_enable(mode, bitmask) \ + xxx_enable(mode, bitmask, BNA_RXMODE_PROMISC) + +#define promisc_disable(mode, bitmask) \ + xxx_disable(mode, bitmask, BNA_RXMODE_PROMISC) + +#define promisc_inactive(mode, bitmask) \ + xxx_inactive(mode, bitmask, BNA_RXMODE_PROMISC) + +#define is_default_enable(mode, bitmask) \ + is_xxx_enable(mode, bitmask, BNA_RXMODE_DEFAULT) + +#define is_default_disable(mode, bitmask) \ + is_xxx_disable(mode, bitmask, BNA_RXMODE_DEFAULT) + +#define default_enable(mode, bitmask) \ + xxx_enable(mode, bitmask, BNA_RXMODE_DEFAULT) + +#define default_disable(mode, bitmask) \ + xxx_disable(mode, bitmask, BNA_RXMODE_DEFAULT) + +#define default_inactive(mode, bitmask) \ + xxx_inactive(mode, bitmask, BNA_RXMODE_DEFAULT) + +#define is_allmulti_enable(mode, bitmask) \ + is_xxx_enable(mode, bitmask, BNA_RXMODE_ALLMULTI) + +#define is_allmulti_disable(mode, bitmask) \ + is_xxx_disable(mode, bitmask, BNA_RXMODE_ALLMULTI) + +#define allmulti_enable(mode, bitmask) \ + xxx_enable(mode, bitmask, BNA_RXMODE_ALLMULTI) + +#define allmulti_disable(mode, bitmask) \ + xxx_disable(mode, bitmask, BNA_RXMODE_ALLMULTI) + +#define allmulti_inactive(mode, bitmask) \ + xxx_inactive(mode, bitmask, BNA_RXMODE_ALLMULTI) + +#define GET_RXQS(rxp, q0, q1) do { \ + switch ((rxp)->type) { \ + case BNA_RXP_SINGLE: \ + (q0) = rxp->rxq.single.only; \ + (q1) = NULL; \ + break; \ + case BNA_RXP_SLR: \ + (q0) = rxp->rxq.slr.large; \ + (q1) = rxp->rxq.slr.small; \ + break; \ + case BNA_RXP_HDS: \ + (q0) = rxp->rxq.hds.data; \ + (q1) = rxp->rxq.hds.hdr; \ + break; \ + } \ +} while (0) + +/** + * + * Function prototypes + * + */ + +/** + * BNA + */ + +/* Internal APIs */ +void bna_adv_res_req(struct bna_res_info *res_info); + +/* APIs for BNAD */ +void bna_res_req(struct bna_res_info *res_info); +void bna_init(struct bna *bna, struct bnad *bnad, + struct bfa_pcidev *pcidev, + struct bna_res_info *res_info); +void bna_uninit(struct bna *bna); +void bna_stats_get(struct bna *bna); +void bna_stats_clr(struct bna *bna); +void bna_get_perm_mac(struct bna *bna, u8 *mac); + +/* APIs for Rx */ +int bna_rit_mod_can_satisfy(struct bna_rit_mod *rit_mod, int seg_size); + +/* APIs for RxF */ +struct bna_mac *bna_ucam_mod_mac_get(struct bna_ucam_mod *ucam_mod); +void bna_ucam_mod_mac_put(struct bna_ucam_mod *ucam_mod, + struct bna_mac *mac); +struct bna_mac *bna_mcam_mod_mac_get(struct bna_mcam_mod *mcam_mod); +void bna_mcam_mod_mac_put(struct bna_mcam_mod *mcam_mod, + struct bna_mac *mac); +struct bna_rit_segment * +bna_rit_mod_seg_get(struct bna_rit_mod *rit_mod, int seg_size); +void bna_rit_mod_seg_put(struct bna_rit_mod *rit_mod, + struct bna_rit_segment *seg); + +/** + * DEVICE + */ + +/* Interanl APIs */ +void bna_adv_device_init(struct bna_device *device, struct bna *bna, + struct bna_res_info *res_info); + +/* APIs for BNA */ +void bna_device_init(struct bna_device *device, struct bna *bna, + struct bna_res_info *res_info); +void bna_device_uninit(struct bna_device *device); +void bna_device_cb_port_stopped(void *arg, enum bna_cb_status status); +int bna_device_status_get(struct bna_device *device); +int bna_device_state_get(struct bna_device *device); + +/* APIs for BNAD */ +void bna_device_enable(struct bna_device *device); +void bna_device_disable(struct bna_device *device, + enum bna_cleanup_type type); + +/** + * MBOX + */ + +/* APIs for DEVICE */ +void bna_mbox_mod_init(struct bna_mbox_mod *mbox_mod, struct bna *bna); +void bna_mbox_mod_uninit(struct bna_mbox_mod *mbox_mod); +void bna_mbox_mod_start(struct bna_mbox_mod *mbox_mod); +void bna_mbox_mod_stop(struct bna_mbox_mod *mbox_mod); + +/* APIs for PORT, TX, RX */ +void bna_mbox_handler(struct bna *bna, u32 intr_status); +void bna_mbox_send(struct bna *bna, struct bna_mbox_qe *mbox_qe); + +/** + * PORT + */ + +/* APIs for BNA */ +void bna_port_init(struct bna_port *port, struct bna *bna); +void bna_port_uninit(struct bna_port *port); +int bna_port_state_get(struct bna_port *port); +int bna_llport_state_get(struct bna_llport *llport); + +/* APIs for DEVICE */ +void bna_port_start(struct bna_port *port); +void bna_port_stop(struct bna_port *port); +void bna_port_fail(struct bna_port *port); + +/* API for RX */ +int bna_port_mtu_get(struct bna_port *port); +void bna_llport_admin_up(struct bna_llport *llport); +void bna_llport_admin_down(struct bna_llport *llport); + +/* API for BNAD */ +void bna_port_enable(struct bna_port *port); +void bna_port_disable(struct bna_port *port, enum bna_cleanup_type type, + void (*cbfn)(void *, enum bna_cb_status)); +void bna_port_pause_config(struct bna_port *port, + struct bna_pause_config *pause_config, + void (*cbfn)(struct bnad *, enum bna_cb_status)); +void bna_port_mtu_set(struct bna_port *port, int mtu, + void (*cbfn)(struct bnad *, enum bna_cb_status)); +void bna_port_mac_get(struct bna_port *port, mac_t *mac); +void bna_port_type_set(struct bna_port *port, enum bna_port_type type); +void bna_port_linkcbfn_set(struct bna_port *port, + void (*linkcbfn)(struct bnad *, + enum bna_link_status)); +void bna_port_admin_up(struct bna_port *port); +void bna_port_admin_down(struct bna_port *port); + +/* Callbacks for TX, RX */ +void bna_port_cb_tx_stopped(struct bna_port *port, + enum bna_cb_status status); +void bna_port_cb_rx_stopped(struct bna_port *port, + enum bna_cb_status status); + +/* Callbacks for MBOX */ +void bna_port_cb_link_up(struct bna_port *port, struct bfi_ll_aen *aen, + int status); +void bna_port_cb_link_down(struct bna_port *port, int status); + +/** + * IB + */ + +/* APIs for BNA */ +void bna_ib_mod_init(struct bna_ib_mod *ib_mod, struct bna *bna, + struct bna_res_info *res_info); +void bna_ib_mod_uninit(struct bna_ib_mod *ib_mod); + +/* APIs for TX, RX */ +struct bna_ib *bna_ib_get(struct bna_ib_mod *ib_mod, + enum bna_intr_type intr_type, int vector); +void bna_ib_put(struct bna_ib_mod *ib_mod, struct bna_ib *ib); +int bna_ib_reserve_idx(struct bna_ib *ib); +void bna_ib_release_idx(struct bna_ib *ib, int idx); +int bna_ib_config(struct bna_ib *ib, struct bna_ib_config *ib_config); +void bna_ib_start(struct bna_ib *ib); +void bna_ib_stop(struct bna_ib *ib); +void bna_ib_fail(struct bna_ib *ib); +void bna_ib_coalescing_timeo_set(struct bna_ib *ib, u8 coalescing_timeo); + +/** + * TX MODULE AND TX + */ + +/* Internal APIs */ +void bna_tx_prio_changed(struct bna_tx *tx, int prio); + +/* APIs for BNA */ +void bna_tx_mod_init(struct bna_tx_mod *tx_mod, struct bna *bna, + struct bna_res_info *res_info); +void bna_tx_mod_uninit(struct bna_tx_mod *tx_mod); +int bna_tx_state_get(struct bna_tx *tx); + +/* APIs for PORT */ +void bna_tx_mod_start(struct bna_tx_mod *tx_mod, enum bna_tx_type type); +void bna_tx_mod_stop(struct bna_tx_mod *tx_mod, enum bna_tx_type type); +void bna_tx_mod_fail(struct bna_tx_mod *tx_mod); +void bna_tx_mod_prio_changed(struct bna_tx_mod *tx_mod, int prio); +void bna_tx_mod_cee_link_status(struct bna_tx_mod *tx_mod, int cee_link); + +/* APIs for BNAD */ +void bna_tx_res_req(int num_txq, int txq_depth, + struct bna_res_info *res_info); +struct bna_tx *bna_tx_create(struct bna *bna, struct bnad *bnad, + struct bna_tx_config *tx_cfg, + struct bna_tx_event_cbfn *tx_cbfn, + struct bna_res_info *res_info, void *priv); +void bna_tx_destroy(struct bna_tx *tx); +void bna_tx_enable(struct bna_tx *tx); +void bna_tx_disable(struct bna_tx *tx, enum bna_cleanup_type type, + void (*cbfn)(void *, struct bna_tx *, + enum bna_cb_status)); +enum bna_cb_status +bna_tx_prio_set(struct bna_tx *tx, int prio, + void (*cbfn)(struct bnad *, struct bna_tx *, + enum bna_cb_status)); +void bna_tx_coalescing_timeo_set(struct bna_tx *tx, int coalescing_timeo); + +/** + * RX MODULE, RX, RXF + */ + +/* Internal APIs */ +void rxf_cb_cam_fltr_mbox_cmd(void *arg, int status); +void rxf_cam_mbox_cmd(struct bna_rxf *rxf, u8 cmd, + const struct bna_mac *mac_addr); +void __rxf_vlan_filter_set(struct bna_rxf *rxf, enum bna_status status); +void bna_rxf_adv_init(struct bna_rxf *rxf, + struct bna_rx *rx, + struct bna_rx_config *q_config); +int rxf_process_packet_filter_ucast(struct bna_rxf *rxf); +int rxf_process_packet_filter_promisc(struct bna_rxf *rxf); +int rxf_process_packet_filter_default(struct bna_rxf *rxf); +int rxf_process_packet_filter_allmulti(struct bna_rxf *rxf); +int rxf_clear_packet_filter_ucast(struct bna_rxf *rxf); +int rxf_clear_packet_filter_promisc(struct bna_rxf *rxf); +int rxf_clear_packet_filter_default(struct bna_rxf *rxf); +int rxf_clear_packet_filter_allmulti(struct bna_rxf *rxf); +void rxf_reset_packet_filter_ucast(struct bna_rxf *rxf); +void rxf_reset_packet_filter_promisc(struct bna_rxf *rxf); +void rxf_reset_packet_filter_default(struct bna_rxf *rxf); +void rxf_reset_packet_filter_allmulti(struct bna_rxf *rxf); + +/* APIs for BNA */ +void bna_rx_mod_init(struct bna_rx_mod *rx_mod, struct bna *bna, + struct bna_res_info *res_info); +void bna_rx_mod_uninit(struct bna_rx_mod *rx_mod); +int bna_rx_state_get(struct bna_rx *rx); +int bna_rxf_state_get(struct bna_rxf *rxf); + +/* APIs for PORT */ +void bna_rx_mod_start(struct bna_rx_mod *rx_mod, enum bna_rx_type type); +void bna_rx_mod_stop(struct bna_rx_mod *rx_mod, enum bna_rx_type type); +void bna_rx_mod_fail(struct bna_rx_mod *rx_mod); + +/* APIs for BNAD */ +void bna_rx_res_req(struct bna_rx_config *rx_config, + struct bna_res_info *res_info); +struct bna_rx *bna_rx_create(struct bna *bna, struct bnad *bnad, + struct bna_rx_config *rx_cfg, + struct bna_rx_event_cbfn *rx_cbfn, + struct bna_res_info *res_info, void *priv); +void bna_rx_destroy(struct bna_rx *rx); +void bna_rx_enable(struct bna_rx *rx); +void bna_rx_disable(struct bna_rx *rx, enum bna_cleanup_type type, + void (*cbfn)(void *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_coalescing_timeo_set(struct bna_rx *rx, int coalescing_timeo); +void bna_rx_dim_reconfig(struct bna *bna, u32 vector[][BNA_BIAS_T_MAX]); +void bna_rx_dim_update(struct bna_ccb *ccb); +enum bna_cb_status +bna_rx_ucast_set(struct bna_rx *rx, u8 *ucmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_ucast_add(struct bna_rx *rx, u8* ucmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_ucast_del(struct bna_rx *rx, u8 *ucmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_mcast_add(struct bna_rx *rx, u8 *mcmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_mcast_del(struct bna_rx *rx, u8 *mcmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_mcast_listset(struct bna_rx *rx, int count, u8 *mcmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_mcast_delall(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_mode_set(struct bna_rx *rx, enum bna_rxmode rxmode, + enum bna_rxmode bitmask, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_vlan_add(struct bna_rx *rx, int vlan_id); +void bna_rx_vlan_del(struct bna_rx *rx, int vlan_id); +void bna_rx_vlanfilter_enable(struct bna_rx *rx); +void bna_rx_vlanfilter_disable(struct bna_rx *rx); +void bna_rx_rss_enable(struct bna_rx *rx); +void bna_rx_rss_disable(struct bna_rx *rx); +void bna_rx_rss_reconfig(struct bna_rx *rx, struct bna_rxf_rss *rss_config); +void bna_rx_rss_rit_set(struct bna_rx *rx, unsigned int *vectors, + int nvectors); +void bna_rx_hds_enable(struct bna_rx *rx, struct bna_rxf_hds *hds_config, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_hds_disable(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_receive_pause(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_receive_resume(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); + +/* RxF APIs for RX */ +void bna_rxf_start(struct bna_rxf *rxf); +void bna_rxf_stop(struct bna_rxf *rxf); +void bna_rxf_fail(struct bna_rxf *rxf); +void bna_rxf_init(struct bna_rxf *rxf, struct bna_rx *rx, + struct bna_rx_config *q_config); +void bna_rxf_uninit(struct bna_rxf *rxf); + +/* Callback from RXF to RX */ +void bna_rx_cb_rxf_stopped(struct bna_rx *rx, enum bna_cb_status); +void bna_rx_cb_rxf_started(struct bna_rx *rx, enum bna_cb_status); + +/** + * BNAD + */ + +/* Callbacks for BNA */ +void bnad_cb_stats_get(struct bnad *bnad, enum bna_cb_status status, + struct bna_stats *stats); +void bnad_cb_stats_clr(struct bnad *bnad); + +/* Callbacks for DEVICE */ +void bnad_cb_device_enabled(struct bnad *bnad, enum bna_cb_status status); +void bnad_cb_device_disabled(struct bnad *bnad, enum bna_cb_status status); +void bnad_cb_device_enable_mbox_intr(struct bnad *bnad); +void bnad_cb_device_disable_mbox_intr(struct bnad *bnad); + +/* Callbacks for port */ +void bnad_cb_port_link_status(struct bnad *bnad, + enum bna_link_status status); + +#endif /* __BNA_H__ */ diff --git a/drivers/net/bna/bna_ctrl.c b/drivers/net/bna/bna_ctrl.c new file mode 100644 index 000000000000..9d41ebf41cf4 --- /dev/null +++ b/drivers/net/bna/bna_ctrl.c @@ -0,0 +1,3626 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#include "bna.h" +#include "bfa_sm.h" +#include "bfa_wc.h" + +/** + * MBOX + */ +static int +bna_is_aen(u8 msg_id) +{ + return (msg_id == BFI_LL_I2H_LINK_DOWN_AEN || + msg_id == BFI_LL_I2H_LINK_UP_AEN); +} + +static void +bna_mbox_aen_callback(struct bna *bna, struct bfi_mbmsg *msg) +{ + struct bfi_ll_aen *aen = (struct bfi_ll_aen *)(msg); + + switch (aen->mh.msg_id) { + case BFI_LL_I2H_LINK_UP_AEN: + bna_port_cb_link_up(&bna->port, aen, aen->reason); + break; + case BFI_LL_I2H_LINK_DOWN_AEN: + bna_port_cb_link_down(&bna->port, aen->reason); + break; + default: + break; + } +} + +static void +bna_ll_isr(void *llarg, struct bfi_mbmsg *msg) +{ + struct bna *bna = (struct bna *)(llarg); + struct bfi_ll_rsp *mb_rsp = (struct bfi_ll_rsp *)(msg); + struct bfi_mhdr *cmd_h, *rsp_h; + struct bna_mbox_qe *mb_qe = NULL; + int to_post = 0; + u8 aen = 0; + char message[BNA_MESSAGE_SIZE]; + + aen = bna_is_aen(mb_rsp->mh.msg_id); + + if (!aen) { + mb_qe = bfa_q_first(&bna->mbox_mod.posted_q); + cmd_h = (struct bfi_mhdr *)(&mb_qe->cmd.msg[0]); + rsp_h = (struct bfi_mhdr *)(&mb_rsp->mh); + + if ((BFA_I2HM(cmd_h->msg_id) == rsp_h->msg_id) && + (cmd_h->mtag.i2htok == rsp_h->mtag.i2htok)) { + /* Remove the request from posted_q, update state */ + list_del(&mb_qe->qe); + bna->mbox_mod.msg_pending--; + if (list_empty(&bna->mbox_mod.posted_q)) + bna->mbox_mod.state = BNA_MBOX_FREE; + else + to_post = 1; + + /* Dispatch the cbfn */ + if (mb_qe->cbfn) + mb_qe->cbfn(mb_qe->cbarg, mb_rsp->error); + + /* Post the next entry, if needed */ + if (to_post) { + mb_qe = bfa_q_first(&bna->mbox_mod.posted_q); + bfa_ioc_mbox_queue(&bna->device.ioc, + &mb_qe->cmd); + } + } else { + snprintf(message, BNA_MESSAGE_SIZE, + "No matching rsp for [%d:%d:%d]\n", + mb_rsp->mh.msg_class, mb_rsp->mh.msg_id, + mb_rsp->mh.mtag.i2htok); + pr_info("%s", message); + } + + } else + bna_mbox_aen_callback(bna, msg); +} + +void +bna_err_handler(struct bna *bna, u32 intr_status) +{ + u32 init_halt; + + if (intr_status & __HALT_STATUS_BITS) { + init_halt = readl(bna->device.ioc.ioc_regs.ll_halt); + init_halt &= ~__FW_INIT_HALT_P; + writel(init_halt, bna->device.ioc.ioc_regs.ll_halt); + } + + bfa_ioc_error_isr(&bna->device.ioc); +} + +void +bna_mbox_handler(struct bna *bna, u32 intr_status) +{ + if (BNA_IS_ERR_INTR(intr_status)) { + bna_err_handler(bna, intr_status); + return; + } + if (BNA_IS_MBOX_INTR(intr_status)) + bfa_ioc_mbox_isr(&bna->device.ioc); +} + +void +bna_mbox_send(struct bna *bna, struct bna_mbox_qe *mbox_qe) +{ + struct bfi_mhdr *mh; + + mh = (struct bfi_mhdr *)(&mbox_qe->cmd.msg[0]); + + mh->mtag.i2htok = htons(bna->mbox_mod.msg_ctr); + bna->mbox_mod.msg_ctr++; + bna->mbox_mod.msg_pending++; + if (bna->mbox_mod.state == BNA_MBOX_FREE) { + list_add_tail(&mbox_qe->qe, &bna->mbox_mod.posted_q); + bfa_ioc_mbox_queue(&bna->device.ioc, &mbox_qe->cmd); + bna->mbox_mod.state = BNA_MBOX_POSTED; + } else { + list_add_tail(&mbox_qe->qe, &bna->mbox_mod.posted_q); + } +} + +void +bna_mbox_flush_q(struct bna *bna, struct list_head *q) +{ + struct bna_mbox_qe *mb_qe = NULL; + struct bfi_mhdr *cmd_h; + struct list_head *mb_q; + void (*cbfn)(void *arg, int status); + void *cbarg; + + mb_q = &bna->mbox_mod.posted_q; + + while (!list_empty(mb_q)) { + bfa_q_deq(mb_q, &mb_qe); + cbfn = mb_qe->cbfn; + cbarg = mb_qe->cbarg; + bfa_q_qe_init(mb_qe); + bna->mbox_mod.msg_pending--; + + cmd_h = (struct bfi_mhdr *)(&mb_qe->cmd.msg[0]); + if (cbfn) + cbfn(cbarg, BNA_CB_NOT_EXEC); + } + + bna->mbox_mod.state = BNA_MBOX_FREE; +} + +void +bna_mbox_mod_start(struct bna_mbox_mod *mbox_mod) +{ +} + +void +bna_mbox_mod_stop(struct bna_mbox_mod *mbox_mod) +{ + bna_mbox_flush_q(mbox_mod->bna, &mbox_mod->posted_q); +} + +void +bna_mbox_mod_init(struct bna_mbox_mod *mbox_mod, struct bna *bna) +{ + bfa_ioc_mbox_regisr(&bna->device.ioc, BFI_MC_LL, bna_ll_isr, bna); + mbox_mod->state = BNA_MBOX_FREE; + mbox_mod->msg_ctr = mbox_mod->msg_pending = 0; + INIT_LIST_HEAD(&mbox_mod->posted_q); + mbox_mod->bna = bna; +} + +void +bna_mbox_mod_uninit(struct bna_mbox_mod *mbox_mod) +{ + mbox_mod->bna = NULL; +} + +/** + * LLPORT + */ +#define call_llport_stop_cbfn(llport, status)\ +do {\ + if ((llport)->stop_cbfn)\ + (llport)->stop_cbfn(&(llport)->bna->port, status);\ + (llport)->stop_cbfn = NULL;\ +} while (0) + +static void bna_fw_llport_up(struct bna_llport *llport); +static void bna_fw_cb_llport_up(void *arg, int status); +static void bna_fw_llport_down(struct bna_llport *llport); +static void bna_fw_cb_llport_down(void *arg, int status); +static void bna_llport_start(struct bna_llport *llport); +static void bna_llport_stop(struct bna_llport *llport); +static void bna_llport_fail(struct bna_llport *llport); + +enum bna_llport_event { + LLPORT_E_START = 1, + LLPORT_E_STOP = 2, + LLPORT_E_FAIL = 3, + LLPORT_E_UP = 4, + LLPORT_E_DOWN = 5, + LLPORT_E_FWRESP_UP = 6, + LLPORT_E_FWRESP_DOWN = 7 +}; + +enum bna_llport_state { + BNA_LLPORT_STOPPED = 1, + BNA_LLPORT_DOWN = 2, + BNA_LLPORT_UP_RESP_WAIT = 3, + BNA_LLPORT_DOWN_RESP_WAIT = 4, + BNA_LLPORT_UP = 5, + BNA_LLPORT_LAST_RESP_WAIT = 6 +}; + +bfa_fsm_state_decl(bna_llport, stopped, struct bna_llport, + enum bna_llport_event); +bfa_fsm_state_decl(bna_llport, down, struct bna_llport, + enum bna_llport_event); +bfa_fsm_state_decl(bna_llport, up_resp_wait, struct bna_llport, + enum bna_llport_event); +bfa_fsm_state_decl(bna_llport, down_resp_wait, struct bna_llport, + enum bna_llport_event); +bfa_fsm_state_decl(bna_llport, up, struct bna_llport, + enum bna_llport_event); +bfa_fsm_state_decl(bna_llport, last_resp_wait, struct bna_llport, + enum bna_llport_event); + +static struct bfa_sm_table llport_sm_table[] = { + {BFA_SM(bna_llport_sm_stopped), BNA_LLPORT_STOPPED}, + {BFA_SM(bna_llport_sm_down), BNA_LLPORT_DOWN}, + {BFA_SM(bna_llport_sm_up_resp_wait), BNA_LLPORT_UP_RESP_WAIT}, + {BFA_SM(bna_llport_sm_down_resp_wait), BNA_LLPORT_DOWN_RESP_WAIT}, + {BFA_SM(bna_llport_sm_up), BNA_LLPORT_UP}, + {BFA_SM(bna_llport_sm_last_resp_wait), BNA_LLPORT_LAST_RESP_WAIT} +}; + +static void +bna_llport_sm_stopped_entry(struct bna_llport *llport) +{ + llport->bna->port.link_cbfn((llport)->bna->bnad, BNA_LINK_DOWN); + call_llport_stop_cbfn(llport, BNA_CB_SUCCESS); +} + +static void +bna_llport_sm_stopped(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case LLPORT_E_START: + bfa_fsm_set_state(llport, bna_llport_sm_down); + break; + + case LLPORT_E_STOP: + call_llport_stop_cbfn(llport, BNA_CB_SUCCESS); + break; + + case LLPORT_E_FAIL: + break; + + case LLPORT_E_DOWN: + /* This event is received due to Rx objects failing */ + /* No-op */ + break; + + case LLPORT_E_FWRESP_UP: + case LLPORT_E_FWRESP_DOWN: + /** + * These events are received due to flushing of mbox when + * device fails + */ + /* No-op */ + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_llport_sm_down_entry(struct bna_llport *llport) +{ + bnad_cb_port_link_status((llport)->bna->bnad, BNA_LINK_DOWN); +} + +static void +bna_llport_sm_down(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case LLPORT_E_STOP: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_FAIL: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_UP: + bfa_fsm_set_state(llport, bna_llport_sm_up_resp_wait); + bna_fw_llport_up(llport); + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_llport_sm_up_resp_wait_entry(struct bna_llport *llport) +{ + /** + * NOTE: Do not call bna_fw_llport_up() here. That will over step + * mbox due to down_resp_wait -> up_resp_wait transition on event + * LLPORT_E_UP + */ +} + +static void +bna_llport_sm_up_resp_wait(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case LLPORT_E_STOP: + bfa_fsm_set_state(llport, bna_llport_sm_last_resp_wait); + break; + + case LLPORT_E_FAIL: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_DOWN: + bfa_fsm_set_state(llport, bna_llport_sm_down_resp_wait); + break; + + case LLPORT_E_FWRESP_UP: + bfa_fsm_set_state(llport, bna_llport_sm_up); + break; + + case LLPORT_E_FWRESP_DOWN: + /* down_resp_wait -> up_resp_wait transition on LLPORT_E_UP */ + bna_fw_llport_up(llport); + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_llport_sm_down_resp_wait_entry(struct bna_llport *llport) +{ + /** + * NOTE: Do not call bna_fw_llport_down() here. That will over step + * mbox due to up_resp_wait -> down_resp_wait transition on event + * LLPORT_E_DOWN + */ +} + +static void +bna_llport_sm_down_resp_wait(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case LLPORT_E_STOP: + bfa_fsm_set_state(llport, bna_llport_sm_last_resp_wait); + break; + + case LLPORT_E_FAIL: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_UP: + bfa_fsm_set_state(llport, bna_llport_sm_up_resp_wait); + break; + + case LLPORT_E_FWRESP_UP: + /* up_resp_wait->down_resp_wait transition on LLPORT_E_DOWN */ + bna_fw_llport_down(llport); + break; + + case LLPORT_E_FWRESP_DOWN: + bfa_fsm_set_state(llport, bna_llport_sm_down); + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_llport_sm_up_entry(struct bna_llport *llport) +{ +} + +static void +bna_llport_sm_up(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case LLPORT_E_STOP: + bfa_fsm_set_state(llport, bna_llport_sm_last_resp_wait); + bna_fw_llport_down(llport); + break; + + case LLPORT_E_FAIL: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_DOWN: + bfa_fsm_set_state(llport, bna_llport_sm_down_resp_wait); + bna_fw_llport_down(llport); + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_llport_sm_last_resp_wait_entry(struct bna_llport *llport) +{ +} + +static void +bna_llport_sm_last_resp_wait(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case LLPORT_E_FAIL: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_DOWN: + /** + * This event is received due to Rx objects stopping in + * parallel to llport + */ + /* No-op */ + break; + + case LLPORT_E_FWRESP_UP: + /* up_resp_wait->last_resp_wait transition on LLPORT_T_STOP */ + bna_fw_llport_down(llport); + break; + + case LLPORT_E_FWRESP_DOWN: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_fw_llport_admin_up(struct bna_llport *llport) +{ + struct bfi_ll_port_admin_req ll_req; + + memset(&ll_req, 0, sizeof(ll_req)); + ll_req.mh.msg_class = BFI_MC_LL; + ll_req.mh.msg_id = BFI_LL_H2I_PORT_ADMIN_REQ; + ll_req.mh.mtag.h2i.lpu_id = 0; + + ll_req.up = BNA_STATUS_T_ENABLED; + + bna_mbox_qe_fill(&llport->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_llport_up, llport); + + bna_mbox_send(llport->bna, &llport->mbox_qe); +} + +static void +bna_fw_llport_up(struct bna_llport *llport) +{ + if (llport->type == BNA_PORT_T_REGULAR) + bna_fw_llport_admin_up(llport); +} + +static void +bna_fw_cb_llport_up(void *arg, int status) +{ + struct bna_llport *llport = (struct bna_llport *)arg; + + bfa_q_qe_init(&llport->mbox_qe.qe); + bfa_fsm_send_event(llport, LLPORT_E_FWRESP_UP); +} + +static void +bna_fw_llport_admin_down(struct bna_llport *llport) +{ + struct bfi_ll_port_admin_req ll_req; + + memset(&ll_req, 0, sizeof(ll_req)); + ll_req.mh.msg_class = BFI_MC_LL; + ll_req.mh.msg_id = BFI_LL_H2I_PORT_ADMIN_REQ; + ll_req.mh.mtag.h2i.lpu_id = 0; + + ll_req.up = BNA_STATUS_T_DISABLED; + + bna_mbox_qe_fill(&llport->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_llport_down, llport); + + bna_mbox_send(llport->bna, &llport->mbox_qe); +} + +static void +bna_fw_llport_down(struct bna_llport *llport) +{ + if (llport->type == BNA_PORT_T_REGULAR) + bna_fw_llport_admin_down(llport); +} + +static void +bna_fw_cb_llport_down(void *arg, int status) +{ + struct bna_llport *llport = (struct bna_llport *)arg; + + bfa_q_qe_init(&llport->mbox_qe.qe); + bfa_fsm_send_event(llport, LLPORT_E_FWRESP_DOWN); +} + +void +bna_port_cb_llport_stopped(struct bna_port *port, + enum bna_cb_status status) +{ + bfa_wc_down(&port->chld_stop_wc); +} + +static void +bna_llport_init(struct bna_llport *llport, struct bna *bna) +{ + llport->flags |= BNA_LLPORT_F_ENABLED; + llport->type = BNA_PORT_T_REGULAR; + llport->bna = bna; + + llport->link_status = BNA_LINK_DOWN; + + llport->admin_up_count = 0; + + llport->stop_cbfn = NULL; + + bfa_q_qe_init(&llport->mbox_qe.qe); + + bfa_fsm_set_state(llport, bna_llport_sm_stopped); +} + +static void +bna_llport_uninit(struct bna_llport *llport) +{ + llport->flags &= ~BNA_LLPORT_F_ENABLED; + + llport->bna = NULL; +} + +static void +bna_llport_start(struct bna_llport *llport) +{ + bfa_fsm_send_event(llport, LLPORT_E_START); +} + +static void +bna_llport_stop(struct bna_llport *llport) +{ + llport->stop_cbfn = bna_port_cb_llport_stopped; + + bfa_fsm_send_event(llport, LLPORT_E_STOP); +} + +static void +bna_llport_fail(struct bna_llport *llport) +{ + bfa_fsm_send_event(llport, LLPORT_E_FAIL); +} + +int +bna_llport_state_get(struct bna_llport *llport) +{ + return bfa_sm_to_state(llport_sm_table, llport->fsm); +} + +void +bna_llport_admin_up(struct bna_llport *llport) +{ + llport->admin_up_count++; + + if (llport->admin_up_count == 1) { + llport->flags |= BNA_LLPORT_F_RX_ENABLED; + if (llport->flags & BNA_LLPORT_F_ENABLED) + bfa_fsm_send_event(llport, LLPORT_E_UP); + } +} + +void +bna_llport_admin_down(struct bna_llport *llport) +{ + llport->admin_up_count--; + + if (llport->admin_up_count == 0) { + llport->flags &= ~BNA_LLPORT_F_RX_ENABLED; + if (llport->flags & BNA_LLPORT_F_ENABLED) + bfa_fsm_send_event(llport, LLPORT_E_DOWN); + } +} + +/** + * PORT + */ +#define bna_port_chld_start(port)\ +do {\ + enum bna_tx_type tx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_TX_T_REGULAR : BNA_TX_T_LOOPBACK;\ + enum bna_rx_type rx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_RX_T_REGULAR : BNA_RX_T_LOOPBACK;\ + bna_llport_start(&(port)->llport);\ + bna_tx_mod_start(&(port)->bna->tx_mod, tx_type);\ + bna_rx_mod_start(&(port)->bna->rx_mod, rx_type);\ +} while (0) + +#define bna_port_chld_stop(port)\ +do {\ + enum bna_tx_type tx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_TX_T_REGULAR : BNA_TX_T_LOOPBACK;\ + enum bna_rx_type rx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_RX_T_REGULAR : BNA_RX_T_LOOPBACK;\ + bfa_wc_up(&(port)->chld_stop_wc);\ + bfa_wc_up(&(port)->chld_stop_wc);\ + bfa_wc_up(&(port)->chld_stop_wc);\ + bna_llport_stop(&(port)->llport);\ + bna_tx_mod_stop(&(port)->bna->tx_mod, tx_type);\ + bna_rx_mod_stop(&(port)->bna->rx_mod, rx_type);\ +} while (0) + +#define bna_port_chld_fail(port)\ +do {\ + bna_llport_fail(&(port)->llport);\ + bna_tx_mod_fail(&(port)->bna->tx_mod);\ + bna_rx_mod_fail(&(port)->bna->rx_mod);\ +} while (0) + +#define bna_port_rx_start(port)\ +do {\ + enum bna_rx_type rx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_RX_T_REGULAR : BNA_RX_T_LOOPBACK;\ + bna_rx_mod_start(&(port)->bna->rx_mod, rx_type);\ +} while (0) + +#define bna_port_rx_stop(port)\ +do {\ + enum bna_rx_type rx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_RX_T_REGULAR : BNA_RX_T_LOOPBACK;\ + bfa_wc_up(&(port)->chld_stop_wc);\ + bna_rx_mod_stop(&(port)->bna->rx_mod, rx_type);\ +} while (0) + +#define call_port_stop_cbfn(port, status)\ +do {\ + if ((port)->stop_cbfn)\ + (port)->stop_cbfn((port)->stop_cbarg, status);\ + (port)->stop_cbfn = NULL;\ + (port)->stop_cbarg = NULL;\ +} while (0) + +#define call_port_pause_cbfn(port, status)\ +do {\ + if ((port)->pause_cbfn)\ + (port)->pause_cbfn((port)->bna->bnad, status);\ + (port)->pause_cbfn = NULL;\ +} while (0) + +#define call_port_mtu_cbfn(port, status)\ +do {\ + if ((port)->mtu_cbfn)\ + (port)->mtu_cbfn((port)->bna->bnad, status);\ + (port)->mtu_cbfn = NULL;\ +} while (0) + +static void bna_fw_pause_set(struct bna_port *port); +static void bna_fw_cb_pause_set(void *arg, int status); +static void bna_fw_mtu_set(struct bna_port *port); +static void bna_fw_cb_mtu_set(void *arg, int status); + +enum bna_port_event { + PORT_E_START = 1, + PORT_E_STOP = 2, + PORT_E_FAIL = 3, + PORT_E_PAUSE_CFG = 4, + PORT_E_MTU_CFG = 5, + PORT_E_CHLD_STOPPED = 6, + PORT_E_FWRESP_PAUSE = 7, + PORT_E_FWRESP_MTU = 8 +}; + +enum bna_port_state { + BNA_PORT_STOPPED = 1, + BNA_PORT_MTU_INIT_WAIT = 2, + BNA_PORT_PAUSE_INIT_WAIT = 3, + BNA_PORT_LAST_RESP_WAIT = 4, + BNA_PORT_STARTED = 5, + BNA_PORT_PAUSE_CFG_WAIT = 6, + BNA_PORT_RX_STOP_WAIT = 7, + BNA_PORT_MTU_CFG_WAIT = 8, + BNA_PORT_CHLD_STOP_WAIT = 9 +}; + +bfa_fsm_state_decl(bna_port, stopped, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, mtu_init_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, pause_init_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, last_resp_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, started, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, pause_cfg_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, rx_stop_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, mtu_cfg_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, chld_stop_wait, struct bna_port, + enum bna_port_event); + +static struct bfa_sm_table port_sm_table[] = { + {BFA_SM(bna_port_sm_stopped), BNA_PORT_STOPPED}, + {BFA_SM(bna_port_sm_mtu_init_wait), BNA_PORT_MTU_INIT_WAIT}, + {BFA_SM(bna_port_sm_pause_init_wait), BNA_PORT_PAUSE_INIT_WAIT}, + {BFA_SM(bna_port_sm_last_resp_wait), BNA_PORT_LAST_RESP_WAIT}, + {BFA_SM(bna_port_sm_started), BNA_PORT_STARTED}, + {BFA_SM(bna_port_sm_pause_cfg_wait), BNA_PORT_PAUSE_CFG_WAIT}, + {BFA_SM(bna_port_sm_rx_stop_wait), BNA_PORT_RX_STOP_WAIT}, + {BFA_SM(bna_port_sm_mtu_cfg_wait), BNA_PORT_MTU_CFG_WAIT}, + {BFA_SM(bna_port_sm_chld_stop_wait), BNA_PORT_CHLD_STOP_WAIT} +}; + +static void +bna_port_sm_stopped_entry(struct bna_port *port) +{ + call_port_pause_cbfn(port, BNA_CB_SUCCESS); + call_port_mtu_cbfn(port, BNA_CB_SUCCESS); + call_port_stop_cbfn(port, BNA_CB_SUCCESS); +} + +static void +bna_port_sm_stopped(struct bna_port *port, enum bna_port_event event) +{ + switch (event) { + case PORT_E_START: + bfa_fsm_set_state(port, bna_port_sm_mtu_init_wait); + break; + + case PORT_E_STOP: + call_port_stop_cbfn(port, BNA_CB_SUCCESS); + break; + + case PORT_E_FAIL: + /* No-op */ + break; + + case PORT_E_PAUSE_CFG: + call_port_pause_cbfn(port, BNA_CB_SUCCESS); + break; + + case PORT_E_MTU_CFG: + call_port_mtu_cbfn(port, BNA_CB_SUCCESS); + break; + + case PORT_E_CHLD_STOPPED: + /** + * This event is received due to LLPort, Tx and Rx objects + * failing + */ + /* No-op */ + break; + + case PORT_E_FWRESP_PAUSE: + case PORT_E_FWRESP_MTU: + /** + * These events are received due to flushing of mbox when + * device fails + */ + /* No-op */ + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_mtu_init_wait_entry(struct bna_port *port) +{ + bna_fw_mtu_set(port); +} + +static void +bna_port_sm_mtu_init_wait(struct bna_port *port, enum bna_port_event event) +{ + switch (event) { + case PORT_E_STOP: + bfa_fsm_set_state(port, bna_port_sm_last_resp_wait); + break; + + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + break; + + case PORT_E_PAUSE_CFG: + /* No-op */ + break; + + case PORT_E_MTU_CFG: + port->flags |= BNA_PORT_F_MTU_CHANGED; + break; + + case PORT_E_FWRESP_MTU: + if (port->flags & BNA_PORT_F_MTU_CHANGED) { + port->flags &= ~BNA_PORT_F_MTU_CHANGED; + bna_fw_mtu_set(port); + } else { + bfa_fsm_set_state(port, bna_port_sm_pause_init_wait); + } + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_pause_init_wait_entry(struct bna_port *port) +{ + bna_fw_pause_set(port); +} + +static void +bna_port_sm_pause_init_wait(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_STOP: + bfa_fsm_set_state(port, bna_port_sm_last_resp_wait); + break; + + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + break; + + case PORT_E_PAUSE_CFG: + port->flags |= BNA_PORT_F_PAUSE_CHANGED; + break; + + case PORT_E_MTU_CFG: + port->flags |= BNA_PORT_F_MTU_CHANGED; + break; + + case PORT_E_FWRESP_PAUSE: + if (port->flags & BNA_PORT_F_PAUSE_CHANGED) { + port->flags &= ~BNA_PORT_F_PAUSE_CHANGED; + bna_fw_pause_set(port); + } else if (port->flags & BNA_PORT_F_MTU_CHANGED) { + port->flags &= ~BNA_PORT_F_MTU_CHANGED; + bfa_fsm_set_state(port, bna_port_sm_mtu_init_wait); + } else { + bfa_fsm_set_state(port, bna_port_sm_started); + bna_port_chld_start(port); + } + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_last_resp_wait_entry(struct bna_port *port) +{ +} + +static void +bna_port_sm_last_resp_wait(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_FAIL: + case PORT_E_FWRESP_PAUSE: + case PORT_E_FWRESP_MTU: + bfa_fsm_set_state(port, bna_port_sm_stopped); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_started_entry(struct bna_port *port) +{ + /** + * NOTE: Do not call bna_port_chld_start() here, since it will be + * inadvertently called during pause_cfg_wait->started transition + * as well + */ + call_port_pause_cbfn(port, BNA_CB_SUCCESS); + call_port_mtu_cbfn(port, BNA_CB_SUCCESS); +} + +static void +bna_port_sm_started(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_STOP: + bfa_fsm_set_state(port, bna_port_sm_chld_stop_wait); + break; + + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + bna_port_chld_fail(port); + break; + + case PORT_E_PAUSE_CFG: + bfa_fsm_set_state(port, bna_port_sm_pause_cfg_wait); + break; + + case PORT_E_MTU_CFG: + bfa_fsm_set_state(port, bna_port_sm_rx_stop_wait); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_pause_cfg_wait_entry(struct bna_port *port) +{ + bna_fw_pause_set(port); +} + +static void +bna_port_sm_pause_cfg_wait(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + bna_port_chld_fail(port); + break; + + case PORT_E_FWRESP_PAUSE: + bfa_fsm_set_state(port, bna_port_sm_started); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_rx_stop_wait_entry(struct bna_port *port) +{ + bna_port_rx_stop(port); +} + +static void +bna_port_sm_rx_stop_wait(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + bna_port_chld_fail(port); + break; + + case PORT_E_CHLD_STOPPED: + bfa_fsm_set_state(port, bna_port_sm_mtu_cfg_wait); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_mtu_cfg_wait_entry(struct bna_port *port) +{ + bna_fw_mtu_set(port); +} + +static void +bna_port_sm_mtu_cfg_wait(struct bna_port *port, enum bna_port_event event) +{ + switch (event) { + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + bna_port_chld_fail(port); + break; + + case PORT_E_FWRESP_MTU: + bfa_fsm_set_state(port, bna_port_sm_started); + bna_port_rx_start(port); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_chld_stop_wait_entry(struct bna_port *port) +{ + bna_port_chld_stop(port); +} + +static void +bna_port_sm_chld_stop_wait(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + bna_port_chld_fail(port); + break; + + case PORT_E_CHLD_STOPPED: + bfa_fsm_set_state(port, bna_port_sm_stopped); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_fw_pause_set(struct bna_port *port) +{ + struct bfi_ll_set_pause_req ll_req; + + memset(&ll_req, 0, sizeof(ll_req)); + ll_req.mh.msg_class = BFI_MC_LL; + ll_req.mh.msg_id = BFI_LL_H2I_SET_PAUSE_REQ; + ll_req.mh.mtag.h2i.lpu_id = 0; + + ll_req.tx_pause = port->pause_config.tx_pause; + ll_req.rx_pause = port->pause_config.rx_pause; + + bna_mbox_qe_fill(&port->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_pause_set, port); + + bna_mbox_send(port->bna, &port->mbox_qe); +} + +static void +bna_fw_cb_pause_set(void *arg, int status) +{ + struct bna_port *port = (struct bna_port *)arg; + + bfa_q_qe_init(&port->mbox_qe.qe); + bfa_fsm_send_event(port, PORT_E_FWRESP_PAUSE); +} + +void +bna_fw_mtu_set(struct bna_port *port) +{ + struct bfi_ll_mtu_info_req ll_req; + + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_MTU_INFO_REQ, 0); + ll_req.mtu = htons((u16)port->mtu); + + bna_mbox_qe_fill(&port->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_mtu_set, port); + bna_mbox_send(port->bna, &port->mbox_qe); +} + +void +bna_fw_cb_mtu_set(void *arg, int status) +{ + struct bna_port *port = (struct bna_port *)arg; + + bfa_q_qe_init(&port->mbox_qe.qe); + bfa_fsm_send_event(port, PORT_E_FWRESP_MTU); +} + +static void +bna_port_cb_chld_stopped(void *arg) +{ + struct bna_port *port = (struct bna_port *)arg; + + bfa_fsm_send_event(port, PORT_E_CHLD_STOPPED); +} + +void +bna_port_init(struct bna_port *port, struct bna *bna) +{ + port->bna = bna; + port->flags = 0; + port->mtu = 0; + port->type = BNA_PORT_T_REGULAR; + + port->link_cbfn = bnad_cb_port_link_status; + + port->chld_stop_wc.wc_resume = bna_port_cb_chld_stopped; + port->chld_stop_wc.wc_cbarg = port; + port->chld_stop_wc.wc_count = 0; + + port->stop_cbfn = NULL; + port->stop_cbarg = NULL; + + port->pause_cbfn = NULL; + + port->mtu_cbfn = NULL; + + bfa_q_qe_init(&port->mbox_qe.qe); + + bfa_fsm_set_state(port, bna_port_sm_stopped); + + bna_llport_init(&port->llport, bna); +} + +void +bna_port_uninit(struct bna_port *port) +{ + bna_llport_uninit(&port->llport); + + port->flags = 0; + + port->bna = NULL; +} + +int +bna_port_state_get(struct bna_port *port) +{ + return bfa_sm_to_state(port_sm_table, port->fsm); +} + +void +bna_port_start(struct bna_port *port) +{ + port->flags |= BNA_PORT_F_DEVICE_READY; + if (port->flags & BNA_PORT_F_ENABLED) + bfa_fsm_send_event(port, PORT_E_START); +} + +void +bna_port_stop(struct bna_port *port) +{ + port->stop_cbfn = bna_device_cb_port_stopped; + port->stop_cbarg = &port->bna->device; + + port->flags &= ~BNA_PORT_F_DEVICE_READY; + bfa_fsm_send_event(port, PORT_E_STOP); +} + +void +bna_port_fail(struct bna_port *port) +{ + port->flags &= ~BNA_PORT_F_DEVICE_READY; + bfa_fsm_send_event(port, PORT_E_FAIL); +} + +void +bna_port_cb_tx_stopped(struct bna_port *port, enum bna_cb_status status) +{ + bfa_wc_down(&port->chld_stop_wc); +} + +void +bna_port_cb_rx_stopped(struct bna_port *port, enum bna_cb_status status) +{ + bfa_wc_down(&port->chld_stop_wc); +} + +void +bna_port_cb_link_up(struct bna_port *port, struct bfi_ll_aen *aen, + int status) +{ + int i; + u8 prio_map; + + port->llport.link_status = BNA_LINK_UP; + if (aen->cee_linkup) + port->llport.link_status = BNA_CEE_UP; + + /* Compute the priority */ + prio_map = aen->prio_map; + if (prio_map) { + for (i = 0; i < 8; i++) { + if ((prio_map >> i) & 0x1) + break; + } + port->priority = i; + } else + port->priority = 0; + + /* Dispatch events */ + bna_tx_mod_cee_link_status(&port->bna->tx_mod, aen->cee_linkup); + bna_tx_mod_prio_changed(&port->bna->tx_mod, port->priority); + port->link_cbfn(port->bna->bnad, port->llport.link_status); +} + +void +bna_port_cb_link_down(struct bna_port *port, int status) +{ + port->llport.link_status = BNA_LINK_DOWN; + + /* Dispatch events */ + bna_tx_mod_cee_link_status(&port->bna->tx_mod, BNA_LINK_DOWN); + port->link_cbfn(port->bna->bnad, BNA_LINK_DOWN); +} + +int +bna_port_mtu_get(struct bna_port *port) +{ + return port->mtu; +} + +void +bna_port_enable(struct bna_port *port) +{ + if (port->fsm != (bfa_sm_t)bna_port_sm_stopped) + return; + + port->flags |= BNA_PORT_F_ENABLED; + + if (port->flags & BNA_PORT_F_DEVICE_READY) + bfa_fsm_send_event(port, PORT_E_START); +} + +void +bna_port_disable(struct bna_port *port, enum bna_cleanup_type type, + void (*cbfn)(void *, enum bna_cb_status)) +{ + if (type == BNA_SOFT_CLEANUP) { + (*cbfn)(port->bna->bnad, BNA_CB_SUCCESS); + return; + } + + port->stop_cbfn = cbfn; + port->stop_cbarg = port->bna->bnad; + + port->flags &= ~BNA_PORT_F_ENABLED; + + bfa_fsm_send_event(port, PORT_E_STOP); +} + +void +bna_port_pause_config(struct bna_port *port, + struct bna_pause_config *pause_config, + void (*cbfn)(struct bnad *, enum bna_cb_status)) +{ + port->pause_config = *pause_config; + + port->pause_cbfn = cbfn; + + bfa_fsm_send_event(port, PORT_E_PAUSE_CFG); +} + +void +bna_port_mtu_set(struct bna_port *port, int mtu, + void (*cbfn)(struct bnad *, enum bna_cb_status)) +{ + port->mtu = mtu; + + port->mtu_cbfn = cbfn; + + bfa_fsm_send_event(port, PORT_E_MTU_CFG); +} + +void +bna_port_mac_get(struct bna_port *port, mac_t *mac) +{ + *mac = bfa_ioc_get_mac(&port->bna->device.ioc); +} + +/** + * Should be called only when port is disabled + */ +void +bna_port_type_set(struct bna_port *port, enum bna_port_type type) +{ + port->type = type; + port->llport.type = type; +} + +/** + * Should be called only when port is disabled + */ +void +bna_port_linkcbfn_set(struct bna_port *port, + void (*linkcbfn)(struct bnad *, enum bna_link_status)) +{ + port->link_cbfn = linkcbfn; +} + +void +bna_port_admin_up(struct bna_port *port) +{ + struct bna_llport *llport = &port->llport; + + if (llport->flags & BNA_LLPORT_F_ENABLED) + return; + + llport->flags |= BNA_LLPORT_F_ENABLED; + + if (llport->flags & BNA_LLPORT_F_RX_ENABLED) + bfa_fsm_send_event(llport, LLPORT_E_UP); +} + +void +bna_port_admin_down(struct bna_port *port) +{ + struct bna_llport *llport = &port->llport; + + if (!(llport->flags & BNA_LLPORT_F_ENABLED)) + return; + + llport->flags &= ~BNA_LLPORT_F_ENABLED; + + if (llport->flags & BNA_LLPORT_F_RX_ENABLED) + bfa_fsm_send_event(llport, LLPORT_E_DOWN); +} + +/** + * DEVICE + */ +#define enable_mbox_intr(_device)\ +do {\ + u32 intr_status;\ + bna_intr_status_get((_device)->bna, intr_status);\ + bnad_cb_device_enable_mbox_intr((_device)->bna->bnad);\ + bna_mbox_intr_enable((_device)->bna);\ +} while (0) + +#define disable_mbox_intr(_device)\ +do {\ + bna_mbox_intr_disable((_device)->bna);\ + bnad_cb_device_disable_mbox_intr((_device)->bna->bnad);\ +} while (0) + +const struct bna_chip_regs_offset reg_offset[] = +{{HOST_PAGE_NUM_FN0, HOSTFN0_INT_STATUS, + HOSTFN0_INT_MASK, HOST_MSIX_ERR_INDEX_FN0}, +{HOST_PAGE_NUM_FN1, HOSTFN1_INT_STATUS, + HOSTFN1_INT_MASK, HOST_MSIX_ERR_INDEX_FN1}, +{HOST_PAGE_NUM_FN2, HOSTFN2_INT_STATUS, + HOSTFN2_INT_MASK, HOST_MSIX_ERR_INDEX_FN2}, +{HOST_PAGE_NUM_FN3, HOSTFN3_INT_STATUS, + HOSTFN3_INT_MASK, HOST_MSIX_ERR_INDEX_FN3}, +}; + +enum bna_device_event { + DEVICE_E_ENABLE = 1, + DEVICE_E_DISABLE = 2, + DEVICE_E_IOC_READY = 3, + DEVICE_E_IOC_FAILED = 4, + DEVICE_E_IOC_DISABLED = 5, + DEVICE_E_IOC_RESET = 6, + DEVICE_E_PORT_STOPPED = 7, +}; + +enum bna_device_state { + BNA_DEVICE_STOPPED = 1, + BNA_DEVICE_IOC_READY_WAIT = 2, + BNA_DEVICE_READY = 3, + BNA_DEVICE_PORT_STOP_WAIT = 4, + BNA_DEVICE_IOC_DISABLE_WAIT = 5, + BNA_DEVICE_FAILED = 6 +}; + +bfa_fsm_state_decl(bna_device, stopped, struct bna_device, + enum bna_device_event); +bfa_fsm_state_decl(bna_device, ioc_ready_wait, struct bna_device, + enum bna_device_event); +bfa_fsm_state_decl(bna_device, ready, struct bna_device, + enum bna_device_event); +bfa_fsm_state_decl(bna_device, port_stop_wait, struct bna_device, + enum bna_device_event); +bfa_fsm_state_decl(bna_device, ioc_disable_wait, struct bna_device, + enum bna_device_event); +bfa_fsm_state_decl(bna_device, failed, struct bna_device, + enum bna_device_event); + +static struct bfa_sm_table device_sm_table[] = { + {BFA_SM(bna_device_sm_stopped), BNA_DEVICE_STOPPED}, + {BFA_SM(bna_device_sm_ioc_ready_wait), BNA_DEVICE_IOC_READY_WAIT}, + {BFA_SM(bna_device_sm_ready), BNA_DEVICE_READY}, + {BFA_SM(bna_device_sm_port_stop_wait), BNA_DEVICE_PORT_STOP_WAIT}, + {BFA_SM(bna_device_sm_ioc_disable_wait), BNA_DEVICE_IOC_DISABLE_WAIT}, + {BFA_SM(bna_device_sm_failed), BNA_DEVICE_FAILED}, +}; + +static void +bna_device_sm_stopped_entry(struct bna_device *device) +{ + if (device->stop_cbfn) + device->stop_cbfn(device->stop_cbarg, BNA_CB_SUCCESS); + + device->stop_cbfn = NULL; + device->stop_cbarg = NULL; +} + +static void +bna_device_sm_stopped(struct bna_device *device, + enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_ENABLE: + if (device->intr_type == BNA_INTR_T_MSIX) + bna_mbox_msix_idx_set(device); + bfa_ioc_enable(&device->ioc); + bfa_fsm_set_state(device, bna_device_sm_ioc_ready_wait); + break; + + case DEVICE_E_DISABLE: + bfa_fsm_set_state(device, bna_device_sm_stopped); + break; + + case DEVICE_E_IOC_RESET: + enable_mbox_intr(device); + break; + + case DEVICE_E_IOC_FAILED: + bfa_fsm_set_state(device, bna_device_sm_failed); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +static void +bna_device_sm_ioc_ready_wait_entry(struct bna_device *device) +{ + /** + * Do not call bfa_ioc_enable() here. It must be called in the + * previous state due to failed -> ioc_ready_wait transition. + */ +} + +static void +bna_device_sm_ioc_ready_wait(struct bna_device *device, + enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_DISABLE: + if (device->ready_cbfn) + device->ready_cbfn(device->ready_cbarg, + BNA_CB_INTERRUPT); + device->ready_cbfn = NULL; + device->ready_cbarg = NULL; + bfa_fsm_set_state(device, bna_device_sm_ioc_disable_wait); + break; + + case DEVICE_E_IOC_READY: + bfa_fsm_set_state(device, bna_device_sm_ready); + break; + + case DEVICE_E_IOC_FAILED: + bfa_fsm_set_state(device, bna_device_sm_failed); + break; + + case DEVICE_E_IOC_RESET: + enable_mbox_intr(device); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +static void +bna_device_sm_ready_entry(struct bna_device *device) +{ + bna_mbox_mod_start(&device->bna->mbox_mod); + bna_port_start(&device->bna->port); + + if (device->ready_cbfn) + device->ready_cbfn(device->ready_cbarg, + BNA_CB_SUCCESS); + device->ready_cbfn = NULL; + device->ready_cbarg = NULL; +} + +static void +bna_device_sm_ready(struct bna_device *device, enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_DISABLE: + bfa_fsm_set_state(device, bna_device_sm_port_stop_wait); + break; + + case DEVICE_E_IOC_FAILED: + bfa_fsm_set_state(device, bna_device_sm_failed); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +static void +bna_device_sm_port_stop_wait_entry(struct bna_device *device) +{ + bna_port_stop(&device->bna->port); +} + +static void +bna_device_sm_port_stop_wait(struct bna_device *device, + enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_PORT_STOPPED: + bna_mbox_mod_stop(&device->bna->mbox_mod); + bfa_fsm_set_state(device, bna_device_sm_ioc_disable_wait); + break; + + case DEVICE_E_IOC_FAILED: + disable_mbox_intr(device); + bna_port_fail(&device->bna->port); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +static void +bna_device_sm_ioc_disable_wait_entry(struct bna_device *device) +{ + bfa_ioc_disable(&device->ioc); +} + +static void +bna_device_sm_ioc_disable_wait(struct bna_device *device, + enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_IOC_DISABLED: + disable_mbox_intr(device); + bfa_fsm_set_state(device, bna_device_sm_stopped); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +static void +bna_device_sm_failed_entry(struct bna_device *device) +{ + disable_mbox_intr(device); + bna_port_fail(&device->bna->port); + bna_mbox_mod_stop(&device->bna->mbox_mod); + + if (device->ready_cbfn) + device->ready_cbfn(device->ready_cbarg, + BNA_CB_FAIL); + device->ready_cbfn = NULL; + device->ready_cbarg = NULL; +} + +static void +bna_device_sm_failed(struct bna_device *device, + enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_DISABLE: + bfa_fsm_set_state(device, bna_device_sm_ioc_disable_wait); + break; + + case DEVICE_E_IOC_RESET: + enable_mbox_intr(device); + bfa_fsm_set_state(device, bna_device_sm_ioc_ready_wait); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +/* IOC callback functions */ + +static void +bna_device_cb_iocll_ready(void *dev, enum bfa_status error) +{ + struct bna_device *device = (struct bna_device *)dev; + + if (error) + bfa_fsm_send_event(device, DEVICE_E_IOC_FAILED); + else + bfa_fsm_send_event(device, DEVICE_E_IOC_READY); +} + +static void +bna_device_cb_iocll_disabled(void *dev) +{ + struct bna_device *device = (struct bna_device *)dev; + + bfa_fsm_send_event(device, DEVICE_E_IOC_DISABLED); +} + +static void +bna_device_cb_iocll_failed(void *dev) +{ + struct bna_device *device = (struct bna_device *)dev; + + bfa_fsm_send_event(device, DEVICE_E_IOC_FAILED); +} + +static void +bna_device_cb_iocll_reset(void *dev) +{ + struct bna_device *device = (struct bna_device *)dev; + + bfa_fsm_send_event(device, DEVICE_E_IOC_RESET); +} + +static struct bfa_ioc_cbfn bfa_iocll_cbfn = { + bna_device_cb_iocll_ready, + bna_device_cb_iocll_disabled, + bna_device_cb_iocll_failed, + bna_device_cb_iocll_reset +}; + +void +bna_device_init(struct bna_device *device, struct bna *bna, + struct bna_res_info *res_info) +{ + u64 dma; + + device->bna = bna; + + /** + * Attach IOC and claim: + * 1. DMA memory for IOC attributes + * 2. Kernel memory for FW trace + */ + bfa_ioc_attach(&device->ioc, device, &bfa_iocll_cbfn); + bfa_ioc_pci_init(&device->ioc, &bna->pcidev, BFI_MC_LL); + + BNA_GET_DMA_ADDR( + &res_info[BNA_RES_MEM_T_ATTR].res_u.mem_info.mdl[0].dma, dma); + bfa_ioc_mem_claim(&device->ioc, + res_info[BNA_RES_MEM_T_ATTR].res_u.mem_info.mdl[0].kva, + dma); + + bna_adv_device_init(device, bna, res_info); + /* + * Initialize mbox_mod only after IOC, so that mbox handler + * registration goes through + */ + device->intr_type = + res_info[BNA_RES_INTR_T_MBOX].res_u.intr_info.intr_type; + device->vector = + res_info[BNA_RES_INTR_T_MBOX].res_u.intr_info.idl[0].vector; + bna_mbox_mod_init(&bna->mbox_mod, bna); + + device->ready_cbfn = device->stop_cbfn = NULL; + device->ready_cbarg = device->stop_cbarg = NULL; + + bfa_fsm_set_state(device, bna_device_sm_stopped); +} + +void +bna_device_uninit(struct bna_device *device) +{ + bna_mbox_mod_uninit(&device->bna->mbox_mod); + + bfa_cee_detach(&device->bna->cee); + + bfa_ioc_detach(&device->ioc); + + device->bna = NULL; +} + +void +bna_device_cb_port_stopped(void *arg, enum bna_cb_status status) +{ + struct bna_device *device = (struct bna_device *)arg; + + bfa_fsm_send_event(device, DEVICE_E_PORT_STOPPED); +} + +int +bna_device_status_get(struct bna_device *device) +{ + return (device->fsm == (bfa_fsm_t)bna_device_sm_ready); +} + +void +bna_device_enable(struct bna_device *device) +{ + if (device->fsm != (bfa_fsm_t)bna_device_sm_stopped) { + bnad_cb_device_enabled(device->bna->bnad, BNA_CB_BUSY); + return; + } + + device->ready_cbfn = bnad_cb_device_enabled; + device->ready_cbarg = device->bna->bnad; + + bfa_fsm_send_event(device, DEVICE_E_ENABLE); +} + +void +bna_device_disable(struct bna_device *device, enum bna_cleanup_type type) +{ + if (type == BNA_SOFT_CLEANUP) { + bnad_cb_device_disabled(device->bna->bnad, BNA_CB_SUCCESS); + return; + } + + device->stop_cbfn = bnad_cb_device_disabled; + device->stop_cbarg = device->bna->bnad; + + bfa_fsm_send_event(device, DEVICE_E_DISABLE); +} + +int +bna_device_state_get(struct bna_device *device) +{ + return bfa_sm_to_state(device_sm_table, device->fsm); +} + +u32 bna_dim_vector[BNA_LOAD_T_MAX][BNA_BIAS_T_MAX] = { + {12, 20}, + {10, 18}, + {8, 16}, + {6, 12}, + {4, 8}, + {3, 6}, + {2, 4}, + {1, 2}, +}; + +u32 bna_napi_dim_vector[BNA_LOAD_T_MAX][BNA_BIAS_T_MAX] = { + {12, 12}, + {6, 10}, + {5, 10}, + {4, 8}, + {3, 6}, + {3, 6}, + {2, 4}, + {1, 2}, +}; + +/* device */ +void +bna_adv_device_init(struct bna_device *device, struct bna *bna, + struct bna_res_info *res_info) +{ + u8 *kva; + u64 dma; + + device->bna = bna; + + kva = res_info[BNA_RES_MEM_T_FWTRC].res_u.mem_info.mdl[0].kva; + + /** + * Attach common modules (Diag, SFP, CEE, Port) and claim respective + * DMA memory. + */ + BNA_GET_DMA_ADDR( + &res_info[BNA_RES_MEM_T_COM].res_u.mem_info.mdl[0].dma, dma); + kva = res_info[BNA_RES_MEM_T_COM].res_u.mem_info.mdl[0].kva; + + bfa_cee_attach(&bna->cee, &device->ioc, bna); + bfa_cee_mem_claim(&bna->cee, kva, dma); + kva += bfa_cee_meminfo(); + dma += bfa_cee_meminfo(); + +} + +/* utils */ + +void +bna_adv_res_req(struct bna_res_info *res_info) +{ + /* DMA memory for COMMON_MODULE */ + res_info[BNA_RES_MEM_T_COM].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_COM].res_u.mem_info.mem_type = BNA_MEM_T_DMA; + res_info[BNA_RES_MEM_T_COM].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_COM].res_u.mem_info.len = ALIGN( + bfa_cee_meminfo(), PAGE_SIZE); + + /* Virtual memory for retreiving fw_trc */ + res_info[BNA_RES_MEM_T_FWTRC].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_FWTRC].res_u.mem_info.mem_type = BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_FWTRC].res_u.mem_info.num = 0; + res_info[BNA_RES_MEM_T_FWTRC].res_u.mem_info.len = 0; + + /* DMA memory for retreiving stats */ + res_info[BNA_RES_MEM_T_STATS].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.mem_type = BNA_MEM_T_DMA; + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.len = + ALIGN(BFI_HW_STATS_SIZE, PAGE_SIZE); + + /* Virtual memory for soft stats */ + res_info[BNA_RES_MEM_T_SWSTATS].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_SWSTATS].res_u.mem_info.mem_type = BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_SWSTATS].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_SWSTATS].res_u.mem_info.len = + sizeof(struct bna_sw_stats); +} + +static void +bna_sw_stats_get(struct bna *bna, struct bna_sw_stats *sw_stats) +{ + struct bna_tx *tx; + struct bna_txq *txq; + struct bna_rx *rx; + struct bna_rxp *rxp; + struct list_head *qe; + struct list_head *txq_qe; + struct list_head *rxp_qe; + struct list_head *mac_qe; + int i; + + sw_stats->device_state = bna_device_state_get(&bna->device); + sw_stats->port_state = bna_port_state_get(&bna->port); + sw_stats->port_flags = bna->port.flags; + sw_stats->llport_state = bna_llport_state_get(&bna->port.llport); + sw_stats->priority = bna->port.priority; + + i = 0; + list_for_each(qe, &bna->tx_mod.tx_active_q) { + tx = (struct bna_tx *)qe; + sw_stats->tx_stats[i].tx_state = bna_tx_state_get(tx); + sw_stats->tx_stats[i].tx_flags = tx->flags; + + sw_stats->tx_stats[i].num_txqs = 0; + sw_stats->tx_stats[i].txq_bmap[0] = 0; + sw_stats->tx_stats[i].txq_bmap[1] = 0; + list_for_each(txq_qe, &tx->txq_q) { + txq = (struct bna_txq *)txq_qe; + if (txq->txq_id < 32) + sw_stats->tx_stats[i].txq_bmap[0] |= + ((u32)1 << txq->txq_id); + else + sw_stats->tx_stats[i].txq_bmap[1] |= + ((u32) + 1 << (txq->txq_id - 32)); + sw_stats->tx_stats[i].num_txqs++; + } + + sw_stats->tx_stats[i].txf_id = tx->txf.txf_id; + + i++; + } + sw_stats->num_active_tx = i; + + i = 0; + list_for_each(qe, &bna->rx_mod.rx_active_q) { + rx = (struct bna_rx *)qe; + sw_stats->rx_stats[i].rx_state = bna_rx_state_get(rx); + sw_stats->rx_stats[i].rx_flags = rx->rx_flags; + + sw_stats->rx_stats[i].num_rxps = 0; + sw_stats->rx_stats[i].num_rxqs = 0; + sw_stats->rx_stats[i].rxq_bmap[0] = 0; + sw_stats->rx_stats[i].rxq_bmap[1] = 0; + sw_stats->rx_stats[i].cq_bmap[0] = 0; + sw_stats->rx_stats[i].cq_bmap[1] = 0; + list_for_each(rxp_qe, &rx->rxp_q) { + rxp = (struct bna_rxp *)rxp_qe; + + sw_stats->rx_stats[i].num_rxqs += 1; + + if (rxp->type == BNA_RXP_SINGLE) { + if (rxp->rxq.single.only->rxq_id < 32) { + sw_stats->rx_stats[i].rxq_bmap[0] |= + ((u32)1 << + rxp->rxq.single.only->rxq_id); + } else { + sw_stats->rx_stats[i].rxq_bmap[1] |= + ((u32)1 << + (rxp->rxq.single.only->rxq_id - 32)); + } + } else { + if (rxp->rxq.slr.large->rxq_id < 32) { + sw_stats->rx_stats[i].rxq_bmap[0] |= + ((u32)1 << + rxp->rxq.slr.large->rxq_id); + } else { + sw_stats->rx_stats[i].rxq_bmap[1] |= + ((u32)1 << + (rxp->rxq.slr.large->rxq_id - 32)); + } + + if (rxp->rxq.slr.small->rxq_id < 32) { + sw_stats->rx_stats[i].rxq_bmap[0] |= + ((u32)1 << + rxp->rxq.slr.small->rxq_id); + } else { + sw_stats->rx_stats[i].rxq_bmap[1] |= + ((u32)1 << + (rxp->rxq.slr.small->rxq_id - 32)); + } + sw_stats->rx_stats[i].num_rxqs += 1; + } + + if (rxp->cq.cq_id < 32) + sw_stats->rx_stats[i].cq_bmap[0] |= + (1 << rxp->cq.cq_id); + else + sw_stats->rx_stats[i].cq_bmap[1] |= + (1 << (rxp->cq.cq_id - 32)); + + sw_stats->rx_stats[i].num_rxps++; + } + + sw_stats->rx_stats[i].rxf_id = rx->rxf.rxf_id; + sw_stats->rx_stats[i].rxf_state = bna_rxf_state_get(&rx->rxf); + sw_stats->rx_stats[i].rxf_oper_state = rx->rxf.rxf_oper_state; + + sw_stats->rx_stats[i].num_active_ucast = 0; + if (rx->rxf.ucast_active_mac) + sw_stats->rx_stats[i].num_active_ucast++; + list_for_each(mac_qe, &rx->rxf.ucast_active_q) + sw_stats->rx_stats[i].num_active_ucast++; + + sw_stats->rx_stats[i].num_active_mcast = 0; + list_for_each(mac_qe, &rx->rxf.mcast_active_q) + sw_stats->rx_stats[i].num_active_mcast++; + + sw_stats->rx_stats[i].rxmode_active = rx->rxf.rxmode_active; + sw_stats->rx_stats[i].vlan_filter_status = + rx->rxf.vlan_filter_status; + memcpy(sw_stats->rx_stats[i].vlan_filter_table, + rx->rxf.vlan_filter_table, + sizeof(u32) * ((BFI_MAX_VLAN + 1) / 32)); + + sw_stats->rx_stats[i].rss_status = rx->rxf.rss_status; + sw_stats->rx_stats[i].hds_status = rx->rxf.hds_status; + + i++; + } + sw_stats->num_active_rx = i; +} + +static void +bna_fw_cb_stats_get(void *arg, int status) +{ + struct bna *bna = (struct bna *)arg; + u64 *p_stats; + int i, count; + int rxf_count, txf_count; + u64 rxf_bmap, txf_bmap; + + bfa_q_qe_init(&bna->mbox_qe.qe); + + if (status == 0) { + p_stats = (u64 *)bna->stats.hw_stats; + count = sizeof(struct bfi_ll_stats) / sizeof(u64); + for (i = 0; i < count; i++) + p_stats[i] = cpu_to_be64(p_stats[i]); + + rxf_count = 0; + rxf_bmap = (u64)bna->stats.rxf_bmap[0] | + ((u64)bna->stats.rxf_bmap[1] << 32); + for (i = 0; i < BFI_LL_RXF_ID_MAX; i++) + if (rxf_bmap & ((u64)1 << i)) + rxf_count++; + + txf_count = 0; + txf_bmap = (u64)bna->stats.txf_bmap[0] | + ((u64)bna->stats.txf_bmap[1] << 32); + for (i = 0; i < BFI_LL_TXF_ID_MAX; i++) + if (txf_bmap & ((u64)1 << i)) + txf_count++; + + p_stats = (u64 *)&bna->stats.hw_stats->rxf_stats[0] + + ((rxf_count * sizeof(struct bfi_ll_stats_rxf) + + txf_count * sizeof(struct bfi_ll_stats_txf))/ + sizeof(u64)); + + /* Populate the TXF stats from the firmware DMAed copy */ + for (i = (BFI_LL_TXF_ID_MAX - 1); i >= 0; i--) + if (txf_bmap & ((u64)1 << i)) { + p_stats -= sizeof(struct bfi_ll_stats_txf)/ + sizeof(u64); + memcpy(&bna->stats.hw_stats->txf_stats[i], + p_stats, + sizeof(struct bfi_ll_stats_txf)); + } + + /* Populate the RXF stats from the firmware DMAed copy */ + for (i = (BFI_LL_RXF_ID_MAX - 1); i >= 0; i--) + if (rxf_bmap & ((u64)1 << i)) { + p_stats -= sizeof(struct bfi_ll_stats_rxf)/ + sizeof(u64); + memcpy(&bna->stats.hw_stats->rxf_stats[i], + p_stats, + sizeof(struct bfi_ll_stats_rxf)); + } + + bna_sw_stats_get(bna, bna->stats.sw_stats); + bnad_cb_stats_get(bna->bnad, BNA_CB_SUCCESS, &bna->stats); + } else + bnad_cb_stats_get(bna->bnad, BNA_CB_FAIL, &bna->stats); +} + +static void +bna_fw_stats_get(struct bna *bna) +{ + struct bfi_ll_stats_req ll_req; + + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_STATS_GET_REQ, 0); + ll_req.stats_mask = htons(BFI_LL_STATS_ALL); + + ll_req.rxf_id_mask[0] = htonl(bna->rx_mod.rxf_bmap[0]); + ll_req.rxf_id_mask[1] = htonl(bna->rx_mod.rxf_bmap[1]); + ll_req.txf_id_mask[0] = htonl(bna->tx_mod.txf_bmap[0]); + ll_req.txf_id_mask[1] = htonl(bna->tx_mod.txf_bmap[1]); + + ll_req.host_buffer.a32.addr_hi = bna->hw_stats_dma.msb; + ll_req.host_buffer.a32.addr_lo = bna->hw_stats_dma.lsb; + + bna_mbox_qe_fill(&bna->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_stats_get, bna); + bna_mbox_send(bna, &bna->mbox_qe); + + bna->stats.rxf_bmap[0] = bna->rx_mod.rxf_bmap[0]; + bna->stats.rxf_bmap[1] = bna->rx_mod.rxf_bmap[1]; + bna->stats.txf_bmap[0] = bna->tx_mod.txf_bmap[0]; + bna->stats.txf_bmap[1] = bna->tx_mod.txf_bmap[1]; +} + +static void +bna_fw_cb_stats_clr(void *arg, int status) +{ + struct bna *bna = (struct bna *)arg; + + bfa_q_qe_init(&bna->mbox_qe.qe); + + memset(bna->stats.sw_stats, 0, sizeof(struct bna_sw_stats)); + memset(bna->stats.hw_stats, 0, sizeof(struct bfi_ll_stats)); + + bnad_cb_stats_clr(bna->bnad); +} + +static void +bna_fw_stats_clr(struct bna *bna) +{ + struct bfi_ll_stats_req ll_req; + + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_STATS_CLEAR_REQ, 0); + ll_req.stats_mask = htons(BFI_LL_STATS_ALL); + ll_req.rxf_id_mask[0] = htonl(0xffffffff); + ll_req.rxf_id_mask[1] = htonl(0xffffffff); + ll_req.txf_id_mask[0] = htonl(0xffffffff); + ll_req.txf_id_mask[1] = htonl(0xffffffff); + + bna_mbox_qe_fill(&bna->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_stats_clr, bna); + bna_mbox_send(bna, &bna->mbox_qe); +} + +void +bna_stats_get(struct bna *bna) +{ + if (bna_device_status_get(&bna->device)) + bna_fw_stats_get(bna); + else + bnad_cb_stats_get(bna->bnad, BNA_CB_FAIL, &bna->stats); +} + +void +bna_stats_clr(struct bna *bna) +{ + if (bna_device_status_get(&bna->device)) + bna_fw_stats_clr(bna); + else { + memset(&bna->stats.sw_stats, 0, + sizeof(struct bna_sw_stats)); + memset(bna->stats.hw_stats, 0, + sizeof(struct bfi_ll_stats)); + bnad_cb_stats_clr(bna->bnad); + } +} + +/* IB */ +void +bna_ib_coalescing_timeo_set(struct bna_ib *ib, u8 coalescing_timeo) +{ + ib->ib_config.coalescing_timeo = coalescing_timeo; + + if (ib->start_count) + ib->door_bell.doorbell_ack = BNA_DOORBELL_IB_INT_ACK( + (u32)ib->ib_config.coalescing_timeo, 0); +} + +/* RxF */ +void +bna_rxf_adv_init(struct bna_rxf *rxf, + struct bna_rx *rx, + struct bna_rx_config *q_config) +{ + switch (q_config->rxp_type) { + case BNA_RXP_SINGLE: + /* No-op */ + break; + case BNA_RXP_SLR: + rxf->ctrl_flags |= BNA_RXF_CF_SM_LG_RXQ; + break; + case BNA_RXP_HDS: + rxf->hds_cfg.hdr_type = q_config->hds_config.hdr_type; + rxf->hds_cfg.header_size = + q_config->hds_config.header_size; + rxf->forced_offset = 0; + break; + default: + break; + } + + if (q_config->rss_status == BNA_STATUS_T_ENABLED) { + rxf->ctrl_flags |= BNA_RXF_CF_RSS_ENABLE; + rxf->rss_cfg.hash_type = q_config->rss_config.hash_type; + rxf->rss_cfg.hash_mask = q_config->rss_config.hash_mask; + memcpy(&rxf->rss_cfg.toeplitz_hash_key[0], + &q_config->rss_config.toeplitz_hash_key[0], + sizeof(rxf->rss_cfg.toeplitz_hash_key)); + } +} + +static void +rxf_fltr_mbox_cmd(struct bna_rxf *rxf, u8 cmd, enum bna_status status) +{ + struct bfi_ll_rxf_req req; + + bfi_h2i_set(req.mh, BFI_MC_LL, cmd, 0); + + req.rxf_id = rxf->rxf_id; + req.enable = status; + + bna_mbox_qe_fill(&rxf->mbox_qe, &req, sizeof(req), + rxf_cb_cam_fltr_mbox_cmd, rxf); + + bna_mbox_send(rxf->rx->bna, &rxf->mbox_qe); +} + +void +__rxf_default_function_config(struct bna_rxf *rxf, enum bna_status status) +{ + struct bna_rx_fndb_ram *rx_fndb_ram; + u32 ctrl_flags; + int i; + + rx_fndb_ram = (struct bna_rx_fndb_ram *) + BNA_GET_MEM_BASE_ADDR(rxf->rx->bna->pcidev.pci_bar_kva, + RX_FNDB_RAM_BASE_OFFSET); + + for (i = 0; i < BFI_MAX_RXF; i++) { + if (status == BNA_STATUS_T_ENABLED) { + if (i == rxf->rxf_id) + continue; + + ctrl_flags = + readl(&rx_fndb_ram[i].control_flags); + ctrl_flags |= BNA_RXF_CF_DEFAULT_FUNCTION_ENABLE; + writel(ctrl_flags, + &rx_fndb_ram[i].control_flags); + } else { + ctrl_flags = + readl(&rx_fndb_ram[i].control_flags); + ctrl_flags &= ~BNA_RXF_CF_DEFAULT_FUNCTION_ENABLE; + writel(ctrl_flags, + &rx_fndb_ram[i].control_flags); + } + } +} + +int +rxf_process_packet_filter_ucast(struct bna_rxf *rxf) +{ + struct bna_mac *mac = NULL; + struct list_head *qe; + + /* Add additional MAC entries */ + if (!list_empty(&rxf->ucast_pending_add_q)) { + bfa_q_deq(&rxf->ucast_pending_add_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_UCAST_ADD_REQ, mac); + list_add_tail(&mac->qe, &rxf->ucast_active_q); + return 1; + } + + /* Delete MAC addresses previousely added */ + if (!list_empty(&rxf->ucast_pending_del_q)) { + bfa_q_deq(&rxf->ucast_pending_del_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_UCAST_DEL_REQ, mac); + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, mac); + return 1; + } + + return 0; +} + +int +rxf_process_packet_filter_promisc(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* Enable/disable promiscuous mode */ + if (is_promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move promisc configuration from pending -> active */ + promisc_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active |= BNA_RXMODE_PROMISC; + + /* Disable VLAN filter to allow all VLANs */ + __rxf_vlan_filter_set(rxf, BNA_STATUS_T_DISABLED); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ, + BNA_STATUS_T_ENABLED); + return 1; + } else if (is_promisc_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move promisc configuration from pending -> active */ + promisc_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_PROMISC; + bna->rxf_promisc_id = BFI_MAX_RXF; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +int +rxf_process_packet_filter_default(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* Enable/disable default mode */ + if (is_default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move default configuration from pending -> active */ + default_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active |= BNA_RXMODE_DEFAULT; + + /* Disable VLAN filter to allow all VLANs */ + __rxf_vlan_filter_set(rxf, BNA_STATUS_T_DISABLED); + /* Redirect all other RxF vlan filtering to this one */ + __rxf_default_function_config(rxf, BNA_STATUS_T_ENABLED); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_DEFAULT_SET_REQ, + BNA_STATUS_T_ENABLED); + return 1; + } else if (is_default_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move default configuration from pending -> active */ + default_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_DEFAULT; + bna->rxf_default_id = BFI_MAX_RXF; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + /* Stop RxF vlan filter table redirection */ + __rxf_default_function_config(rxf, BNA_STATUS_T_DISABLED); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_DEFAULT_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +int +rxf_process_packet_filter_allmulti(struct bna_rxf *rxf) +{ + /* Enable/disable allmulti mode */ + if (is_allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move allmulti configuration from pending -> active */ + allmulti_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active |= BNA_RXMODE_ALLMULTI; + + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_FILTER_REQ, + BNA_STATUS_T_ENABLED); + return 1; + } else if (is_allmulti_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move allmulti configuration from pending -> active */ + allmulti_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_ALLMULTI; + + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_FILTER_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +int +rxf_clear_packet_filter_ucast(struct bna_rxf *rxf) +{ + struct bna_mac *mac = NULL; + struct list_head *qe; + + /* 1. delete pending ucast entries */ + if (!list_empty(&rxf->ucast_pending_del_q)) { + bfa_q_deq(&rxf->ucast_pending_del_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_UCAST_DEL_REQ, mac); + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, mac); + return 1; + } + + /* 2. clear active ucast entries; move them to pending_add_q */ + if (!list_empty(&rxf->ucast_active_q)) { + bfa_q_deq(&rxf->ucast_active_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_UCAST_DEL_REQ, mac); + list_add_tail(&mac->qe, &rxf->ucast_pending_add_q); + return 1; + } + + return 0; +} + +int +rxf_clear_packet_filter_promisc(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* 6. Execute pending promisc mode disable command */ + if (is_promisc_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move promisc configuration from pending -> active */ + promisc_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_PROMISC; + bna->rxf_promisc_id = BFI_MAX_RXF; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + /* 7. Clear active promisc mode; move it to pending enable */ + if (rxf->rxmode_active & BNA_RXMODE_PROMISC) { + /* move promisc configuration from active -> pending */ + promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_PROMISC; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +int +rxf_clear_packet_filter_default(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* 8. Execute pending default mode disable command */ + if (is_default_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move default configuration from pending -> active */ + default_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_DEFAULT; + bna->rxf_default_id = BFI_MAX_RXF; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + /* Stop RxF vlan filter table redirection */ + __rxf_default_function_config(rxf, BNA_STATUS_T_DISABLED); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_DEFAULT_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + /* 9. Clear active default mode; move it to pending enable */ + if (rxf->rxmode_active & BNA_RXMODE_DEFAULT) { + /* move default configuration from active -> pending */ + default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_DEFAULT; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + /* Stop RxF vlan filter table redirection */ + __rxf_default_function_config(rxf, BNA_STATUS_T_DISABLED); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_DEFAULT_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +int +rxf_clear_packet_filter_allmulti(struct bna_rxf *rxf) +{ + /* 10. Execute pending allmulti mode disable command */ + if (is_allmulti_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move allmulti configuration from pending -> active */ + allmulti_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_ALLMULTI; + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_FILTER_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + /* 11. Clear active allmulti mode; move it to pending enable */ + if (rxf->rxmode_active & BNA_RXMODE_ALLMULTI) { + /* move allmulti configuration from active -> pending */ + allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_ALLMULTI; + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_FILTER_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +void +rxf_reset_packet_filter_ucast(struct bna_rxf *rxf) +{ + struct list_head *qe; + struct bna_mac *mac; + + /* 1. Move active ucast entries to pending_add_q */ + while (!list_empty(&rxf->ucast_active_q)) { + bfa_q_deq(&rxf->ucast_active_q, &qe); + bfa_q_qe_init(qe); + list_add_tail(qe, &rxf->ucast_pending_add_q); + } + + /* 2. Throw away delete pending ucast entries */ + while (!list_empty(&rxf->ucast_pending_del_q)) { + bfa_q_deq(&rxf->ucast_pending_del_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, mac); + } +} + +void +rxf_reset_packet_filter_promisc(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* 6. Clear pending promisc mode disable */ + if (is_promisc_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + promisc_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_PROMISC; + bna->rxf_promisc_id = BFI_MAX_RXF; + } + + /* 7. Move promisc mode config from active -> pending */ + if (rxf->rxmode_active & BNA_RXMODE_PROMISC) { + promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_PROMISC; + } + +} + +void +rxf_reset_packet_filter_default(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* 8. Clear pending default mode disable */ + if (is_default_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + default_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_DEFAULT; + bna->rxf_default_id = BFI_MAX_RXF; + } + + /* 9. Move default mode config from active -> pending */ + if (rxf->rxmode_active & BNA_RXMODE_DEFAULT) { + default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_DEFAULT; + } +} + +void +rxf_reset_packet_filter_allmulti(struct bna_rxf *rxf) +{ + /* 10. Clear pending allmulti mode disable */ + if (is_allmulti_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + allmulti_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_ALLMULTI; + } + + /* 11. Move allmulti mode config from active -> pending */ + if (rxf->rxmode_active & BNA_RXMODE_ALLMULTI) { + allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_ALLMULTI; + } +} + +/** + * Should only be called by bna_rxf_mode_set. + * Helps deciding if h/w configuration is needed or not. + * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_promisc_enable(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + int ret = 0; + + /* There can not be any pending disable command */ + + /* Do nothing if pending enable or already enabled */ + if (is_promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask) || + (rxf->rxmode_active & BNA_RXMODE_PROMISC)) { + /* Schedule enable */ + } else { + /* Promisc mode should not be active in the system */ + promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + bna->rxf_promisc_id = rxf->rxf_id; + ret = 1; + } + + return ret; +} + +/** + * Should only be called by bna_rxf_mode_set. + * Helps deciding if h/w configuration is needed or not. + * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_promisc_disable(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + int ret = 0; + + /* There can not be any pending disable */ + + /* Turn off pending enable command , if any */ + if (is_promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* Promisc mode should not be active */ + /* system promisc state should be pending */ + promisc_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + /* Remove the promisc state from the system */ + bna->rxf_promisc_id = BFI_MAX_RXF; + + /* Schedule disable */ + } else if (rxf->rxmode_active & BNA_RXMODE_PROMISC) { + /* Promisc mode should be active in the system */ + promisc_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + ret = 1; + + /* Do nothing if already disabled */ + } else { + } + + return ret; +} + +/** + * Should only be called by bna_rxf_mode_set. + * Helps deciding if h/w configuration is needed or not. + * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_default_enable(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + int ret = 0; + + /* There can not be any pending disable command */ + + /* Do nothing if pending enable or already enabled */ + if (is_default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask) || + (rxf->rxmode_active & BNA_RXMODE_DEFAULT)) { + /* Schedule enable */ + } else { + /* Default mode should not be active in the system */ + default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + bna->rxf_default_id = rxf->rxf_id; + ret = 1; + } + + return ret; +} + +/** + * Should only be called by bna_rxf_mode_set. + * Helps deciding if h/w configuration is needed or not. + * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_default_disable(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + int ret = 0; + + /* There can not be any pending disable */ + + /* Turn off pending enable command , if any */ + if (is_default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* Promisc mode should not be active */ + /* system default state should be pending */ + default_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + /* Remove the default state from the system */ + bna->rxf_default_id = BFI_MAX_RXF; + + /* Schedule disable */ + } else if (rxf->rxmode_active & BNA_RXMODE_DEFAULT) { + /* Default mode should be active in the system */ + default_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + ret = 1; + + /* Do nothing if already disabled */ + } else { + } + + return ret; +} + +/** + * Should only be called by bna_rxf_mode_set. + * Helps deciding if h/w configuration is needed or not. + * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_allmulti_enable(struct bna_rxf *rxf) +{ + int ret = 0; + + /* There can not be any pending disable command */ + + /* Do nothing if pending enable or already enabled */ + if (is_allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask) || + (rxf->rxmode_active & BNA_RXMODE_ALLMULTI)) { + /* Schedule enable */ + } else { + allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + ret = 1; + } + + return ret; +} + +/** + * Should only be called by bna_rxf_mode_set. + * Helps deciding if h/w configuration is needed or not. + * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_allmulti_disable(struct bna_rxf *rxf) +{ + int ret = 0; + + /* There can not be any pending disable */ + + /* Turn off pending enable command , if any */ + if (is_allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* Allmulti mode should not be active */ + allmulti_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + + /* Schedule disable */ + } else if (rxf->rxmode_active & BNA_RXMODE_ALLMULTI) { + allmulti_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + ret = 1; + } + + return ret; +} + +/* RxF <- bnad */ +void +bna_rx_mcast_delall(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head *qe; + struct bna_mac *mac; + int need_hw_config = 0; + + /* Purge all entries from pending_add_q */ + while (!list_empty(&rxf->mcast_pending_add_q)) { + bfa_q_deq(&rxf->mcast_pending_add_q, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + } + + /* Schedule all entries in active_q for deletion */ + while (!list_empty(&rxf->mcast_active_q)) { + bfa_q_deq(&rxf->mcast_active_q, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + list_add_tail(&mac->qe, &rxf->mcast_pending_del_q); + need_hw_config = 1; + } + + if (need_hw_config) { + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + return; + } + + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); +} + +/* RxF <- Rx */ +void +bna_rx_receive_resume(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + + if (rxf->rxf_oper_state == BNA_RXF_OPER_STATE_PAUSED) { + rxf->oper_state_cbfn = cbfn; + rxf->oper_state_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_RESUME); + } else if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); +} + +void +bna_rx_receive_pause(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + + if (rxf->rxf_oper_state == BNA_RXF_OPER_STATE_RUNNING) { + rxf->oper_state_cbfn = cbfn; + rxf->oper_state_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_PAUSE); + } else if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); +} + +/* RxF <- bnad */ +enum bna_cb_status +bna_rx_ucast_add(struct bna_rx *rx, u8 *addr, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head *qe; + struct bna_mac *mac; + + /* Check if already added */ + list_for_each(qe, &rxf->ucast_active_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + /* Check if pending addition */ + list_for_each(qe, &rxf->ucast_pending_add_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + mac = bna_ucam_mod_mac_get(&rxf->rx->bna->ucam_mod); + if (mac == NULL) + return BNA_CB_UCAST_CAM_FULL; + bfa_q_qe_init(&mac->qe); + memcpy(mac->addr, addr, ETH_ALEN); + list_add_tail(&mac->qe, &rxf->ucast_pending_add_q); + + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + + return BNA_CB_SUCCESS; +} + +/* RxF <- bnad */ +enum bna_cb_status +bna_rx_ucast_del(struct bna_rx *rx, u8 *addr, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head *qe; + struct bna_mac *mac; + + list_for_each(qe, &rxf->ucast_pending_add_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + list_del(qe); + bfa_q_qe_init(qe); + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, mac); + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + list_for_each(qe, &rxf->ucast_active_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + list_del(qe); + bfa_q_qe_init(qe); + list_add_tail(qe, &rxf->ucast_pending_del_q); + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + return BNA_CB_SUCCESS; + } + } + + return BNA_CB_INVALID_MAC; +} + +/* RxF <- bnad */ +enum bna_cb_status +bna_rx_mode_set(struct bna_rx *rx, enum bna_rxmode new_mode, + enum bna_rxmode bitmask, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + int need_hw_config = 0; + + /* Error checks */ + + if (is_promisc_enable(new_mode, bitmask)) { + /* If promisc mode is already enabled elsewhere in the system */ + if ((rx->bna->rxf_promisc_id != BFI_MAX_RXF) && + (rx->bna->rxf_promisc_id != rxf->rxf_id)) + goto err_return; + + /* If default mode is already enabled in the system */ + if (rx->bna->rxf_default_id != BFI_MAX_RXF) + goto err_return; + + /* Trying to enable promiscuous and default mode together */ + if (is_default_enable(new_mode, bitmask)) + goto err_return; + } + + if (is_default_enable(new_mode, bitmask)) { + /* If default mode is already enabled elsewhere in the system */ + if ((rx->bna->rxf_default_id != BFI_MAX_RXF) && + (rx->bna->rxf_default_id != rxf->rxf_id)) { + goto err_return; + } + + /* If promiscuous mode is already enabled in the system */ + if (rx->bna->rxf_promisc_id != BFI_MAX_RXF) + goto err_return; + } + + /* Process the commands */ + + if (is_promisc_enable(new_mode, bitmask)) { + if (rxf_promisc_enable(rxf)) + need_hw_config = 1; + } else if (is_promisc_disable(new_mode, bitmask)) { + if (rxf_promisc_disable(rxf)) + need_hw_config = 1; + } + + if (is_default_enable(new_mode, bitmask)) { + if (rxf_default_enable(rxf)) + need_hw_config = 1; + } else if (is_default_disable(new_mode, bitmask)) { + if (rxf_default_disable(rxf)) + need_hw_config = 1; + } + + if (is_allmulti_enable(new_mode, bitmask)) { + if (rxf_allmulti_enable(rxf)) + need_hw_config = 1; + } else if (is_allmulti_disable(new_mode, bitmask)) { + if (rxf_allmulti_disable(rxf)) + need_hw_config = 1; + } + + /* Trigger h/w if needed */ + + if (need_hw_config) { + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + } else if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + + return BNA_CB_SUCCESS; + +err_return: + return BNA_CB_FAIL; +} + +/* RxF <- bnad */ +void +bna_rx_rss_enable(struct bna_rx *rx) +{ + struct bna_rxf *rxf = &rx->rxf; + + rxf->rxf_flags |= BNA_RXF_FL_RSS_CONFIG_PENDING; + rxf->rss_status = BNA_STATUS_T_ENABLED; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); +} + +/* RxF <- bnad */ +void +bna_rx_rss_disable(struct bna_rx *rx) +{ + struct bna_rxf *rxf = &rx->rxf; + + rxf->rxf_flags |= BNA_RXF_FL_RSS_CONFIG_PENDING; + rxf->rss_status = BNA_STATUS_T_DISABLED; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); +} + +/* RxF <- bnad */ +void +bna_rx_rss_reconfig(struct bna_rx *rx, struct bna_rxf_rss *rss_config) +{ + struct bna_rxf *rxf = &rx->rxf; + rxf->rxf_flags |= BNA_RXF_FL_RSS_CONFIG_PENDING; + rxf->rss_status = BNA_STATUS_T_ENABLED; + rxf->rss_cfg = *rss_config; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); +} + +void +/* RxF <- bnad */ +bna_rx_vlanfilter_enable(struct bna_rx *rx) +{ + struct bna_rxf *rxf = &rx->rxf; + + if (rxf->vlan_filter_status == BNA_STATUS_T_DISABLED) { + rxf->rxf_flags |= BNA_RXF_FL_VLAN_CONFIG_PENDING; + rxf->vlan_filter_status = BNA_STATUS_T_ENABLED; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + } +} + +/* RxF <- bnad */ +void +bna_rx_vlanfilter_disable(struct bna_rx *rx) +{ + struct bna_rxf *rxf = &rx->rxf; + + if (rxf->vlan_filter_status == BNA_STATUS_T_ENABLED) { + rxf->rxf_flags |= BNA_RXF_FL_VLAN_CONFIG_PENDING; + rxf->vlan_filter_status = BNA_STATUS_T_DISABLED; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + } +} + +/* Rx */ + +struct bna_rxp * +bna_rx_get_rxp(struct bna_rx *rx, int vector) +{ + struct bna_rxp *rxp; + struct list_head *qe; + + list_for_each(qe, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe; + if (rxp->vector == vector) + return rxp; + } + return NULL; +} + +/* + * bna_rx_rss_rit_set() + * Sets the Q ids for the specified msi-x vectors in the RIT. + * Maximum rit size supported is 64, which should be the max size of the + * vectors array. + */ + +void +bna_rx_rss_rit_set(struct bna_rx *rx, unsigned int *vectors, int nvectors) +{ + int i; + struct bna_rxp *rxp; + struct bna_rxq *q0 = NULL, *q1 = NULL; + struct bna *bna; + struct bna_rxf *rxf; + + /* Build the RIT contents for this RX */ + bna = rx->bna; + + rxf = &rx->rxf; + for (i = 0; i < nvectors; i++) { + rxp = bna_rx_get_rxp(rx, vectors[i]); + + GET_RXQS(rxp, q0, q1); + rxf->rit_segment->rit[i].large_rxq_id = q0->rxq_id; + rxf->rit_segment->rit[i].small_rxq_id = (q1 ? q1->rxq_id : 0); + } + + rxf->rit_segment->rit_size = nvectors; + + /* Subsequent call to enable/reconfig RSS will update the RIT in h/w */ +} + +/* Rx <- bnad */ +void +bna_rx_coalescing_timeo_set(struct bna_rx *rx, int coalescing_timeo) +{ + struct bna_rxp *rxp; + struct list_head *qe; + + list_for_each(qe, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe; + rxp->cq.ccb->rx_coalescing_timeo = coalescing_timeo; + bna_ib_coalescing_timeo_set(rxp->cq.ib, coalescing_timeo); + } +} + +/* Rx <- bnad */ +void +bna_rx_dim_reconfig(struct bna *bna, u32 vector[][BNA_BIAS_T_MAX]) +{ + int i, j; + + for (i = 0; i < BNA_LOAD_T_MAX; i++) + for (j = 0; j < BNA_BIAS_T_MAX; j++) + bna->rx_mod.dim_vector[i][j] = vector[i][j]; +} + +/* Rx <- bnad */ +void +bna_rx_dim_update(struct bna_ccb *ccb) +{ + struct bna *bna = ccb->cq->rx->bna; + u32 load, bias; + u32 pkt_rt, small_rt, large_rt; + u8 coalescing_timeo; + + if ((ccb->pkt_rate.small_pkt_cnt == 0) && + (ccb->pkt_rate.large_pkt_cnt == 0)) + return; + + /* Arrive at preconfigured coalescing timeo value based on pkt rate */ + + small_rt = ccb->pkt_rate.small_pkt_cnt; + large_rt = ccb->pkt_rate.large_pkt_cnt; + + pkt_rt = small_rt + large_rt; + + if (pkt_rt < BNA_PKT_RATE_10K) + load = BNA_LOAD_T_LOW_4; + else if (pkt_rt < BNA_PKT_RATE_20K) + load = BNA_LOAD_T_LOW_3; + else if (pkt_rt < BNA_PKT_RATE_30K) + load = BNA_LOAD_T_LOW_2; + else if (pkt_rt < BNA_PKT_RATE_40K) + load = BNA_LOAD_T_LOW_1; + else if (pkt_rt < BNA_PKT_RATE_50K) + load = BNA_LOAD_T_HIGH_1; + else if (pkt_rt < BNA_PKT_RATE_60K) + load = BNA_LOAD_T_HIGH_2; + else if (pkt_rt < BNA_PKT_RATE_80K) + load = BNA_LOAD_T_HIGH_3; + else + load = BNA_LOAD_T_HIGH_4; + + if (small_rt > (large_rt << 1)) + bias = 0; + else + bias = 1; + + ccb->pkt_rate.small_pkt_cnt = 0; + ccb->pkt_rate.large_pkt_cnt = 0; + + coalescing_timeo = bna->rx_mod.dim_vector[load][bias]; + ccb->rx_coalescing_timeo = coalescing_timeo; + + /* Set it to IB */ + bna_ib_coalescing_timeo_set(ccb->cq->ib, coalescing_timeo); +} + +/* Tx */ +/* TX <- bnad */ +enum bna_cb_status +bna_tx_prio_set(struct bna_tx *tx, int prio, + void (*cbfn)(struct bnad *, struct bna_tx *, + enum bna_cb_status)) +{ + if (tx->flags & BNA_TX_F_PRIO_LOCK) + return BNA_CB_FAIL; + else { + tx->prio_change_cbfn = cbfn; + bna_tx_prio_changed(tx, prio); + } + + return BNA_CB_SUCCESS; +} + +/* TX <- bnad */ +void +bna_tx_coalescing_timeo_set(struct bna_tx *tx, int coalescing_timeo) +{ + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_coalescing_timeo_set(txq->ib, coalescing_timeo); + } +} + +/* + * Private data + */ + +struct bna_ritseg_pool_cfg { + u32 pool_size; + u32 pool_entry_size; +}; +init_ritseg_pool(ritseg_pool_cfg); + +/* + * Private functions + */ +static void +bna_ucam_mod_init(struct bna_ucam_mod *ucam_mod, struct bna *bna, + struct bna_res_info *res_info) +{ + int i; + + ucam_mod->ucmac = (struct bna_mac *) + res_info[BNA_RES_MEM_T_UCMAC_ARRAY].res_u.mem_info.mdl[0].kva; + + INIT_LIST_HEAD(&ucam_mod->free_q); + for (i = 0; i < BFI_MAX_UCMAC; i++) { + bfa_q_qe_init(&ucam_mod->ucmac[i].qe); + list_add_tail(&ucam_mod->ucmac[i].qe, &ucam_mod->free_q); + } + + ucam_mod->bna = bna; +} + +static void +bna_ucam_mod_uninit(struct bna_ucam_mod *ucam_mod) +{ + struct list_head *qe; + int i = 0; + + list_for_each(qe, &ucam_mod->free_q) + i++; + + ucam_mod->bna = NULL; +} + +static void +bna_mcam_mod_init(struct bna_mcam_mod *mcam_mod, struct bna *bna, + struct bna_res_info *res_info) +{ + int i; + + mcam_mod->mcmac = (struct bna_mac *) + res_info[BNA_RES_MEM_T_MCMAC_ARRAY].res_u.mem_info.mdl[0].kva; + + INIT_LIST_HEAD(&mcam_mod->free_q); + for (i = 0; i < BFI_MAX_MCMAC; i++) { + bfa_q_qe_init(&mcam_mod->mcmac[i].qe); + list_add_tail(&mcam_mod->mcmac[i].qe, &mcam_mod->free_q); + } + + mcam_mod->bna = bna; +} + +static void +bna_mcam_mod_uninit(struct bna_mcam_mod *mcam_mod) +{ + struct list_head *qe; + int i = 0; + + list_for_each(qe, &mcam_mod->free_q) + i++; + + mcam_mod->bna = NULL; +} + +static void +bna_rit_mod_init(struct bna_rit_mod *rit_mod, + struct bna_res_info *res_info) +{ + int i; + int j; + int count; + int offset; + + rit_mod->rit = (struct bna_rit_entry *) + res_info[BNA_RES_MEM_T_RIT_ENTRY].res_u.mem_info.mdl[0].kva; + rit_mod->rit_segment = (struct bna_rit_segment *) + res_info[BNA_RES_MEM_T_RIT_SEGMENT].res_u.mem_info.mdl[0].kva; + + count = 0; + offset = 0; + for (i = 0; i < BFI_RIT_SEG_TOTAL_POOLS; i++) { + INIT_LIST_HEAD(&rit_mod->rit_seg_pool[i]); + for (j = 0; j < ritseg_pool_cfg[i].pool_size; j++) { + bfa_q_qe_init(&rit_mod->rit_segment[count].qe); + rit_mod->rit_segment[count].max_rit_size = + ritseg_pool_cfg[i].pool_entry_size; + rit_mod->rit_segment[count].rit_offset = offset; + rit_mod->rit_segment[count].rit = + &rit_mod->rit[offset]; + list_add_tail(&rit_mod->rit_segment[count].qe, + &rit_mod->rit_seg_pool[i]); + count++; + offset += ritseg_pool_cfg[i].pool_entry_size; + } + } +} + +static void +bna_rit_mod_uninit(struct bna_rit_mod *rit_mod) +{ + struct bna_rit_segment *rit_segment; + struct list_head *qe; + int i; + int j; + + for (i = 0; i < BFI_RIT_SEG_TOTAL_POOLS; i++) { + j = 0; + list_for_each(qe, &rit_mod->rit_seg_pool[i]) { + rit_segment = (struct bna_rit_segment *)qe; + j++; + } + } +} + +/* + * Public functions + */ + +/* Called during probe(), before calling bna_init() */ +void +bna_res_req(struct bna_res_info *res_info) +{ + bna_adv_res_req(res_info); + + /* DMA memory for retrieving IOC attributes */ + res_info[BNA_RES_MEM_T_ATTR].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_ATTR].res_u.mem_info.mem_type = BNA_MEM_T_DMA; + res_info[BNA_RES_MEM_T_ATTR].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_ATTR].res_u.mem_info.len = + ALIGN(bfa_ioc_meminfo(), PAGE_SIZE); + + /* DMA memory for index segment of an IB */ + res_info[BNA_RES_MEM_T_IBIDX].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.mem_type = BNA_MEM_T_DMA; + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.len = + BFI_IBIDX_SIZE * BFI_IBIDX_MAX_SEGSIZE; + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.num = BFI_MAX_IB; + + /* Virtual memory for IB objects - stored by IB module */ + res_info[BNA_RES_MEM_T_IB_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_IB_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_IB_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_IB_ARRAY].res_u.mem_info.len = + BFI_MAX_IB * sizeof(struct bna_ib); + + /* Virtual memory for intr objects - stored by IB module */ + res_info[BNA_RES_MEM_T_INTR_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_INTR_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_INTR_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_INTR_ARRAY].res_u.mem_info.len = + BFI_MAX_IB * sizeof(struct bna_intr); + + /* Virtual memory for idx_seg objects - stored by IB module */ + res_info[BNA_RES_MEM_T_IDXSEG_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_IDXSEG_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_IDXSEG_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_IDXSEG_ARRAY].res_u.mem_info.len = + BFI_IBIDX_TOTAL_SEGS * sizeof(struct bna_ibidx_seg); + + /* Virtual memory for Tx objects - stored by Tx module */ + res_info[BNA_RES_MEM_T_TX_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_TX_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_TX_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_TX_ARRAY].res_u.mem_info.len = + BFI_MAX_TXQ * sizeof(struct bna_tx); + + /* Virtual memory for TxQ - stored by Tx module */ + res_info[BNA_RES_MEM_T_TXQ_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_TXQ_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_TXQ_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_TXQ_ARRAY].res_u.mem_info.len = + BFI_MAX_TXQ * sizeof(struct bna_txq); + + /* Virtual memory for Rx objects - stored by Rx module */ + res_info[BNA_RES_MEM_T_RX_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_RX_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_RX_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_RX_ARRAY].res_u.mem_info.len = + BFI_MAX_RXQ * sizeof(struct bna_rx); + + /* Virtual memory for RxPath - stored by Rx module */ + res_info[BNA_RES_MEM_T_RXP_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_RXP_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_RXP_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_RXP_ARRAY].res_u.mem_info.len = + BFI_MAX_RXQ * sizeof(struct bna_rxp); + + /* Virtual memory for RxQ - stored by Rx module */ + res_info[BNA_RES_MEM_T_RXQ_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_RXQ_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_RXQ_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_RXQ_ARRAY].res_u.mem_info.len = + BFI_MAX_RXQ * sizeof(struct bna_rxq); + + /* Virtual memory for Unicast MAC address - stored by ucam module */ + res_info[BNA_RES_MEM_T_UCMAC_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_UCMAC_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_UCMAC_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_UCMAC_ARRAY].res_u.mem_info.len = + BFI_MAX_UCMAC * sizeof(struct bna_mac); + + /* Virtual memory for Multicast MAC address - stored by mcam module */ + res_info[BNA_RES_MEM_T_MCMAC_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_MCMAC_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_MCMAC_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_MCMAC_ARRAY].res_u.mem_info.len = + BFI_MAX_MCMAC * sizeof(struct bna_mac); + + /* Virtual memory for RIT entries */ + res_info[BNA_RES_MEM_T_RIT_ENTRY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_RIT_ENTRY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_RIT_ENTRY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_RIT_ENTRY].res_u.mem_info.len = + BFI_MAX_RIT_SIZE * sizeof(struct bna_rit_entry); + + /* Virtual memory for RIT segment table */ + res_info[BNA_RES_MEM_T_RIT_SEGMENT].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_RIT_SEGMENT].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_RIT_SEGMENT].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_RIT_SEGMENT].res_u.mem_info.len = + BFI_RIT_TOTAL_SEGS * sizeof(struct bna_rit_segment); + + /* Interrupt resource for mailbox interrupt */ + res_info[BNA_RES_INTR_T_MBOX].res_type = BNA_RES_T_INTR; + res_info[BNA_RES_INTR_T_MBOX].res_u.intr_info.intr_type = + BNA_INTR_T_MSIX; + res_info[BNA_RES_INTR_T_MBOX].res_u.intr_info.num = 1; +} + +/* Called during probe() */ +void +bna_init(struct bna *bna, struct bnad *bnad, struct bfa_pcidev *pcidev, + struct bna_res_info *res_info) +{ + bna->bnad = bnad; + bna->pcidev = *pcidev; + + bna->stats.hw_stats = (struct bfi_ll_stats *) + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.mdl[0].kva; + bna->hw_stats_dma.msb = + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.mdl[0].dma.msb; + bna->hw_stats_dma.lsb = + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.mdl[0].dma.lsb; + bna->stats.sw_stats = (struct bna_sw_stats *) + res_info[BNA_RES_MEM_T_SWSTATS].res_u.mem_info.mdl[0].kva; + + bna->regs.page_addr = bna->pcidev.pci_bar_kva + + reg_offset[bna->pcidev.pci_func].page_addr; + bna->regs.fn_int_status = bna->pcidev.pci_bar_kva + + reg_offset[bna->pcidev.pci_func].fn_int_status; + bna->regs.fn_int_mask = bna->pcidev.pci_bar_kva + + reg_offset[bna->pcidev.pci_func].fn_int_mask; + + if (bna->pcidev.pci_func < 3) + bna->port_num = 0; + else + bna->port_num = 1; + + /* Also initializes diag, cee, sfp, phy_port and mbox_mod */ + bna_device_init(&bna->device, bna, res_info); + + bna_port_init(&bna->port, bna); + + bna_tx_mod_init(&bna->tx_mod, bna, res_info); + + bna_rx_mod_init(&bna->rx_mod, bna, res_info); + + bna_ib_mod_init(&bna->ib_mod, bna, res_info); + + bna_rit_mod_init(&bna->rit_mod, res_info); + + bna_ucam_mod_init(&bna->ucam_mod, bna, res_info); + + bna_mcam_mod_init(&bna->mcam_mod, bna, res_info); + + bna->rxf_default_id = BFI_MAX_RXF; + bna->rxf_promisc_id = BFI_MAX_RXF; + + /* Mbox q element for posting stat request to f/w */ + bfa_q_qe_init(&bna->mbox_qe.qe); +} + +void +bna_uninit(struct bna *bna) +{ + bna_mcam_mod_uninit(&bna->mcam_mod); + + bna_ucam_mod_uninit(&bna->ucam_mod); + + bna_rit_mod_uninit(&bna->rit_mod); + + bna_ib_mod_uninit(&bna->ib_mod); + + bna_rx_mod_uninit(&bna->rx_mod); + + bna_tx_mod_uninit(&bna->tx_mod); + + bna_port_uninit(&bna->port); + + bna_device_uninit(&bna->device); + + bna->bnad = NULL; +} + +struct bna_mac * +bna_ucam_mod_mac_get(struct bna_ucam_mod *ucam_mod) +{ + struct list_head *qe; + + if (list_empty(&ucam_mod->free_q)) + return NULL; + + bfa_q_deq(&ucam_mod->free_q, &qe); + + return (struct bna_mac *)qe; +} + +void +bna_ucam_mod_mac_put(struct bna_ucam_mod *ucam_mod, struct bna_mac *mac) +{ + list_add_tail(&mac->qe, &ucam_mod->free_q); +} + +struct bna_mac * +bna_mcam_mod_mac_get(struct bna_mcam_mod *mcam_mod) +{ + struct list_head *qe; + + if (list_empty(&mcam_mod->free_q)) + return NULL; + + bfa_q_deq(&mcam_mod->free_q, &qe); + + return (struct bna_mac *)qe; +} + +void +bna_mcam_mod_mac_put(struct bna_mcam_mod *mcam_mod, struct bna_mac *mac) +{ + list_add_tail(&mac->qe, &mcam_mod->free_q); +} + +/** + * Note: This should be called in the same locking context as the call to + * bna_rit_mod_seg_get() + */ +int +bna_rit_mod_can_satisfy(struct bna_rit_mod *rit_mod, int seg_size) +{ + int i; + + /* Select the pool for seg_size */ + for (i = 0; i < BFI_RIT_SEG_TOTAL_POOLS; i++) { + if (seg_size <= ritseg_pool_cfg[i].pool_entry_size) + break; + } + + if (i == BFI_RIT_SEG_TOTAL_POOLS) + return 0; + + if (list_empty(&rit_mod->rit_seg_pool[i])) + return 0; + + return 1; +} + +struct bna_rit_segment * +bna_rit_mod_seg_get(struct bna_rit_mod *rit_mod, int seg_size) +{ + struct bna_rit_segment *seg; + struct list_head *qe; + int i; + + /* Select the pool for seg_size */ + for (i = 0; i < BFI_RIT_SEG_TOTAL_POOLS; i++) { + if (seg_size <= ritseg_pool_cfg[i].pool_entry_size) + break; + } + + if (i == BFI_RIT_SEG_TOTAL_POOLS) + return NULL; + + if (list_empty(&rit_mod->rit_seg_pool[i])) + return NULL; + + bfa_q_deq(&rit_mod->rit_seg_pool[i], &qe); + seg = (struct bna_rit_segment *)qe; + bfa_q_qe_init(&seg->qe); + seg->rit_size = seg_size; + + return seg; +} + +void +bna_rit_mod_seg_put(struct bna_rit_mod *rit_mod, + struct bna_rit_segment *seg) +{ + int i; + + /* Select the pool for seg->max_rit_size */ + for (i = 0; i < BFI_RIT_SEG_TOTAL_POOLS; i++) { + if (seg->max_rit_size == ritseg_pool_cfg[i].pool_entry_size) + break; + } + + seg->rit_size = 0; + list_add_tail(&seg->qe, &rit_mod->rit_seg_pool[i]); +} diff --git a/drivers/net/bna/bna_hw.h b/drivers/net/bna/bna_hw.h new file mode 100644 index 000000000000..67eb376c5c7e --- /dev/null +++ b/drivers/net/bna/bna_hw.h @@ -0,0 +1,1491 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + * + * File for interrupt macros and functions + */ + +#ifndef __BNA_HW_H__ +#define __BNA_HW_H__ + +#include "bfi_ctreg.h" + +/** + * + * SW imposed limits + * + */ + +#ifndef BNA_BIOS_BUILD + +#define BFI_MAX_TXQ 64 +#define BFI_MAX_RXQ 64 +#define BFI_MAX_RXF 64 +#define BFI_MAX_IB 128 +#define BFI_MAX_RIT_SIZE 256 +#define BFI_RSS_RIT_SIZE 64 +#define BFI_NONRSS_RIT_SIZE 1 +#define BFI_MAX_UCMAC 256 +#define BFI_MAX_MCMAC 512 +#define BFI_IBIDX_SIZE 4 +#define BFI_MAX_VLAN 4095 + +/** + * There are 2 free IB index pools: + * pool1: 120 segments of 1 index each + * pool8: 1 segment of 8 indexes + */ +#define BFI_IBIDX_POOL1_SIZE 116 +#define BFI_IBIDX_POOL1_ENTRY_SIZE 1 +#define BFI_IBIDX_POOL2_SIZE 2 +#define BFI_IBIDX_POOL2_ENTRY_SIZE 2 +#define BFI_IBIDX_POOL8_SIZE 1 +#define BFI_IBIDX_POOL8_ENTRY_SIZE 8 +#define BFI_IBIDX_TOTAL_POOLS 3 +#define BFI_IBIDX_TOTAL_SEGS 119 /* (POOL1 + POOL2 + POOL8)_SIZE */ +#define BFI_IBIDX_MAX_SEGSIZE 8 +#define init_ibidx_pool(name) \ +static struct bna_ibidx_pool name[BFI_IBIDX_TOTAL_POOLS] = \ +{ \ + { BFI_IBIDX_POOL1_SIZE, BFI_IBIDX_POOL1_ENTRY_SIZE }, \ + { BFI_IBIDX_POOL2_SIZE, BFI_IBIDX_POOL2_ENTRY_SIZE }, \ + { BFI_IBIDX_POOL8_SIZE, BFI_IBIDX_POOL8_ENTRY_SIZE } \ +} + +/** + * There are 2 free RIT segment pools: + * Pool1: 192 segments of 1 RIT entry each + * Pool2: 1 segment of 64 RIT entry + */ +#define BFI_RIT_SEG_POOL1_SIZE 192 +#define BFI_RIT_SEG_POOL1_ENTRY_SIZE 1 +#define BFI_RIT_SEG_POOLRSS_SIZE 1 +#define BFI_RIT_SEG_POOLRSS_ENTRY_SIZE 64 +#define BFI_RIT_SEG_TOTAL_POOLS 2 +#define BFI_RIT_TOTAL_SEGS 193 /* POOL1_SIZE + POOLRSS_SIZE */ +#define init_ritseg_pool(name) \ +static struct bna_ritseg_pool_cfg name[BFI_RIT_SEG_TOTAL_POOLS] = \ +{ \ + { BFI_RIT_SEG_POOL1_SIZE, BFI_RIT_SEG_POOL1_ENTRY_SIZE }, \ + { BFI_RIT_SEG_POOLRSS_SIZE, BFI_RIT_SEG_POOLRSS_ENTRY_SIZE } \ +} + +#else /* BNA_BIOS_BUILD */ + +#define BFI_MAX_TXQ 1 +#define BFI_MAX_RXQ 1 +#define BFI_MAX_RXF 1 +#define BFI_MAX_IB 2 +#define BFI_MAX_RIT_SIZE 2 +#define BFI_RSS_RIT_SIZE 64 +#define BFI_NONRSS_RIT_SIZE 1 +#define BFI_MAX_UCMAC 1 +#define BFI_MAX_MCMAC 8 +#define BFI_IBIDX_SIZE 4 +#define BFI_MAX_VLAN 4095 +/* There is one free pool: 2 segments of 1 index each */ +#define BFI_IBIDX_POOL1_SIZE 2 +#define BFI_IBIDX_POOL1_ENTRY_SIZE 1 +#define BFI_IBIDX_TOTAL_POOLS 1 +#define BFI_IBIDX_TOTAL_SEGS 2 /* POOL1_SIZE */ +#define BFI_IBIDX_MAX_SEGSIZE 1 +#define init_ibidx_pool(name) \ +static struct bna_ibidx_pool name[BFI_IBIDX_TOTAL_POOLS] = \ +{ \ + { BFI_IBIDX_POOL1_SIZE, BFI_IBIDX_POOL1_ENTRY_SIZE } \ +} + +#define BFI_RIT_SEG_POOL1_SIZE 1 +#define BFI_RIT_SEG_POOL1_ENTRY_SIZE 1 +#define BFI_RIT_SEG_TOTAL_POOLS 1 +#define BFI_RIT_TOTAL_SEGS 1 /* POOL1_SIZE */ +#define init_ritseg_pool(name) \ +static struct bna_ritseg_pool_cfg name[BFI_RIT_SEG_TOTAL_POOLS] = \ +{ \ + { BFI_RIT_SEG_POOL1_SIZE, BFI_RIT_SEG_POOL1_ENTRY_SIZE } \ +} + +#endif /* BNA_BIOS_BUILD */ + +#define BFI_RSS_HASH_KEY_LEN 10 + +#define BFI_COALESCING_TIMER_UNIT 5 /* 5us */ +#define BFI_MAX_COALESCING_TIMEO 0xFF /* in 5us units */ +#define BFI_MAX_INTERPKT_COUNT 0xFF +#define BFI_MAX_INTERPKT_TIMEO 0xF /* in 0.5us units */ +#define BFI_TX_COALESCING_TIMEO 20 /* 20 * 5 = 100us */ +#define BFI_TX_INTERPKT_COUNT 32 +#define BFI_RX_COALESCING_TIMEO 12 /* 12 * 5 = 60us */ +#define BFI_RX_INTERPKT_COUNT 6 /* Pkt Cnt = 6 */ +#define BFI_RX_INTERPKT_TIMEO 3 /* 3 * 0.5 = 1.5us */ + +#define BFI_TXQ_WI_SIZE 64 /* bytes */ +#define BFI_RXQ_WI_SIZE 8 /* bytes */ +#define BFI_CQ_WI_SIZE 16 /* bytes */ +#define BFI_TX_MAX_WRR_QUOTA 0xFFF + +#define BFI_TX_MAX_VECTORS_PER_WI 4 +#define BFI_TX_MAX_VECTORS_PER_PKT 0xFF +#define BFI_TX_MAX_DATA_PER_VECTOR 0xFFFF +#define BFI_TX_MAX_DATA_PER_PKT 0xFFFFFF + +/* Small Q buffer size */ +#define BFI_SMALL_RXBUF_SIZE 128 + +/* Defined separately since BFA_FLASH_DMA_BUF_SZ is in bfa_flash.c */ +#define BFI_FLASH_DMA_BUF_SZ 0x010000 /* 64K DMA */ +#define BFI_HW_STATS_SIZE 0x4000 /* 16K DMA */ + +/** + * + * HW register offsets, macros + * + */ + +/* DMA Block Register Host Window Start Address */ +#define DMA_BLK_REG_ADDR 0x00013000 + +/* DMA Block Internal Registers */ +#define DMA_CTRL_REG0 (DMA_BLK_REG_ADDR + 0x000) +#define DMA_CTRL_REG1 (DMA_BLK_REG_ADDR + 0x004) +#define DMA_ERR_INT_STATUS (DMA_BLK_REG_ADDR + 0x008) +#define DMA_ERR_INT_ENABLE (DMA_BLK_REG_ADDR + 0x00c) +#define DMA_ERR_INT_STATUS_SET (DMA_BLK_REG_ADDR + 0x010) + +/* APP Block Register Address Offset from BAR0 */ +#define APP_BLK_REG_ADDR 0x00014000 + +/* Host Function Interrupt Mask Registers */ +#define HOSTFN0_INT_MASK (APP_BLK_REG_ADDR + 0x004) +#define HOSTFN1_INT_MASK (APP_BLK_REG_ADDR + 0x104) +#define HOSTFN2_INT_MASK (APP_BLK_REG_ADDR + 0x304) +#define HOSTFN3_INT_MASK (APP_BLK_REG_ADDR + 0x404) + +/** + * Host Function PCIe Error Registers + * Duplicates "Correctable" & "Uncorrectable" + * registers in PCIe Config space. + */ +#define FN0_PCIE_ERR_REG (APP_BLK_REG_ADDR + 0x014) +#define FN1_PCIE_ERR_REG (APP_BLK_REG_ADDR + 0x114) +#define FN2_PCIE_ERR_REG (APP_BLK_REG_ADDR + 0x314) +#define FN3_PCIE_ERR_REG (APP_BLK_REG_ADDR + 0x414) + +/* Host Function Error Type Status Registers */ +#define FN0_ERR_TYPE_STATUS_REG (APP_BLK_REG_ADDR + 0x018) +#define FN1_ERR_TYPE_STATUS_REG (APP_BLK_REG_ADDR + 0x118) +#define FN2_ERR_TYPE_STATUS_REG (APP_BLK_REG_ADDR + 0x318) +#define FN3_ERR_TYPE_STATUS_REG (APP_BLK_REG_ADDR + 0x418) + +/* Host Function Error Type Mask Registers */ +#define FN0_ERR_TYPE_MSK_STATUS_REG (APP_BLK_REG_ADDR + 0x01c) +#define FN1_ERR_TYPE_MSK_STATUS_REG (APP_BLK_REG_ADDR + 0x11c) +#define FN2_ERR_TYPE_MSK_STATUS_REG (APP_BLK_REG_ADDR + 0x31c) +#define FN3_ERR_TYPE_MSK_STATUS_REG (APP_BLK_REG_ADDR + 0x41c) + +/* Catapult Host Semaphore Status Registers (App block) */ +#define HOST_SEM_STS0_REG (APP_BLK_REG_ADDR + 0x630) +#define HOST_SEM_STS1_REG (APP_BLK_REG_ADDR + 0x634) +#define HOST_SEM_STS2_REG (APP_BLK_REG_ADDR + 0x638) +#define HOST_SEM_STS3_REG (APP_BLK_REG_ADDR + 0x63c) +#define HOST_SEM_STS4_REG (APP_BLK_REG_ADDR + 0x640) +#define HOST_SEM_STS5_REG (APP_BLK_REG_ADDR + 0x644) +#define HOST_SEM_STS6_REG (APP_BLK_REG_ADDR + 0x648) +#define HOST_SEM_STS7_REG (APP_BLK_REG_ADDR + 0x64c) + +/* PCIe Misc Register */ +#define PCIE_MISC_REG (APP_BLK_REG_ADDR + 0x200) + +/* Temp Sensor Control Registers */ +#define TEMPSENSE_CNTL_REG (APP_BLK_REG_ADDR + 0x250) +#define TEMPSENSE_STAT_REG (APP_BLK_REG_ADDR + 0x254) + +/* APP Block local error registers */ +#define APP_LOCAL_ERR_STAT (APP_BLK_REG_ADDR + 0x258) +#define APP_LOCAL_ERR_MSK (APP_BLK_REG_ADDR + 0x25c) + +/* PCIe Link Error registers */ +#define PCIE_LNK_ERR_STAT (APP_BLK_REG_ADDR + 0x260) +#define PCIE_LNK_ERR_MSK (APP_BLK_REG_ADDR + 0x264) + +/** + * FCoE/FIP Ethertype Register + * 31:16 -- Chip wide value for FIP type + * 15:0 -- Chip wide value for FCoE type + */ +#define FCOE_FIP_ETH_TYPE (APP_BLK_REG_ADDR + 0x280) + +/** + * Reserved Ethertype Register + * 31:16 -- Reserved + * 15:0 -- Other ethertype + */ +#define RESV_ETH_TYPE (APP_BLK_REG_ADDR + 0x284) + +/** + * Host Command Status Registers + * Each set consists of 3 registers : + * clear, set, cmd + * 16 such register sets in all + * See catapult_spec.pdf for detailed functionality + * Put each type in a single macro accessed by _num ? + */ +#define HOST_CMDSTS0_CLR_REG (APP_BLK_REG_ADDR + 0x500) +#define HOST_CMDSTS0_SET_REG (APP_BLK_REG_ADDR + 0x504) +#define HOST_CMDSTS0_REG (APP_BLK_REG_ADDR + 0x508) +#define HOST_CMDSTS1_CLR_REG (APP_BLK_REG_ADDR + 0x510) +#define HOST_CMDSTS1_SET_REG (APP_BLK_REG_ADDR + 0x514) +#define HOST_CMDSTS1_REG (APP_BLK_REG_ADDR + 0x518) +#define HOST_CMDSTS2_CLR_REG (APP_BLK_REG_ADDR + 0x520) +#define HOST_CMDSTS2_SET_REG (APP_BLK_REG_ADDR + 0x524) +#define HOST_CMDSTS2_REG (APP_BLK_REG_ADDR + 0x528) +#define HOST_CMDSTS3_CLR_REG (APP_BLK_REG_ADDR + 0x530) +#define HOST_CMDSTS3_SET_REG (APP_BLK_REG_ADDR + 0x534) +#define HOST_CMDSTS3_REG (APP_BLK_REG_ADDR + 0x538) +#define HOST_CMDSTS4_CLR_REG (APP_BLK_REG_ADDR + 0x540) +#define HOST_CMDSTS4_SET_REG (APP_BLK_REG_ADDR + 0x544) +#define HOST_CMDSTS4_REG (APP_BLK_REG_ADDR + 0x548) +#define HOST_CMDSTS5_CLR_REG (APP_BLK_REG_ADDR + 0x550) +#define HOST_CMDSTS5_SET_REG (APP_BLK_REG_ADDR + 0x554) +#define HOST_CMDSTS5_REG (APP_BLK_REG_ADDR + 0x558) +#define HOST_CMDSTS6_CLR_REG (APP_BLK_REG_ADDR + 0x560) +#define HOST_CMDSTS6_SET_REG (APP_BLK_REG_ADDR + 0x564) +#define HOST_CMDSTS6_REG (APP_BLK_REG_ADDR + 0x568) +#define HOST_CMDSTS7_CLR_REG (APP_BLK_REG_ADDR + 0x570) +#define HOST_CMDSTS7_SET_REG (APP_BLK_REG_ADDR + 0x574) +#define HOST_CMDSTS7_REG (APP_BLK_REG_ADDR + 0x578) +#define HOST_CMDSTS8_CLR_REG (APP_BLK_REG_ADDR + 0x580) +#define HOST_CMDSTS8_SET_REG (APP_BLK_REG_ADDR + 0x584) +#define HOST_CMDSTS8_REG (APP_BLK_REG_ADDR + 0x588) +#define HOST_CMDSTS9_CLR_REG (APP_BLK_REG_ADDR + 0x590) +#define HOST_CMDSTS9_SET_REG (APP_BLK_REG_ADDR + 0x594) +#define HOST_CMDSTS9_REG (APP_BLK_REG_ADDR + 0x598) +#define HOST_CMDSTS10_CLR_REG (APP_BLK_REG_ADDR + 0x5A0) +#define HOST_CMDSTS10_SET_REG (APP_BLK_REG_ADDR + 0x5A4) +#define HOST_CMDSTS10_REG (APP_BLK_REG_ADDR + 0x5A8) +#define HOST_CMDSTS11_CLR_REG (APP_BLK_REG_ADDR + 0x5B0) +#define HOST_CMDSTS11_SET_REG (APP_BLK_REG_ADDR + 0x5B4) +#define HOST_CMDSTS11_REG (APP_BLK_REG_ADDR + 0x5B8) +#define HOST_CMDSTS12_CLR_REG (APP_BLK_REG_ADDR + 0x5C0) +#define HOST_CMDSTS12_SET_REG (APP_BLK_REG_ADDR + 0x5C4) +#define HOST_CMDSTS12_REG (APP_BLK_REG_ADDR + 0x5C8) +#define HOST_CMDSTS13_CLR_REG (APP_BLK_REG_ADDR + 0x5D0) +#define HOST_CMDSTS13_SET_REG (APP_BLK_REG_ADDR + 0x5D4) +#define HOST_CMDSTS13_REG (APP_BLK_REG_ADDR + 0x5D8) +#define HOST_CMDSTS14_CLR_REG (APP_BLK_REG_ADDR + 0x5E0) +#define HOST_CMDSTS14_SET_REG (APP_BLK_REG_ADDR + 0x5E4) +#define HOST_CMDSTS14_REG (APP_BLK_REG_ADDR + 0x5E8) +#define HOST_CMDSTS15_CLR_REG (APP_BLK_REG_ADDR + 0x5F0) +#define HOST_CMDSTS15_SET_REG (APP_BLK_REG_ADDR + 0x5F4) +#define HOST_CMDSTS15_REG (APP_BLK_REG_ADDR + 0x5F8) + +/** + * LPU0 Block Register Address Offset from BAR0 + * Range 0x18000 - 0x18033 + */ +#define LPU0_BLK_REG_ADDR 0x00018000 + +/** + * LPU0 Registers + * Should they be directly used from host, + * except for diagnostics ? + * CTL_REG : Control register + * CMD_REG : Triggers exec. of cmd. in + * Mailbox memory + */ +#define LPU0_MBOX_CTL_REG (LPU0_BLK_REG_ADDR + 0x000) +#define LPU0_MBOX_CMD_REG (LPU0_BLK_REG_ADDR + 0x004) +#define LPU0_MBOX_LINK_0REG (LPU0_BLK_REG_ADDR + 0x008) +#define LPU1_MBOX_LINK_0REG (LPU0_BLK_REG_ADDR + 0x00c) +#define LPU0_MBOX_STATUS_0REG (LPU0_BLK_REG_ADDR + 0x010) +#define LPU1_MBOX_STATUS_0REG (LPU0_BLK_REG_ADDR + 0x014) +#define LPU0_ERR_STATUS_REG (LPU0_BLK_REG_ADDR + 0x018) +#define LPU0_ERR_SET_REG (LPU0_BLK_REG_ADDR + 0x020) + +/** + * LPU1 Block Register Address Offset from BAR0 + * Range 0x18400 - 0x18433 + */ +#define LPU1_BLK_REG_ADDR 0x00018400 + +/** + * LPU1 Registers + * Same as LPU0 registers above + */ +#define LPU1_MBOX_CTL_REG (LPU1_BLK_REG_ADDR + 0x000) +#define LPU1_MBOX_CMD_REG (LPU1_BLK_REG_ADDR + 0x004) +#define LPU0_MBOX_LINK_1REG (LPU1_BLK_REG_ADDR + 0x008) +#define LPU1_MBOX_LINK_1REG (LPU1_BLK_REG_ADDR + 0x00c) +#define LPU0_MBOX_STATUS_1REG (LPU1_BLK_REG_ADDR + 0x010) +#define LPU1_MBOX_STATUS_1REG (LPU1_BLK_REG_ADDR + 0x014) +#define LPU1_ERR_STATUS_REG (LPU1_BLK_REG_ADDR + 0x018) +#define LPU1_ERR_SET_REG (LPU1_BLK_REG_ADDR + 0x020) + +/** + * PSS Block Register Address Offset from BAR0 + * Range 0x18800 - 0x188DB + */ +#define PSS_BLK_REG_ADDR 0x00018800 + +/** + * PSS Registers + * For details, see catapult_spec.pdf + * ERR_STATUS_REG : Indicates error in PSS module + * RAM_ERR_STATUS_REG : Indicates RAM module that detected error + */ +#define ERR_STATUS_SET (PSS_BLK_REG_ADDR + 0x018) +#define PSS_RAM_ERR_STATUS_REG (PSS_BLK_REG_ADDR + 0x01C) + +/** + * PSS Semaphore Lock Registers, total 16 + * First read when unlocked returns 0, + * and is set to 1, atomically. + * Subsequent reads returns 1. + * To clear set the value to 0. + * Range : 0x20 to 0x5c + */ +#define PSS_SEM_LOCK_REG(_num) \ + (PSS_BLK_REG_ADDR + 0x020 + ((_num) << 2)) + +/** + * PSS Semaphore Status Registers, + * corresponding to the lock registers above + */ +#define PSS_SEM_STATUS_REG(_num) \ + (PSS_BLK_REG_ADDR + 0x060 + ((_num) << 2)) + +/** + * Catapult CPQ Registers + * Defines for Mailbox Registers + * Used to send mailbox commands to firmware from + * host. The data part is written to the MBox + * memory, registers are used to indicate that + * a commnad is resident in memory. + * + * Note : LPU0<->LPU1 mailboxes are not listed here + */ +#define CPQ_BLK_REG_ADDR 0x00019000 + +#define HOSTFN0_LPU0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x130) +#define HOSTFN0_LPU1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x134) +#define LPU0_HOSTFN0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x138) +#define LPU1_HOSTFN0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x13C) + +#define HOSTFN1_LPU0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x140) +#define HOSTFN1_LPU1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x144) +#define LPU0_HOSTFN1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x148) +#define LPU1_HOSTFN1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x14C) + +#define HOSTFN2_LPU0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x170) +#define HOSTFN2_LPU1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x174) +#define LPU0_HOSTFN2_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x178) +#define LPU1_HOSTFN2_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x17C) + +#define HOSTFN3_LPU0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x180) +#define HOSTFN3_LPU1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x184) +#define LPU0_HOSTFN3_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x188) +#define LPU1_HOSTFN3_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x18C) + +/* Host Function Force Parity Error Registers */ +#define HOSTFN0_LPU_FORCE_PERR (CPQ_BLK_REG_ADDR + 0x120) +#define HOSTFN1_LPU_FORCE_PERR (CPQ_BLK_REG_ADDR + 0x124) +#define HOSTFN2_LPU_FORCE_PERR (CPQ_BLK_REG_ADDR + 0x128) +#define HOSTFN3_LPU_FORCE_PERR (CPQ_BLK_REG_ADDR + 0x12C) + +/* LL Port[0|1] Halt Mask Registers */ +#define LL_HALT_MSK_P0 (CPQ_BLK_REG_ADDR + 0x1A0) +#define LL_HALT_MSK_P1 (CPQ_BLK_REG_ADDR + 0x1B0) + +/* LL Port[0|1] Error Mask Registers */ +#define LL_ERR_MSK_P0 (CPQ_BLK_REG_ADDR + 0x1D0) +#define LL_ERR_MSK_P1 (CPQ_BLK_REG_ADDR + 0x1D4) + +/* EMC FLI (Flash Controller) Block Register Address Offset from BAR0 */ +#define FLI_BLK_REG_ADDR 0x0001D000 + +/* EMC FLI Registers */ +#define FLI_CMD_REG (FLI_BLK_REG_ADDR + 0x000) +#define FLI_ADDR_REG (FLI_BLK_REG_ADDR + 0x004) +#define FLI_CTL_REG (FLI_BLK_REG_ADDR + 0x008) +#define FLI_WRDATA_REG (FLI_BLK_REG_ADDR + 0x00C) +#define FLI_RDDATA_REG (FLI_BLK_REG_ADDR + 0x010) +#define FLI_DEV_STATUS_REG (FLI_BLK_REG_ADDR + 0x014) +#define FLI_SIG_WD_REG (FLI_BLK_REG_ADDR + 0x018) + +/** + * RO register + * 31:16 -- Vendor Id + * 15:0 -- Device Id + */ +#define FLI_DEV_VENDOR_REG (FLI_BLK_REG_ADDR + 0x01C) +#define FLI_ERR_STATUS_REG (FLI_BLK_REG_ADDR + 0x020) + +/** + * RAD (RxAdm) Block Register Address Offset from BAR0 + * RAD0 Range : 0x20000 - 0x203FF + * RAD1 Range : 0x20400 - 0x207FF + */ +#define RAD0_BLK_REG_ADDR 0x00020000 +#define RAD1_BLK_REG_ADDR 0x00020400 + +/* RAD0 Registers */ +#define RAD0_CTL_REG (RAD0_BLK_REG_ADDR + 0x000) +#define RAD0_PE_PARM_REG (RAD0_BLK_REG_ADDR + 0x004) +#define RAD0_BCN_REG (RAD0_BLK_REG_ADDR + 0x008) + +/* Default function ID register */ +#define RAD0_DEFAULT_REG (RAD0_BLK_REG_ADDR + 0x00C) + +/* Default promiscuous ID register */ +#define RAD0_PROMISC_REG (RAD0_BLK_REG_ADDR + 0x010) + +#define RAD0_BCNQ_REG (RAD0_BLK_REG_ADDR + 0x014) + +/* + * This register selects 1 of 8 PM Q's using + * VLAN pri, for non-BCN packets without a VLAN tag + */ +#define RAD0_DEFAULTQ_REG (RAD0_BLK_REG_ADDR + 0x018) + +#define RAD0_ERR_STS (RAD0_BLK_REG_ADDR + 0x01C) +#define RAD0_SET_ERR_STS (RAD0_BLK_REG_ADDR + 0x020) +#define RAD0_ERR_INT_EN (RAD0_BLK_REG_ADDR + 0x024) +#define RAD0_FIRST_ERR (RAD0_BLK_REG_ADDR + 0x028) +#define RAD0_FORCE_ERR (RAD0_BLK_REG_ADDR + 0x02C) + +#define RAD0_IF_RCVD (RAD0_BLK_REG_ADDR + 0x030) +#define RAD0_IF_RCVD_OCTETS_HIGH (RAD0_BLK_REG_ADDR + 0x034) +#define RAD0_IF_RCVD_OCTETS_LOW (RAD0_BLK_REG_ADDR + 0x038) +#define RAD0_IF_RCVD_VLAN (RAD0_BLK_REG_ADDR + 0x03C) +#define RAD0_IF_RCVD_UCAST (RAD0_BLK_REG_ADDR + 0x040) +#define RAD0_IF_RCVD_UCAST_OCTETS_HIGH (RAD0_BLK_REG_ADDR + 0x044) +#define RAD0_IF_RCVD_UCAST_OCTETS_LOW (RAD0_BLK_REG_ADDR + 0x048) +#define RAD0_IF_RCVD_UCAST_VLAN (RAD0_BLK_REG_ADDR + 0x04C) +#define RAD0_IF_RCVD_MCAST (RAD0_BLK_REG_ADDR + 0x050) +#define RAD0_IF_RCVD_MCAST_OCTETS_HIGH (RAD0_BLK_REG_ADDR + 0x054) +#define RAD0_IF_RCVD_MCAST_OCTETS_LOW (RAD0_BLK_REG_ADDR + 0x058) +#define RAD0_IF_RCVD_MCAST_VLAN (RAD0_BLK_REG_ADDR + 0x05C) +#define RAD0_IF_RCVD_BCAST (RAD0_BLK_REG_ADDR + 0x060) +#define RAD0_IF_RCVD_BCAST_OCTETS_HIGH (RAD0_BLK_REG_ADDR + 0x064) +#define RAD0_IF_RCVD_BCAST_OCTETS_LOW (RAD0_BLK_REG_ADDR + 0x068) +#define RAD0_IF_RCVD_BCAST_VLAN (RAD0_BLK_REG_ADDR + 0x06C) +#define RAD0_DROPPED_FRAMES (RAD0_BLK_REG_ADDR + 0x070) + +#define RAD0_MAC_MAN_1H (RAD0_BLK_REG_ADDR + 0x080) +#define RAD0_MAC_MAN_1L (RAD0_BLK_REG_ADDR + 0x084) +#define RAD0_MAC_MAN_2H (RAD0_BLK_REG_ADDR + 0x088) +#define RAD0_MAC_MAN_2L (RAD0_BLK_REG_ADDR + 0x08C) +#define RAD0_MAC_MAN_3H (RAD0_BLK_REG_ADDR + 0x090) +#define RAD0_MAC_MAN_3L (RAD0_BLK_REG_ADDR + 0x094) +#define RAD0_MAC_MAN_4H (RAD0_BLK_REG_ADDR + 0x098) +#define RAD0_MAC_MAN_4L (RAD0_BLK_REG_ADDR + 0x09C) + +#define RAD0_LAST4_IP (RAD0_BLK_REG_ADDR + 0x100) + +/* RAD1 Registers */ +#define RAD1_CTL_REG (RAD1_BLK_REG_ADDR + 0x000) +#define RAD1_PE_PARM_REG (RAD1_BLK_REG_ADDR + 0x004) +#define RAD1_BCN_REG (RAD1_BLK_REG_ADDR + 0x008) + +/* Default function ID register */ +#define RAD1_DEFAULT_REG (RAD1_BLK_REG_ADDR + 0x00C) + +/* Promiscuous function ID register */ +#define RAD1_PROMISC_REG (RAD1_BLK_REG_ADDR + 0x010) + +#define RAD1_BCNQ_REG (RAD1_BLK_REG_ADDR + 0x014) + +/* + * This register selects 1 of 8 PM Q's using + * VLAN pri, for non-BCN packets without a VLAN tag + */ +#define RAD1_DEFAULTQ_REG (RAD1_BLK_REG_ADDR + 0x018) + +#define RAD1_ERR_STS (RAD1_BLK_REG_ADDR + 0x01C) +#define RAD1_SET_ERR_STS (RAD1_BLK_REG_ADDR + 0x020) +#define RAD1_ERR_INT_EN (RAD1_BLK_REG_ADDR + 0x024) + +/** + * TXA Block Register Address Offset from BAR0 + * TXA0 Range : 0x21000 - 0x213FF + * TXA1 Range : 0x21400 - 0x217FF + */ +#define TXA0_BLK_REG_ADDR 0x00021000 +#define TXA1_BLK_REG_ADDR 0x00021400 + +/* TXA Registers */ +#define TXA0_CTRL_REG (TXA0_BLK_REG_ADDR + 0x000) +#define TXA1_CTRL_REG (TXA1_BLK_REG_ADDR + 0x000) + +/** + * TSO Sequence # Registers (RO) + * Total 8 (for 8 queues) + * Holds the last seq.# for TSO frames + * See catapult_spec.pdf for more details + */ +#define TXA0_TSO_TCP_SEQ_REG(_num) \ + (TXA0_BLK_REG_ADDR + 0x020 + ((_num) << 2)) + +#define TXA1_TSO_TCP_SEQ_REG(_num) \ + (TXA1_BLK_REG_ADDR + 0x020 + ((_num) << 2)) + +/** + * TSO IP ID # Registers (RO) + * Total 8 (for 8 queues) + * Holds the last IP ID for TSO frames + * See catapult_spec.pdf for more details + */ +#define TXA0_TSO_IP_INFO_REG(_num) \ + (TXA0_BLK_REG_ADDR + 0x040 + ((_num) << 2)) + +#define TXA1_TSO_IP_INFO_REG(_num) \ + (TXA1_BLK_REG_ADDR + 0x040 + ((_num) << 2)) + +/** + * RXA Block Register Address Offset from BAR0 + * RXA0 Range : 0x21800 - 0x21BFF + * RXA1 Range : 0x21C00 - 0x21FFF + */ +#define RXA0_BLK_REG_ADDR 0x00021800 +#define RXA1_BLK_REG_ADDR 0x00021C00 + +/* RXA Registers */ +#define RXA0_CTL_REG (RXA0_BLK_REG_ADDR + 0x040) +#define RXA1_CTL_REG (RXA1_BLK_REG_ADDR + 0x040) + +/** + * PPLB Block Register Address Offset from BAR0 + * PPLB0 Range : 0x22000 - 0x223FF + * PPLB1 Range : 0x22400 - 0x227FF + */ +#define PLB0_BLK_REG_ADDR 0x00022000 +#define PLB1_BLK_REG_ADDR 0x00022400 + +/** + * PLB Registers + * Holds RL timer used time stamps in RLT tagged frames + */ +#define PLB0_ECM_TIMER_REG (PLB0_BLK_REG_ADDR + 0x05C) +#define PLB1_ECM_TIMER_REG (PLB1_BLK_REG_ADDR + 0x05C) + +/* Controls the rate-limiter on each of the priority class */ +#define PLB0_RL_CTL (PLB0_BLK_REG_ADDR + 0x060) +#define PLB1_RL_CTL (PLB1_BLK_REG_ADDR + 0x060) + +/** + * Max byte register, total 8, 0-7 + * see catapult_spec.pdf for details + */ +#define PLB0_RL_MAX_BC(_num) \ + (PLB0_BLK_REG_ADDR + 0x064 + ((_num) << 2)) +#define PLB1_RL_MAX_BC(_num) \ + (PLB1_BLK_REG_ADDR + 0x064 + ((_num) << 2)) + +/** + * RL Time Unit Register for priority 0-7 + * 4 bits per priority + * (2^rl_unit)*1us is the actual time period + */ +#define PLB0_RL_TU_PRIO (PLB0_BLK_REG_ADDR + 0x084) +#define PLB1_RL_TU_PRIO (PLB1_BLK_REG_ADDR + 0x084) + +/** + * RL byte count register, + * bytes transmitted in (rl_unit*1)us time period + * 1 per priority, 8 in all, 0-7. + */ +#define PLB0_RL_BYTE_CNT(_num) \ + (PLB0_BLK_REG_ADDR + 0x088 + ((_num) << 2)) +#define PLB1_RL_BYTE_CNT(_num) \ + (PLB1_BLK_REG_ADDR + 0x088 + ((_num) << 2)) + +/** + * RL Min factor register + * 2 bits per priority, + * 4 factors possible: 1, 0.5, 0.25, 0 + * 2'b00 - 0; 2'b01 - 0.25; 2'b10 - 0.5; 2'b11 - 1 + */ +#define PLB0_RL_MIN_REG (PLB0_BLK_REG_ADDR + 0x0A8) +#define PLB1_RL_MIN_REG (PLB1_BLK_REG_ADDR + 0x0A8) + +/** + * RL Max factor register + * 2 bits per priority, + * 4 factors possible: 1, 0.5, 0.25, 0 + * 2'b00 - 0; 2'b01 - 0.25; 2'b10 - 0.5; 2'b11 - 1 + */ +#define PLB0_RL_MAX_REG (PLB0_BLK_REG_ADDR + 0x0AC) +#define PLB1_RL_MAX_REG (PLB1_BLK_REG_ADDR + 0x0AC) + +/* MAC SERDES Address Paging register */ +#define PLB0_EMS_ADD_REG (PLB0_BLK_REG_ADDR + 0xD0) +#define PLB1_EMS_ADD_REG (PLB1_BLK_REG_ADDR + 0xD0) + +/* LL EMS Registers */ +#define LL_EMS0_BLK_REG_ADDR 0x00026800 +#define LL_EMS1_BLK_REG_ADDR 0x00026C00 + +/** + * BPC Block Register Address Offset from BAR0 + * BPC0 Range : 0x23000 - 0x233FF + * BPC1 Range : 0x23400 - 0x237FF + */ +#define BPC0_BLK_REG_ADDR 0x00023000 +#define BPC1_BLK_REG_ADDR 0x00023400 + +/** + * PMM Block Register Address Offset from BAR0 + * PMM0 Range : 0x23800 - 0x23BFF + * PMM1 Range : 0x23C00 - 0x23FFF + */ +#define PMM0_BLK_REG_ADDR 0x00023800 +#define PMM1_BLK_REG_ADDR 0x00023C00 + +/** + * HQM Block Register Address Offset from BAR0 + * HQM0 Range : 0x24000 - 0x243FF + * HQM1 Range : 0x24400 - 0x247FF + */ +#define HQM0_BLK_REG_ADDR 0x00024000 +#define HQM1_BLK_REG_ADDR 0x00024400 + +/** + * HQM Control Register + * Controls some aspects of IB + * See catapult_spec.pdf for details + */ +#define HQM0_CTL_REG (HQM0_BLK_REG_ADDR + 0x000) +#define HQM1_CTL_REG (HQM1_BLK_REG_ADDR + 0x000) + +/** + * HQM Stop Q Semaphore Registers. + * Only one Queue resource can be stopped at + * any given time. This register controls access + * to the single stop Q resource. + * See catapult_spec.pdf for details + */ +#define HQM0_RXQ_STOP_SEM (HQM0_BLK_REG_ADDR + 0x028) +#define HQM0_TXQ_STOP_SEM (HQM0_BLK_REG_ADDR + 0x02C) +#define HQM1_RXQ_STOP_SEM (HQM1_BLK_REG_ADDR + 0x028) +#define HQM1_TXQ_STOP_SEM (HQM1_BLK_REG_ADDR + 0x02C) + +/** + * LUT Block Register Address Offset from BAR0 + * LUT0 Range : 0x25800 - 0x25BFF + * LUT1 Range : 0x25C00 - 0x25FFF + */ +#define LUT0_BLK_REG_ADDR 0x00025800 +#define LUT1_BLK_REG_ADDR 0x00025C00 + +/** + * LUT Registers + * See catapult_spec.pdf for details + */ +#define LUT0_ERR_STS (LUT0_BLK_REG_ADDR + 0x000) +#define LUT1_ERR_STS (LUT1_BLK_REG_ADDR + 0x000) +#define LUT0_SET_ERR_STS (LUT0_BLK_REG_ADDR + 0x004) +#define LUT1_SET_ERR_STS (LUT1_BLK_REG_ADDR + 0x004) + +/** + * TRC (Debug/Trace) Register Offset from BAR0 + * Range : 0x26000 -- 0x263FFF + */ +#define TRC_BLK_REG_ADDR 0x00026000 + +/** + * TRC Registers + * See catapult_spec.pdf for details of each + */ +#define TRC_CTL_REG (TRC_BLK_REG_ADDR + 0x000) +#define TRC_MODS_REG (TRC_BLK_REG_ADDR + 0x004) +#define TRC_TRGC_REG (TRC_BLK_REG_ADDR + 0x008) +#define TRC_CNT1_REG (TRC_BLK_REG_ADDR + 0x010) +#define TRC_CNT2_REG (TRC_BLK_REG_ADDR + 0x014) +#define TRC_NXTS_REG (TRC_BLK_REG_ADDR + 0x018) +#define TRC_DIRR_REG (TRC_BLK_REG_ADDR + 0x01C) + +/** + * TRC Trigger match filters, total 10 + * Determines the trigger condition + */ +#define TRC_TRGM_REG(_num) \ + (TRC_BLK_REG_ADDR + 0x040 + ((_num) << 2)) + +/** + * TRC Next State filters, total 10 + * Determines the next state conditions + */ +#define TRC_NXTM_REG(_num) \ + (TRC_BLK_REG_ADDR + 0x080 + ((_num) << 2)) + +/** + * TRC Store Match filters, total 10 + * Determines the store conditions + */ +#define TRC_STRM_REG(_num) \ + (TRC_BLK_REG_ADDR + 0x0C0 + ((_num) << 2)) + +/* DOORBELLS ACCESS */ + +/** + * Catapult doorbells + * Each doorbell-queue set has + * 1 RxQ, 1 TxQ, 2 IBs in that order + * Size of each entry in 32 bytes, even though only 1 word + * is used. For Non-VM case each doorbell-q set is + * separated by 128 bytes, for VM case it is separated + * by 4K bytes + * Non VM case Range : 0x38000 - 0x39FFF + * VM case Range : 0x100000 - 0x11FFFF + * The range applies to both HQMs + */ +#define HQM_DOORBELL_BLK_BASE_ADDR 0x00038000 +#define HQM_DOORBELL_VM_BLK_BASE_ADDR 0x00100000 + +/* MEMORY ACCESS */ + +/** + * Catapult H/W Block Memory Access Address + * To the host a memory space of 32K (page) is visible + * at a time. The address range is from 0x08000 to 0x0FFFF + */ +#define HW_BLK_HOST_MEM_ADDR 0x08000 + +/** + * Catapult LUT Memory Access Page Numbers + * Range : LUT0 0xa0-0xa1 + * LUT1 0xa2-0xa3 + */ +#define LUT0_MEM_BLK_BASE_PG_NUM 0x000000A0 +#define LUT1_MEM_BLK_BASE_PG_NUM 0x000000A2 + +/** + * Catapult RxFn Database Memory Block Base Offset + * + * The Rx function database exists in LUT block. + * In PCIe space this is accessible as a 256x32 + * bit block. Each entry in this database is 4 + * (4 byte) words. Max. entries is 64. + * Address of an entry corresponding to a function + * = base_addr + (function_no. * 16) + */ +#define RX_FNDB_RAM_BASE_OFFSET 0x0000B400 + +/** + * Catapult TxFn Database Memory Block Base Offset Address + * + * The Tx function database exists in LUT block. + * In PCIe space this is accessible as a 64x32 + * bit block. Each entry in this database is 1 + * (4 byte) word. Max. entries is 64. + * Address of an entry corresponding to a function + * = base_addr + (function_no. * 4) + */ +#define TX_FNDB_RAM_BASE_OFFSET 0x0000B800 + +/** + * Catapult Unicast CAM Base Offset Address + * + * Exists in LUT memory space. + * Shared by both the LL & FCoE driver. + * Size is 256x48 bits; mapped to PCIe space + * 512x32 bit blocks. For each address, bits + * are written in the order : [47:32] and then + * [31:0]. + */ +#define UCAST_CAM_BASE_OFFSET 0x0000A800 + +/** + * Catapult Unicast RAM Base Offset Address + * + * Exists in LUT memory space. + * Shared by both the LL & FCoE driver. + * Size is 256x9 bits. + */ +#define UCAST_RAM_BASE_OFFSET 0x0000B000 + +/** + * Catapult Mulicast CAM Base Offset Address + * + * Exists in LUT memory space. + * Shared by both the LL & FCoE driver. + * Size is 256x48 bits; mapped to PCIe space + * 512x32 bit blocks. For each address, bits + * are written in the order : [47:32] and then + * [31:0]. + */ +#define MCAST_CAM_BASE_OFFSET 0x0000A000 + +/** + * Catapult VLAN RAM Base Offset Address + * + * Exists in LUT memory space. + * Size is 4096x66 bits; mapped to PCIe space as + * 8192x32 bit blocks. + * All the 4K entries are within the address range + * 0x0000 to 0x8000, so in the first LUT page. + */ +#define VLAN_RAM_BASE_OFFSET 0x00000000 + +/** + * Catapult Tx Stats RAM Base Offset Address + * + * Exists in LUT memory space. + * Size is 1024x33 bits; + * Each Tx function has 64 bytes of space + */ +#define TX_STATS_RAM_BASE_OFFSET 0x00009000 + +/** + * Catapult Rx Stats RAM Base Offset Address + * + * Exists in LUT memory space. + * Size is 1024x33 bits; + * Each Rx function has 64 bytes of space + */ +#define RX_STATS_RAM_BASE_OFFSET 0x00008000 + +/* Catapult RXA Memory Access Page Numbers */ +#define RXA0_MEM_BLK_BASE_PG_NUM 0x0000008C +#define RXA1_MEM_BLK_BASE_PG_NUM 0x0000008D + +/** + * Catapult Multicast Vector Table Base Offset Address + * + * Exists in RxA memory space. + * Organized as 512x65 bit block. + * However for each entry 16 bytes allocated (power of 2) + * Total size 512*16 bytes. + * There are two logical divisions, 256 entries each : + * a) Entries 0x00 to 0xff (256) -- Approx. MVT + * Offset 0x000 to 0xFFF + * b) Entries 0x100 to 0x1ff (256) -- Exact MVT + * Offsets 0x1000 to 0x1FFF + */ +#define MCAST_APPROX_MVT_BASE_OFFSET 0x00000000 +#define MCAST_EXACT_MVT_BASE_OFFSET 0x00001000 + +/** + * Catapult RxQ Translate Table (RIT) Base Offset Address + * + * Exists in RxA memory space + * Total no. of entries 64 + * Each entry is 1 (4 byte) word. + * 31:12 -- Reserved + * 11:0 -- Two 6 bit RxQ Ids + */ +#define FUNCTION_TO_RXQ_TRANSLATE 0x00002000 + +/* Catapult RxAdm (RAD) Memory Access Page Numbers */ +#define RAD0_MEM_BLK_BASE_PG_NUM 0x00000086 +#define RAD1_MEM_BLK_BASE_PG_NUM 0x00000087 + +/** + * Catapult RSS Table Base Offset Address + * + * Exists in RAD memory space. + * Each entry is 352 bits, but alligned on + * 64 byte (512 bit) boundary. Accessed + * 4 byte words, the whole entry can be + * broken into 11 word accesses. + */ +#define RSS_TABLE_BASE_OFFSET 0x00000800 + +/** + * Catapult CPQ Block Page Number + * This value is written to the page number registers + * to access the memory associated with the mailboxes. + */ +#define CPQ_BLK_PG_NUM 0x00000005 + +/** + * Clarification : + * LL functions are 2 & 3; can HostFn0/HostFn1 + * <-> LPU0/LPU1 memories be used ? + */ +/** + * Catapult HostFn0/HostFn1 to LPU0/LPU1 Mbox memory + * Per catapult_spec.pdf, the offset of the mbox + * memory is in the register space at an offset of 0x200 + */ +#define CPQ_BLK_REG_MBOX_ADDR (CPQ_BLK_REG_ADDR + 0x200) + +#define HOSTFN_LPU_MBOX (CPQ_BLK_REG_MBOX_ADDR + 0x000) + +/* Catapult LPU0/LPU1 to HostFn0/HostFn1 Mbox memory */ +#define LPU_HOSTFN_MBOX (CPQ_BLK_REG_MBOX_ADDR + 0x080) + +/** + * Catapult HQM Block Page Number + * This is written to the page number register for + * the appropriate function to access the memory + * associated with HQM + */ +#define HQM0_BLK_PG_NUM 0x00000096 +#define HQM1_BLK_PG_NUM 0x00000097 + +/** + * Note that TxQ and RxQ entries are interlaced + * the HQM memory, i.e RXQ0, TXQ0, RXQ1, TXQ1.. etc. + */ + +#define HQM_RXTX_Q_RAM_BASE_OFFSET 0x00004000 + +/** + * CQ Memory + * Exists in HQM Memory space + * Each entry is 16 (4 byte) words of which + * only 12 words are used for configuration + * Total 64 entries per HQM memory space + */ +#define HQM_CQ_RAM_BASE_OFFSET 0x00006000 + +/** + * Interrupt Block (IB) Memory + * Exists in HQM Memory space + * Each entry is 8 (4 byte) words of which + * only 5 words are used for configuration + * Total 128 entries per HQM memory space + */ +#define HQM_IB_RAM_BASE_OFFSET 0x00001000 + +/** + * Index Table (IT) Memory + * Exists in HQM Memory space + * Each entry is 1 (4 byte) word which + * is used for configuration + * Total 128 entries per HQM memory space + */ +#define HQM_INDX_TBL_RAM_BASE_OFFSET 0x00002000 + +/** + * PSS Block Memory Page Number + * This is written to the appropriate page number + * register to access the CPU memory. + * Also known as the PSS secondary memory (SMEM). + * Range : 0x180 to 0x1CF + * See catapult_spec.pdf for details + */ +#define PSS_BLK_PG_NUM 0x00000180 + +/** + * Offsets of different instances of PSS SMEM + * 2.5M of continuous 1T memory space : 2 blocks + * of 1M each (32 pages each, page=32KB) and 4 smaller + * blocks of 128K each (4 pages each, page=32KB) + * PSS_LMEM_INST0 is used for firmware download + */ +#define PSS_LMEM_INST0 0x00000000 +#define PSS_LMEM_INST1 0x00100000 +#define PSS_LMEM_INST2 0x00200000 +#define PSS_LMEM_INST3 0x00220000 +#define PSS_LMEM_INST4 0x00240000 +#define PSS_LMEM_INST5 0x00260000 + +#define BNA_PCI_REG_CT_ADDRSZ (0x40000) + +#define BNA_GET_PAGE_NUM(_base_page, _offset) \ + ((_base_page) + ((_offset) >> 15)) + +#define BNA_GET_PAGE_OFFSET(_offset) \ + ((_offset) & 0x7fff) + +#define BNA_GET_MEM_BASE_ADDR(_bar0, _base_offset) \ + ((_bar0) + HW_BLK_HOST_MEM_ADDR \ + + BNA_GET_PAGE_OFFSET((_base_offset))) + +#define BNA_GET_VLAN_MEM_ENTRY_ADDR(_bar0, _fn_id, _vlan_id)\ + (_bar0 + (HW_BLK_HOST_MEM_ADDR) \ + + (BNA_GET_PAGE_OFFSET(VLAN_RAM_BASE_OFFSET)) \ + + (((_fn_id) & 0x3f) << 9) \ + + (((_vlan_id) & 0xfe0) >> 3)) + +/** + * + * Interrupt related bits, flags and macros + * + */ + +#define __LPU02HOST_MBOX0_STATUS_BITS 0x00100000 +#define __LPU12HOST_MBOX0_STATUS_BITS 0x00200000 +#define __LPU02HOST_MBOX1_STATUS_BITS 0x00400000 +#define __LPU12HOST_MBOX1_STATUS_BITS 0x00800000 + +#define __LPU02HOST_MBOX0_MASK_BITS 0x00100000 +#define __LPU12HOST_MBOX0_MASK_BITS 0x00200000 +#define __LPU02HOST_MBOX1_MASK_BITS 0x00400000 +#define __LPU12HOST_MBOX1_MASK_BITS 0x00800000 + +#define __LPU2HOST_MBOX_MASK_BITS \ + (__LPU02HOST_MBOX0_MASK_BITS | __LPU02HOST_MBOX1_MASK_BITS | \ + __LPU12HOST_MBOX0_MASK_BITS | __LPU12HOST_MBOX1_MASK_BITS) + +#define __LPU2HOST_IB_STATUS_BITS 0x0000ffff + +#define BNA_IS_LPU0_MBOX_INTR(_intr_status) \ + ((_intr_status) & (__LPU02HOST_MBOX0_STATUS_BITS | \ + __LPU02HOST_MBOX1_STATUS_BITS)) + +#define BNA_IS_LPU1_MBOX_INTR(_intr_status) \ + ((_intr_status) & (__LPU12HOST_MBOX0_STATUS_BITS | \ + __LPU12HOST_MBOX1_STATUS_BITS)) + +#define BNA_IS_MBOX_INTR(_intr_status) \ + ((_intr_status) & \ + (__LPU02HOST_MBOX0_STATUS_BITS | \ + __LPU02HOST_MBOX1_STATUS_BITS | \ + __LPU12HOST_MBOX0_STATUS_BITS | \ + __LPU12HOST_MBOX1_STATUS_BITS)) + +#define __EMC_ERROR_STATUS_BITS 0x00010000 +#define __LPU0_ERROR_STATUS_BITS 0x00020000 +#define __LPU1_ERROR_STATUS_BITS 0x00040000 +#define __PSS_ERROR_STATUS_BITS 0x00080000 + +#define __HALT_STATUS_BITS 0x01000000 + +#define __EMC_ERROR_MASK_BITS 0x00010000 +#define __LPU0_ERROR_MASK_BITS 0x00020000 +#define __LPU1_ERROR_MASK_BITS 0x00040000 +#define __PSS_ERROR_MASK_BITS 0x00080000 + +#define __HALT_MASK_BITS 0x01000000 + +#define __ERROR_MASK_BITS \ + (__EMC_ERROR_MASK_BITS | __LPU0_ERROR_MASK_BITS | \ + __LPU1_ERROR_MASK_BITS | __PSS_ERROR_MASK_BITS | \ + __HALT_MASK_BITS) + +#define BNA_IS_ERR_INTR(_intr_status) \ + ((_intr_status) & \ + (__EMC_ERROR_STATUS_BITS | \ + __LPU0_ERROR_STATUS_BITS | \ + __LPU1_ERROR_STATUS_BITS | \ + __PSS_ERROR_STATUS_BITS | \ + __HALT_STATUS_BITS)) + +#define BNA_IS_MBOX_ERR_INTR(_intr_status) \ + (BNA_IS_MBOX_INTR((_intr_status)) | \ + BNA_IS_ERR_INTR((_intr_status))) + +#define BNA_IS_INTX_DATA_INTR(_intr_status) \ + ((_intr_status) & __LPU2HOST_IB_STATUS_BITS) + +#define BNA_INTR_STATUS_MBOX_CLR(_intr_status) \ +do { \ + (_intr_status) &= ~(__LPU02HOST_MBOX0_STATUS_BITS | \ + __LPU02HOST_MBOX1_STATUS_BITS | \ + __LPU12HOST_MBOX0_STATUS_BITS | \ + __LPU12HOST_MBOX1_STATUS_BITS); \ +} while (0) + +#define BNA_INTR_STATUS_ERR_CLR(_intr_status) \ +do { \ + (_intr_status) &= ~(__EMC_ERROR_STATUS_BITS | \ + __LPU0_ERROR_STATUS_BITS | \ + __LPU1_ERROR_STATUS_BITS | \ + __PSS_ERROR_STATUS_BITS | \ + __HALT_STATUS_BITS); \ +} while (0) + +#define bna_intx_disable(_bna, _cur_mask) \ +{ \ + (_cur_mask) = readl((_bna)->regs.fn_int_mask);\ + writel(0xffffffff, (_bna)->regs.fn_int_mask);\ +} + +#define bna_intx_enable(bna, new_mask) \ + writel((new_mask), (bna)->regs.fn_int_mask) + +#define bna_mbox_intr_disable(bna) \ + writel((readl((bna)->regs.fn_int_mask) | \ + (__LPU2HOST_MBOX_MASK_BITS | __ERROR_MASK_BITS)), \ + (bna)->regs.fn_int_mask) + +#define bna_mbox_intr_enable(bna) \ + writel((readl((bna)->regs.fn_int_mask) & \ + ~(__LPU2HOST_MBOX_MASK_BITS | __ERROR_MASK_BITS)), \ + (bna)->regs.fn_int_mask) + +#define bna_intr_status_get(_bna, _status) \ +{ \ + (_status) = readl((_bna)->regs.fn_int_status); \ + if ((_status)) { \ + writel((_status) & ~(__LPU02HOST_MBOX0_STATUS_BITS |\ + __LPU02HOST_MBOX1_STATUS_BITS |\ + __LPU12HOST_MBOX0_STATUS_BITS |\ + __LPU12HOST_MBOX1_STATUS_BITS), \ + (_bna)->regs.fn_int_status);\ + } \ +} + +#define bna_intr_status_get_no_clr(_bna, _status) \ + (_status) = readl((_bna)->regs.fn_int_status) + +#define bna_intr_mask_get(bna, mask) \ + (*mask) = readl((bna)->regs.fn_int_mask) + +#define bna_intr_ack(bna, intr_bmap) \ + writel((intr_bmap), (bna)->regs.fn_int_status) + +#define bna_ib_intx_disable(bna, ib_id) \ + writel(readl((bna)->regs.fn_int_mask) | \ + (1 << (ib_id)), \ + (bna)->regs.fn_int_mask) + +#define bna_ib_intx_enable(bna, ib_id) \ + writel(readl((bna)->regs.fn_int_mask) & \ + ~(1 << (ib_id)), \ + (bna)->regs.fn_int_mask) + +#define bna_mbox_msix_idx_set(_device) \ +do {\ + writel(((_device)->vector & 0x000001FF), \ + (_device)->bna->pcidev.pci_bar_kva + \ + reg_offset[(_device)->bna->pcidev.pci_func].msix_idx);\ +} while (0) + +/** + * + * TxQ, RxQ, CQ related bits, offsets, macros + * + */ + +#define BNA_Q_IDLE_STATE 0x00008001 + +#define BNA_GET_DOORBELL_BASE_ADDR(_bar0) \ + ((_bar0) + HQM_DOORBELL_BLK_BASE_ADDR) + +#define BNA_GET_DOORBELL_ENTRY_OFFSET(_entry) \ + ((HQM_DOORBELL_BLK_BASE_ADDR) \ + + (_entry << 7)) + +#define BNA_DOORBELL_IB_INT_ACK(_timeout, _events) \ + (0x80000000 | ((_timeout) << 16) | (_events)) + +#define BNA_DOORBELL_IB_INT_DISABLE (0x40000000) + +/* TxQ Entry Opcodes */ +#define BNA_TXQ_WI_SEND (0x402) /* Single Frame Transmission */ +#define BNA_TXQ_WI_SEND_LSO (0x403) /* Multi-Frame Transmission */ +#define BNA_TXQ_WI_EXTENSION (0x104) /* Extension WI */ + +/* TxQ Entry Control Flags */ +#define BNA_TXQ_WI_CF_FCOE_CRC (1 << 8) +#define BNA_TXQ_WI_CF_IPID_MODE (1 << 5) +#define BNA_TXQ_WI_CF_INS_PRIO (1 << 4) +#define BNA_TXQ_WI_CF_INS_VLAN (1 << 3) +#define BNA_TXQ_WI_CF_UDP_CKSUM (1 << 2) +#define BNA_TXQ_WI_CF_TCP_CKSUM (1 << 1) +#define BNA_TXQ_WI_CF_IP_CKSUM (1 << 0) + +#define BNA_TXQ_WI_L4_HDR_N_OFFSET(_hdr_size, _offset) \ + (((_hdr_size) << 10) | ((_offset) & 0x3FF)) + +/* + * Completion Q defines + */ +/* CQ Entry Flags */ +#define BNA_CQ_EF_MAC_ERROR (1 << 0) +#define BNA_CQ_EF_FCS_ERROR (1 << 1) +#define BNA_CQ_EF_TOO_LONG (1 << 2) +#define BNA_CQ_EF_FC_CRC_OK (1 << 3) + +#define BNA_CQ_EF_RSVD1 (1 << 4) +#define BNA_CQ_EF_L4_CKSUM_OK (1 << 5) +#define BNA_CQ_EF_L3_CKSUM_OK (1 << 6) +#define BNA_CQ_EF_HDS_HEADER (1 << 7) + +#define BNA_CQ_EF_UDP (1 << 8) +#define BNA_CQ_EF_TCP (1 << 9) +#define BNA_CQ_EF_IP_OPTIONS (1 << 10) +#define BNA_CQ_EF_IPV6 (1 << 11) + +#define BNA_CQ_EF_IPV4 (1 << 12) +#define BNA_CQ_EF_VLAN (1 << 13) +#define BNA_CQ_EF_RSS (1 << 14) +#define BNA_CQ_EF_RSVD2 (1 << 15) + +#define BNA_CQ_EF_MCAST_MATCH (1 << 16) +#define BNA_CQ_EF_MCAST (1 << 17) +#define BNA_CQ_EF_BCAST (1 << 18) +#define BNA_CQ_EF_REMOTE (1 << 19) + +#define BNA_CQ_EF_LOCAL (1 << 20) + +/** + * + * Data structures + * + */ + +enum txf_flags { + BFI_TXF_CF_ENABLE = 1 << 0, + BFI_TXF_CF_VLAN_FILTER = 1 << 8, + BFI_TXF_CF_VLAN_ADMIT = 1 << 9, + BFI_TXF_CF_VLAN_INSERT = 1 << 10, + BFI_TXF_CF_RSVD1 = 1 << 11, + BFI_TXF_CF_MAC_SA_CHECK = 1 << 12, + BFI_TXF_CF_VLAN_WI_BASED = 1 << 13, + BFI_TXF_CF_VSWITCH_MCAST = 1 << 14, + BFI_TXF_CF_VSWITCH_UCAST = 1 << 15, + BFI_TXF_CF_RSVD2 = 0x7F << 1 +}; + +enum ib_flags { + BFI_IB_CF_MASTER_ENABLE = (1 << 0), + BFI_IB_CF_MSIX_MODE = (1 << 1), + BFI_IB_CF_COALESCING_MODE = (1 << 2), + BFI_IB_CF_INTER_PKT_ENABLE = (1 << 3), + BFI_IB_CF_INT_ENABLE = (1 << 4), + BFI_IB_CF_INTER_PKT_DMA = (1 << 5), + BFI_IB_CF_ACK_PENDING = (1 << 6), + BFI_IB_CF_RESERVED1 = (1 << 7) +}; + +enum rss_hash_type { + BFI_RSS_T_V4_TCP = (1 << 11), + BFI_RSS_T_V4_IP = (1 << 10), + BFI_RSS_T_V6_TCP = (1 << 9), + BFI_RSS_T_V6_IP = (1 << 8) +}; +enum hds_header_type { + BNA_HDS_T_V4_TCP = (1 << 11), + BNA_HDS_T_V4_UDP = (1 << 10), + BNA_HDS_T_V6_TCP = (1 << 9), + BNA_HDS_T_V6_UDP = (1 << 8), + BNA_HDS_FORCED = (1 << 7), +}; +enum rxf_flags { + BNA_RXF_CF_SM_LG_RXQ = (1 << 15), + BNA_RXF_CF_DEFAULT_VLAN = (1 << 14), + BNA_RXF_CF_DEFAULT_FUNCTION_ENABLE = (1 << 13), + BNA_RXF_CF_VLAN_STRIP = (1 << 12), + BNA_RXF_CF_RSS_ENABLE = (1 << 8) +}; +struct bna_chip_regs_offset { + u32 page_addr; + u32 fn_int_status; + u32 fn_int_mask; + u32 msix_idx; +}; +extern const struct bna_chip_regs_offset reg_offset[]; + +struct bna_chip_regs { + void __iomem *page_addr; + void __iomem *fn_int_status; + void __iomem *fn_int_mask; +}; + +struct bna_txq_mem { + u32 pg_tbl_addr_lo; + u32 pg_tbl_addr_hi; + u32 cur_q_entry_lo; + u32 cur_q_entry_hi; + u32 reserved1; + u32 reserved2; + u32 pg_cnt_n_prd_ptr; /* 31:16->total page count */ + /* 15:0 ->producer pointer (index?) */ + u32 entry_n_pg_size; /* 31:16->entry size */ + /* 15:0 ->page size */ + u32 int_blk_n_cns_ptr; /* 31:24->Int Blk Id; */ + /* 23:16->Int Blk Offset */ + /* 15:0 ->consumer pointer(index?) */ + u32 cns_ptr2_n_q_state; /* 31:16->cons. ptr 2; 15:0-> Q state */ + u32 nxt_qid_n_fid_n_pri; /* 17:10->next */ + /* QId;9:3->FID;2:0->Priority */ + u32 wvc_n_cquota_n_rquota; /* 31:24->WI Vector Count; */ + /* 23:12->Cfg Quota; */ + /* 11:0 ->Run Quota */ + u32 reserved3[4]; +}; + +struct bna_rxq_mem { + u32 pg_tbl_addr_lo; + u32 pg_tbl_addr_hi; + u32 cur_q_entry_lo; + u32 cur_q_entry_hi; + u32 reserved1; + u32 reserved2; + u32 pg_cnt_n_prd_ptr; /* 31:16->total page count */ + /* 15:0 ->producer pointer (index?) */ + u32 entry_n_pg_size; /* 31:16->entry size */ + /* 15:0 ->page size */ + u32 sg_n_cq_n_cns_ptr; /* 31:28->reserved; 27:24->sg count */ + /* 23:16->CQ; */ + /* 15:0->consumer pointer(index?) */ + u32 buf_sz_n_q_state; /* 31:16->buffer size; 15:0-> Q state */ + u32 next_qid; /* 17:10->next QId */ + u32 reserved3; + u32 reserved4[4]; +}; + +struct bna_rxtx_q_mem { + struct bna_rxq_mem rxq; + struct bna_txq_mem txq; +}; + +struct bna_cq_mem { + u32 pg_tbl_addr_lo; + u32 pg_tbl_addr_hi; + u32 cur_q_entry_lo; + u32 cur_q_entry_hi; + + u32 reserved1; + u32 reserved2; + u32 pg_cnt_n_prd_ptr; /* 31:16->total page count */ + /* 15:0 ->producer pointer (index?) */ + u32 entry_n_pg_size; /* 31:16->entry size */ + /* 15:0 ->page size */ + u32 int_blk_n_cns_ptr; /* 31:24->Int Blk Id; */ + /* 23:16->Int Blk Offset */ + /* 15:0 ->consumer pointer(index?) */ + u32 q_state; /* 31:16->reserved; 15:0-> Q state */ + u32 reserved3[2]; + u32 reserved4[4]; +}; + +struct bna_ib_blk_mem { + u32 host_addr_lo; + u32 host_addr_hi; + u32 clsc_n_ctrl_n_msix; /* 31:24->coalescing; */ + /* 23:16->coalescing cfg; */ + /* 15:8 ->control; */ + /* 7:0 ->msix; */ + u32 ipkt_n_ent_n_idxof; + u32 ipkt_cnt_cfg_n_unacked; + + u32 reserved[3]; +}; + +struct bna_idx_tbl_mem { + u32 idx; /* !< 31:16->res;15:0->idx; */ +}; + +struct bna_doorbell_qset { + u32 rxq[0x20 >> 2]; + u32 txq[0x20 >> 2]; + u32 ib0[0x20 >> 2]; + u32 ib1[0x20 >> 2]; +}; + +struct bna_rx_fndb_ram { + u32 rss_prop; + u32 size_routing_props; + u32 rit_hds_mcastq; + u32 control_flags; +}; + +struct bna_tx_fndb_ram { + u32 vlan_n_ctrl_flags; +}; + +/** + * @brief + * Structure which maps to RxFn Indirection Table (RIT) + * Size : 1 word + * See catapult_spec.pdf, RxA for details + */ +struct bna_rit_mem { + u32 rxq_ids; /* !< 31:12->res;11:0->two 6 bit RxQ Ids */ +}; + +/** + * @brief + * Structure which maps to RSS Table entry + * Size : 16 words + * See catapult_spec.pdf, RAD for details + */ +struct bna_rss_mem { + /* + * 31:12-> res + * 11:8 -> protocol type + * 7:0 -> hash index + */ + u32 type_n_hash; + u32 hash_key[10]; /* !< 40 byte Toeplitz hash key */ + u32 reserved[5]; +}; + +/* TxQ Vector (a.k.a. Tx-Buffer Descriptor) */ +struct bna_dma_addr { + u32 msb; + u32 lsb; +}; + +struct bna_txq_wi_vector { + u16 reserved; + u16 length; /* Only 14 LSB are valid */ + struct bna_dma_addr host_addr; /* Tx-Buf DMA addr */ +}; + +typedef u16 bna_txq_wi_opcode_t; + +typedef u16 bna_txq_wi_ctrl_flag_t; + +/** + * TxQ Entry Structure + * + * BEWARE: Load values into this structure with correct endianess. + */ +struct bna_txq_entry { + union { + struct { + u8 reserved; + u8 num_vectors; /* number of vectors present */ + bna_txq_wi_opcode_t opcode; /* Either */ + /* BNA_TXQ_WI_SEND or */ + /* BNA_TXQ_WI_SEND_LSO */ + bna_txq_wi_ctrl_flag_t flags; /* OR of all the flags */ + u16 l4_hdr_size_n_offset; + u16 vlan_tag; + u16 lso_mss; /* Only 14 LSB are valid */ + u32 frame_length; /* Only 24 LSB are valid */ + } wi; + + struct { + u16 reserved; + bna_txq_wi_opcode_t opcode; /* Must be */ + /* BNA_TXQ_WI_EXTENSION */ + u32 reserved2[3]; /* Place holder for */ + /* removed vector (12 bytes) */ + } wi_ext; + } hdr; + struct bna_txq_wi_vector vector[4]; +}; +#define wi_hdr hdr.wi +#define wi_ext_hdr hdr.wi_ext + +/* RxQ Entry Structure */ +struct bna_rxq_entry { /* Rx-Buffer */ + struct bna_dma_addr host_addr; /* Rx-Buffer DMA address */ +}; + +typedef u32 bna_cq_e_flag_t; + +/* CQ Entry Structure */ +struct bna_cq_entry { + bna_cq_e_flag_t flags; + u16 vlan_tag; + u16 length; + u32 rss_hash; + u8 valid; + u8 reserved1; + u8 reserved2; + u8 rxq_id; +}; + +#endif /* __BNA_HW_H__ */ diff --git a/drivers/net/bna/bna_txrx.c b/drivers/net/bna/bna_txrx.c new file mode 100644 index 000000000000..890846d55502 --- /dev/null +++ b/drivers/net/bna/bna_txrx.c @@ -0,0 +1,4209 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#include "bna.h" +#include "bfa_sm.h" +#include "bfi.h" + +/** + * IB + */ +#define bna_ib_find_free_ibidx(_mask, _pos)\ +do {\ + (_pos) = 0;\ + while (((_pos) < (BFI_IBIDX_MAX_SEGSIZE)) &&\ + ((1 << (_pos)) & (_mask)))\ + (_pos)++;\ +} while (0) + +#define bna_ib_count_ibidx(_mask, _count)\ +do {\ + int pos = 0;\ + (_count) = 0;\ + while (pos < (BFI_IBIDX_MAX_SEGSIZE)) {\ + if ((1 << pos) & (_mask))\ + (_count) = pos + 1;\ + pos++;\ + } \ +} while (0) + +#define bna_ib_select_segpool(_count, _q_idx)\ +do {\ + int i;\ + (_q_idx) = -1;\ + for (i = 0; i < BFI_IBIDX_TOTAL_POOLS; i++) {\ + if ((_count <= ibidx_pool[i].pool_entry_size)) {\ + (_q_idx) = i;\ + break;\ + } \ + } \ +} while (0) + +struct bna_ibidx_pool { + int pool_size; + int pool_entry_size; +}; +init_ibidx_pool(ibidx_pool); + +static struct bna_intr * +bna_intr_get(struct bna_ib_mod *ib_mod, enum bna_intr_type intr_type, + int vector) +{ + struct bna_intr *intr; + struct list_head *qe; + + list_for_each(qe, &ib_mod->intr_active_q) { + intr = (struct bna_intr *)qe; + + if ((intr->intr_type == intr_type) && + (intr->vector == vector)) { + intr->ref_count++; + return intr; + } + } + + if (list_empty(&ib_mod->intr_free_q)) + return NULL; + + bfa_q_deq(&ib_mod->intr_free_q, &intr); + bfa_q_qe_init(&intr->qe); + + intr->ref_count = 1; + intr->intr_type = intr_type; + intr->vector = vector; + + list_add_tail(&intr->qe, &ib_mod->intr_active_q); + + return intr; +} + +static void +bna_intr_put(struct bna_ib_mod *ib_mod, + struct bna_intr *intr) +{ + intr->ref_count--; + + if (intr->ref_count == 0) { + intr->ib = NULL; + list_del(&intr->qe); + bfa_q_qe_init(&intr->qe); + list_add_tail(&intr->qe, &ib_mod->intr_free_q); + } +} + +void +bna_ib_mod_init(struct bna_ib_mod *ib_mod, struct bna *bna, + struct bna_res_info *res_info) +{ + int i; + int j; + int count; + u8 offset; + struct bna_doorbell_qset *qset; + unsigned long off; + + ib_mod->bna = bna; + + ib_mod->ib = (struct bna_ib *) + res_info[BNA_RES_MEM_T_IB_ARRAY].res_u.mem_info.mdl[0].kva; + ib_mod->intr = (struct bna_intr *) + res_info[BNA_RES_MEM_T_INTR_ARRAY].res_u.mem_info.mdl[0].kva; + ib_mod->idx_seg = (struct bna_ibidx_seg *) + res_info[BNA_RES_MEM_T_IDXSEG_ARRAY].res_u.mem_info.mdl[0].kva; + + INIT_LIST_HEAD(&ib_mod->ib_free_q); + INIT_LIST_HEAD(&ib_mod->intr_free_q); + INIT_LIST_HEAD(&ib_mod->intr_active_q); + + for (i = 0; i < BFI_IBIDX_TOTAL_POOLS; i++) + INIT_LIST_HEAD(&ib_mod->ibidx_seg_pool[i]); + + for (i = 0; i < BFI_MAX_IB; i++) { + ib_mod->ib[i].ib_id = i; + + ib_mod->ib[i].ib_seg_host_addr_kva = + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.mdl[i].kva; + ib_mod->ib[i].ib_seg_host_addr.lsb = + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.mdl[i].dma.lsb; + ib_mod->ib[i].ib_seg_host_addr.msb = + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.mdl[i].dma.msb; + + qset = (struct bna_doorbell_qset *)0; + off = (unsigned long)(&qset[i >> 1].ib0[(i & 0x1) + * (0x20 >> 2)]); + ib_mod->ib[i].door_bell.doorbell_addr = off + + BNA_GET_DOORBELL_BASE_ADDR(bna->pcidev.pci_bar_kva); + + bfa_q_qe_init(&ib_mod->ib[i].qe); + list_add_tail(&ib_mod->ib[i].qe, &ib_mod->ib_free_q); + + bfa_q_qe_init(&ib_mod->intr[i].qe); + list_add_tail(&ib_mod->intr[i].qe, &ib_mod->intr_free_q); + } + + count = 0; + offset = 0; + for (i = 0; i < BFI_IBIDX_TOTAL_POOLS; i++) { + for (j = 0; j < ibidx_pool[i].pool_size; j++) { + bfa_q_qe_init(&ib_mod->idx_seg[count]); + ib_mod->idx_seg[count].ib_seg_size = + ibidx_pool[i].pool_entry_size; + ib_mod->idx_seg[count].ib_idx_tbl_offset = offset; + list_add_tail(&ib_mod->idx_seg[count].qe, + &ib_mod->ibidx_seg_pool[i]); + count++; + offset += ibidx_pool[i].pool_entry_size; + } + } +} + +void +bna_ib_mod_uninit(struct bna_ib_mod *ib_mod) +{ + int i; + int j; + struct list_head *qe; + + i = 0; + list_for_each(qe, &ib_mod->ib_free_q) + i++; + + i = 0; + list_for_each(qe, &ib_mod->intr_free_q) + i++; + + for (i = 0; i < BFI_IBIDX_TOTAL_POOLS; i++) { + j = 0; + list_for_each(qe, &ib_mod->ibidx_seg_pool[i]) + j++; + } + + ib_mod->bna = NULL; +} + +struct bna_ib * +bna_ib_get(struct bna_ib_mod *ib_mod, + enum bna_intr_type intr_type, + int vector) +{ + struct bna_ib *ib; + struct bna_intr *intr; + + if (intr_type == BNA_INTR_T_INTX) + vector = (1 << vector); + + intr = bna_intr_get(ib_mod, intr_type, vector); + if (intr == NULL) + return NULL; + + if (intr->ib) { + if (intr->ib->ref_count == BFI_IBIDX_MAX_SEGSIZE) { + bna_intr_put(ib_mod, intr); + return NULL; + } + intr->ib->ref_count++; + return intr->ib; + } + + if (list_empty(&ib_mod->ib_free_q)) { + bna_intr_put(ib_mod, intr); + return NULL; + } + + bfa_q_deq(&ib_mod->ib_free_q, &ib); + bfa_q_qe_init(&ib->qe); + + ib->ref_count = 1; + ib->start_count = 0; + ib->idx_mask = 0; + + ib->intr = intr; + ib->idx_seg = NULL; + intr->ib = ib; + + ib->bna = ib_mod->bna; + + return ib; +} + +void +bna_ib_put(struct bna_ib_mod *ib_mod, struct bna_ib *ib) +{ + bna_intr_put(ib_mod, ib->intr); + + ib->ref_count--; + + if (ib->ref_count == 0) { + ib->intr = NULL; + ib->bna = NULL; + list_add_tail(&ib->qe, &ib_mod->ib_free_q); + } +} + +/* Returns index offset - starting from 0 */ +int +bna_ib_reserve_idx(struct bna_ib *ib) +{ + struct bna_ib_mod *ib_mod = &ib->bna->ib_mod; + struct bna_ibidx_seg *idx_seg; + int idx; + int num_idx; + int q_idx; + + /* Find the first free index position */ + bna_ib_find_free_ibidx(ib->idx_mask, idx); + if (idx == BFI_IBIDX_MAX_SEGSIZE) + return -1; + + /* + * Calculate the total number of indexes held by this IB, + * including the index newly reserved above. + */ + bna_ib_count_ibidx((ib->idx_mask | (1 << idx)), num_idx); + + /* See if there is a free space in the index segment held by this IB */ + if (ib->idx_seg && (num_idx <= ib->idx_seg->ib_seg_size)) { + ib->idx_mask |= (1 << idx); + return idx; + } + + if (ib->start_count) + return -1; + + /* Allocate a new segment */ + bna_ib_select_segpool(num_idx, q_idx); + while (1) { + if (q_idx == BFI_IBIDX_TOTAL_POOLS) + return -1; + if (!list_empty(&ib_mod->ibidx_seg_pool[q_idx])) + break; + q_idx++; + } + bfa_q_deq(&ib_mod->ibidx_seg_pool[q_idx], &idx_seg); + bfa_q_qe_init(&idx_seg->qe); + + /* Free the old segment */ + if (ib->idx_seg) { + bna_ib_select_segpool(ib->idx_seg->ib_seg_size, q_idx); + list_add_tail(&ib->idx_seg->qe, &ib_mod->ibidx_seg_pool[q_idx]); + } + + ib->idx_seg = idx_seg; + + ib->idx_mask |= (1 << idx); + + return idx; +} + +void +bna_ib_release_idx(struct bna_ib *ib, int idx) +{ + struct bna_ib_mod *ib_mod = &ib->bna->ib_mod; + struct bna_ibidx_seg *idx_seg; + int num_idx; + int cur_q_idx; + int new_q_idx; + + ib->idx_mask &= ~(1 << idx); + + if (ib->start_count) + return; + + bna_ib_count_ibidx(ib->idx_mask, num_idx); + + /* + * Free the segment, if there are no more indexes in the segment + * held by this IB + */ + if (!num_idx) { + bna_ib_select_segpool(ib->idx_seg->ib_seg_size, cur_q_idx); + list_add_tail(&ib->idx_seg->qe, + &ib_mod->ibidx_seg_pool[cur_q_idx]); + ib->idx_seg = NULL; + return; + } + + /* See if we can move to a smaller segment */ + bna_ib_select_segpool(num_idx, new_q_idx); + bna_ib_select_segpool(ib->idx_seg->ib_seg_size, cur_q_idx); + while (new_q_idx < cur_q_idx) { + if (!list_empty(&ib_mod->ibidx_seg_pool[new_q_idx])) + break; + new_q_idx++; + } + if (new_q_idx < cur_q_idx) { + /* Select the new smaller segment */ + bfa_q_deq(&ib_mod->ibidx_seg_pool[new_q_idx], &idx_seg); + bfa_q_qe_init(&idx_seg->qe); + /* Free the old segment */ + list_add_tail(&ib->idx_seg->qe, + &ib_mod->ibidx_seg_pool[cur_q_idx]); + ib->idx_seg = idx_seg; + } +} + +int +bna_ib_config(struct bna_ib *ib, struct bna_ib_config *ib_config) +{ + if (ib->start_count) + return -1; + + ib->ib_config.coalescing_timeo = ib_config->coalescing_timeo; + ib->ib_config.interpkt_timeo = ib_config->interpkt_timeo; + ib->ib_config.interpkt_count = ib_config->interpkt_count; + ib->ib_config.ctrl_flags = ib_config->ctrl_flags; + + ib->ib_config.ctrl_flags |= BFI_IB_CF_MASTER_ENABLE; + if (ib->intr->intr_type == BNA_INTR_T_MSIX) + ib->ib_config.ctrl_flags |= BFI_IB_CF_MSIX_MODE; + + return 0; +} + +void +bna_ib_start(struct bna_ib *ib) +{ + struct bna_ib_blk_mem ib_cfg; + struct bna_ib_blk_mem *ib_mem; + u32 pg_num; + u32 intx_mask; + int i; + void __iomem *base_addr; + unsigned long off; + + ib->start_count++; + + if (ib->start_count > 1) + return; + + ib_cfg.host_addr_lo = (u32)(ib->ib_seg_host_addr.lsb); + ib_cfg.host_addr_hi = (u32)(ib->ib_seg_host_addr.msb); + + ib_cfg.clsc_n_ctrl_n_msix = (((u32) + ib->ib_config.coalescing_timeo << 16) | + ((u32)ib->ib_config.ctrl_flags << 8) | + (ib->intr->vector)); + ib_cfg.ipkt_n_ent_n_idxof = + ((u32) + (ib->ib_config.interpkt_timeo & 0xf) << 16) | + ((u32)ib->idx_seg->ib_seg_size << 8) | + (ib->idx_seg->ib_idx_tbl_offset); + ib_cfg.ipkt_cnt_cfg_n_unacked = ((u32) + ib->ib_config.interpkt_count << 24); + + pg_num = BNA_GET_PAGE_NUM(HQM0_BLK_PG_NUM + ib->bna->port_num, + HQM_IB_RAM_BASE_OFFSET); + writel(pg_num, ib->bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(ib->bna->pcidev.pci_bar_kva, + HQM_IB_RAM_BASE_OFFSET); + + ib_mem = (struct bna_ib_blk_mem *)0; + off = (unsigned long)&ib_mem[ib->ib_id].host_addr_lo; + writel(htonl(ib_cfg.host_addr_lo), base_addr + off); + + off = (unsigned long)&ib_mem[ib->ib_id].host_addr_hi; + writel(htonl(ib_cfg.host_addr_hi), base_addr + off); + + off = (unsigned long)&ib_mem[ib->ib_id].clsc_n_ctrl_n_msix; + writel(ib_cfg.clsc_n_ctrl_n_msix, base_addr + off); + + off = (unsigned long)&ib_mem[ib->ib_id].ipkt_n_ent_n_idxof; + writel(ib_cfg.ipkt_n_ent_n_idxof, base_addr + off); + + off = (unsigned long)&ib_mem[ib->ib_id].ipkt_cnt_cfg_n_unacked; + writel(ib_cfg.ipkt_cnt_cfg_n_unacked, base_addr + off); + + ib->door_bell.doorbell_ack = BNA_DOORBELL_IB_INT_ACK( + (u32)ib->ib_config.coalescing_timeo, 0); + + pg_num = BNA_GET_PAGE_NUM(HQM0_BLK_PG_NUM + ib->bna->port_num, + HQM_INDX_TBL_RAM_BASE_OFFSET); + writel(pg_num, ib->bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(ib->bna->pcidev.pci_bar_kva, + HQM_INDX_TBL_RAM_BASE_OFFSET); + for (i = 0; i < ib->idx_seg->ib_seg_size; i++) { + off = (unsigned long) + ((ib->idx_seg->ib_idx_tbl_offset + i) * BFI_IBIDX_SIZE); + writel(0, base_addr + off); + } + + if (ib->intr->intr_type == BNA_INTR_T_INTX) { + bna_intx_disable(ib->bna, intx_mask); + intx_mask &= ~(ib->intr->vector); + bna_intx_enable(ib->bna, intx_mask); + } +} + +void +bna_ib_stop(struct bna_ib *ib) +{ + u32 intx_mask; + + ib->start_count--; + + if (ib->start_count == 0) { + writel(BNA_DOORBELL_IB_INT_DISABLE, + ib->door_bell.doorbell_addr); + if (ib->intr->intr_type == BNA_INTR_T_INTX) { + bna_intx_disable(ib->bna, intx_mask); + intx_mask |= (ib->intr->vector); + bna_intx_enable(ib->bna, intx_mask); + } + } +} + +void +bna_ib_fail(struct bna_ib *ib) +{ + ib->start_count = 0; +} + +/** + * RXF + */ +static void rxf_enable(struct bna_rxf *rxf); +static void rxf_disable(struct bna_rxf *rxf); +static void __rxf_config_set(struct bna_rxf *rxf); +static void __rxf_rit_set(struct bna_rxf *rxf); +static void __bna_rxf_stat_clr(struct bna_rxf *rxf); +static int rxf_process_packet_filter(struct bna_rxf *rxf); +static int rxf_clear_packet_filter(struct bna_rxf *rxf); +static void rxf_reset_packet_filter(struct bna_rxf *rxf); +static void rxf_cb_enabled(void *arg, int status); +static void rxf_cb_disabled(void *arg, int status); +static void bna_rxf_cb_stats_cleared(void *arg, int status); +static void __rxf_enable(struct bna_rxf *rxf); +static void __rxf_disable(struct bna_rxf *rxf); + +bfa_fsm_state_decl(bna_rxf, stopped, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, start_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, cam_fltr_mod_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, started, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, cam_fltr_clr_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, stop_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, pause_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, resume_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, stat_clr_wait, struct bna_rxf, + enum bna_rxf_event); + +static struct bfa_sm_table rxf_sm_table[] = { + {BFA_SM(bna_rxf_sm_stopped), BNA_RXF_STOPPED}, + {BFA_SM(bna_rxf_sm_start_wait), BNA_RXF_START_WAIT}, + {BFA_SM(bna_rxf_sm_cam_fltr_mod_wait), BNA_RXF_CAM_FLTR_MOD_WAIT}, + {BFA_SM(bna_rxf_sm_started), BNA_RXF_STARTED}, + {BFA_SM(bna_rxf_sm_cam_fltr_clr_wait), BNA_RXF_CAM_FLTR_CLR_WAIT}, + {BFA_SM(bna_rxf_sm_stop_wait), BNA_RXF_STOP_WAIT}, + {BFA_SM(bna_rxf_sm_pause_wait), BNA_RXF_PAUSE_WAIT}, + {BFA_SM(bna_rxf_sm_resume_wait), BNA_RXF_RESUME_WAIT}, + {BFA_SM(bna_rxf_sm_stat_clr_wait), BNA_RXF_STAT_CLR_WAIT} +}; + +static void +bna_rxf_sm_stopped_entry(struct bna_rxf *rxf) +{ + call_rxf_stop_cbfn(rxf, BNA_CB_SUCCESS); +} + +static void +bna_rxf_sm_stopped(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_START: + bfa_fsm_set_state(rxf, bna_rxf_sm_start_wait); + break; + + case RXF_E_STOP: + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_FAIL: + /* No-op */ + break; + + case RXF_E_CAM_FLTR_MOD: + call_rxf_cam_fltr_cbfn(rxf, BNA_CB_SUCCESS); + break; + + case RXF_E_STARTED: + case RXF_E_STOPPED: + case RXF_E_CAM_FLTR_RESP: + /** + * These events are received due to flushing of mbox + * when device fails + */ + /* No-op */ + break; + + case RXF_E_PAUSE: + rxf->rxf_oper_state = BNA_RXF_OPER_STATE_PAUSED; + call_rxf_pause_cbfn(rxf, BNA_CB_SUCCESS); + break; + + case RXF_E_RESUME: + rxf->rxf_oper_state = BNA_RXF_OPER_STATE_RUNNING; + call_rxf_resume_cbfn(rxf, BNA_CB_SUCCESS); + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_start_wait_entry(struct bna_rxf *rxf) +{ + __rxf_config_set(rxf); + __rxf_rit_set(rxf); + rxf_enable(rxf); +} + +static void +bna_rxf_sm_start_wait(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_STOP: + /** + * STOP is originated from bnad. When this happens, + * it can not be waiting for filter update + */ + call_rxf_start_cbfn(rxf, BNA_CB_INTERRUPT); + bfa_fsm_set_state(rxf, bna_rxf_sm_stop_wait); + break; + + case RXF_E_FAIL: + call_rxf_cam_fltr_cbfn(rxf, BNA_CB_SUCCESS); + call_rxf_start_cbfn(rxf, BNA_CB_FAIL); + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_CAM_FLTR_MOD: + /* No-op */ + break; + + case RXF_E_STARTED: + /** + * Force rxf_process_filter() to go through initial + * config + */ + if ((rxf->ucast_active_mac != NULL) && + (rxf->ucast_pending_set == 0)) + rxf->ucast_pending_set = 1; + + if (rxf->rss_status == BNA_STATUS_T_ENABLED) + rxf->rxf_flags |= BNA_RXF_FL_RSS_CONFIG_PENDING; + + rxf->rxf_flags |= BNA_RXF_FL_VLAN_CONFIG_PENDING; + + bfa_fsm_set_state(rxf, bna_rxf_sm_cam_fltr_mod_wait); + break; + + case RXF_E_PAUSE: + case RXF_E_RESUME: + rxf->rxf_flags |= BNA_RXF_FL_OPERSTATE_CHANGED; + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_cam_fltr_mod_wait_entry(struct bna_rxf *rxf) +{ + if (!rxf_process_packet_filter(rxf)) { + /* No more pending CAM entries to update */ + bfa_fsm_set_state(rxf, bna_rxf_sm_started); + } +} + +static void +bna_rxf_sm_cam_fltr_mod_wait(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_STOP: + /** + * STOP is originated from bnad. When this happens, + * it can not be waiting for filter update + */ + call_rxf_start_cbfn(rxf, BNA_CB_INTERRUPT); + bfa_fsm_set_state(rxf, bna_rxf_sm_cam_fltr_clr_wait); + break; + + case RXF_E_FAIL: + rxf_reset_packet_filter(rxf); + call_rxf_cam_fltr_cbfn(rxf, BNA_CB_SUCCESS); + call_rxf_start_cbfn(rxf, BNA_CB_FAIL); + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_CAM_FLTR_MOD: + /* No-op */ + break; + + case RXF_E_CAM_FLTR_RESP: + if (!rxf_process_packet_filter(rxf)) { + /* No more pending CAM entries to update */ + call_rxf_cam_fltr_cbfn(rxf, BNA_CB_SUCCESS); + bfa_fsm_set_state(rxf, bna_rxf_sm_started); + } + break; + + case RXF_E_PAUSE: + case RXF_E_RESUME: + rxf->rxf_flags |= BNA_RXF_FL_OPERSTATE_CHANGED; + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_started_entry(struct bna_rxf *rxf) +{ + call_rxf_start_cbfn(rxf, BNA_CB_SUCCESS); + + if (rxf->rxf_flags & BNA_RXF_FL_OPERSTATE_CHANGED) { + if (rxf->rxf_oper_state == BNA_RXF_OPER_STATE_PAUSED) + bfa_fsm_send_event(rxf, RXF_E_PAUSE); + else + bfa_fsm_send_event(rxf, RXF_E_RESUME); + } + +} + +static void +bna_rxf_sm_started(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_STOP: + bfa_fsm_set_state(rxf, bna_rxf_sm_cam_fltr_clr_wait); + /* Hack to get FSM start clearing CAM entries */ + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_RESP); + break; + + case RXF_E_FAIL: + rxf_reset_packet_filter(rxf); + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_CAM_FLTR_MOD: + bfa_fsm_set_state(rxf, bna_rxf_sm_cam_fltr_mod_wait); + break; + + case RXF_E_PAUSE: + bfa_fsm_set_state(rxf, bna_rxf_sm_pause_wait); + break; + + case RXF_E_RESUME: + bfa_fsm_set_state(rxf, bna_rxf_sm_resume_wait); + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_cam_fltr_clr_wait_entry(struct bna_rxf *rxf) +{ + /** + * Note: Do not add rxf_clear_packet_filter here. + * It will overstep mbox when this transition happens: + * cam_fltr_mod_wait -> cam_fltr_clr_wait on RXF_E_STOP event + */ +} + +static void +bna_rxf_sm_cam_fltr_clr_wait(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_FAIL: + /** + * FSM was in the process of stopping, initiated by + * bnad. When this happens, no one can be waiting for + * start or filter update + */ + rxf_reset_packet_filter(rxf); + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_CAM_FLTR_RESP: + if (!rxf_clear_packet_filter(rxf)) { + /* No more pending CAM entries to clear */ + bfa_fsm_set_state(rxf, bna_rxf_sm_stop_wait); + rxf_disable(rxf); + } + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_stop_wait_entry(struct bna_rxf *rxf) +{ + /** + * NOTE: Do not add rxf_disable here. + * It will overstep mbox when this transition happens: + * start_wait -> stop_wait on RXF_E_STOP event + */ +} + +static void +bna_rxf_sm_stop_wait(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_FAIL: + /** + * FSM was in the process of stopping, initiated by + * bnad. When this happens, no one can be waiting for + * start or filter update + */ + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_STARTED: + /** + * This event is received due to abrupt transition from + * bna_rxf_sm_start_wait state on receiving + * RXF_E_STOP event + */ + rxf_disable(rxf); + break; + + case RXF_E_STOPPED: + /** + * FSM was in the process of stopping, initiated by + * bnad. When this happens, no one can be waiting for + * start or filter update + */ + bfa_fsm_set_state(rxf, bna_rxf_sm_stat_clr_wait); + break; + + case RXF_E_PAUSE: + rxf->rxf_oper_state = BNA_RXF_OPER_STATE_PAUSED; + break; + + case RXF_E_RESUME: + rxf->rxf_oper_state = BNA_RXF_OPER_STATE_RUNNING; + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_pause_wait_entry(struct bna_rxf *rxf) +{ + rxf->rxf_flags &= + ~(BNA_RXF_FL_OPERSTATE_CHANGED | BNA_RXF_FL_RXF_ENABLED); + __rxf_disable(rxf); +} + +static void +bna_rxf_sm_pause_wait(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_FAIL: + /** + * FSM was in the process of disabling rxf, initiated by + * bnad. + */ + call_rxf_pause_cbfn(rxf, BNA_CB_FAIL); + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_STOPPED: + rxf->rxf_oper_state = BNA_RXF_OPER_STATE_PAUSED; + call_rxf_pause_cbfn(rxf, BNA_CB_SUCCESS); + bfa_fsm_set_state(rxf, bna_rxf_sm_started); + break; + + /* + * Since PAUSE/RESUME can only be sent by bnad, we don't expect + * any other event during these states + */ + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_resume_wait_entry(struct bna_rxf *rxf) +{ + rxf->rxf_flags &= ~(BNA_RXF_FL_OPERSTATE_CHANGED); + rxf->rxf_flags |= BNA_RXF_FL_RXF_ENABLED; + __rxf_enable(rxf); +} + +static void +bna_rxf_sm_resume_wait(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_FAIL: + /** + * FSM was in the process of disabling rxf, initiated by + * bnad. + */ + call_rxf_resume_cbfn(rxf, BNA_CB_FAIL); + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_STARTED: + rxf->rxf_oper_state = BNA_RXF_OPER_STATE_RUNNING; + call_rxf_resume_cbfn(rxf, BNA_CB_SUCCESS); + bfa_fsm_set_state(rxf, bna_rxf_sm_started); + break; + + /* + * Since PAUSE/RESUME can only be sent by bnad, we don't expect + * any other event during these states + */ + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_stat_clr_wait_entry(struct bna_rxf *rxf) +{ + __bna_rxf_stat_clr(rxf); +} + +static void +bna_rxf_sm_stat_clr_wait(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_FAIL: + case RXF_E_STAT_CLEARED: + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +__rxf_enable(struct bna_rxf *rxf) +{ + struct bfi_ll_rxf_multi_req ll_req; + u32 bm[2] = {0, 0}; + + if (rxf->rxf_id < 32) + bm[0] = 1 << rxf->rxf_id; + else + bm[1] = 1 << (rxf->rxf_id - 32); + + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_RX_REQ, 0); + ll_req.rxf_id_mask[0] = htonl(bm[0]); + ll_req.rxf_id_mask[1] = htonl(bm[1]); + ll_req.enable = 1; + + bna_mbox_qe_fill(&rxf->mbox_qe, &ll_req, sizeof(ll_req), + rxf_cb_enabled, rxf); + + bna_mbox_send(rxf->rx->bna, &rxf->mbox_qe); +} + +static void +__rxf_disable(struct bna_rxf *rxf) +{ + struct bfi_ll_rxf_multi_req ll_req; + u32 bm[2] = {0, 0}; + + if (rxf->rxf_id < 32) + bm[0] = 1 << rxf->rxf_id; + else + bm[1] = 1 << (rxf->rxf_id - 32); + + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_RX_REQ, 0); + ll_req.rxf_id_mask[0] = htonl(bm[0]); + ll_req.rxf_id_mask[1] = htonl(bm[1]); + ll_req.enable = 0; + + bna_mbox_qe_fill(&rxf->mbox_qe, &ll_req, sizeof(ll_req), + rxf_cb_disabled, rxf); + + bna_mbox_send(rxf->rx->bna, &rxf->mbox_qe); +} + +static void +__rxf_config_set(struct bna_rxf *rxf) +{ + u32 i; + struct bna_rss_mem *rss_mem; + struct bna_rx_fndb_ram *rx_fndb_ram; + struct bna *bna = rxf->rx->bna; + void __iomem *base_addr; + unsigned long off; + + base_addr = BNA_GET_MEM_BASE_ADDR(bna->pcidev.pci_bar_kva, + RSS_TABLE_BASE_OFFSET); + + rss_mem = (struct bna_rss_mem *)0; + + /* Configure RSS if required */ + if (rxf->ctrl_flags & BNA_RXF_CF_RSS_ENABLE) { + /* configure RSS Table */ + writel(BNA_GET_PAGE_NUM(RAD0_MEM_BLK_BASE_PG_NUM + + bna->port_num, RSS_TABLE_BASE_OFFSET), + bna->regs.page_addr); + + /* temporarily disable RSS, while hash value is written */ + off = (unsigned long)&rss_mem[0].type_n_hash; + writel(0, base_addr + off); + + for (i = 0; i < BFI_RSS_HASH_KEY_LEN; i++) { + off = (unsigned long) + &rss_mem[0].hash_key[(BFI_RSS_HASH_KEY_LEN - 1) - i]; + writel(htonl(rxf->rss_cfg.toeplitz_hash_key[i]), + base_addr + off); + } + + off = (unsigned long)&rss_mem[0].type_n_hash; + writel(rxf->rss_cfg.hash_type | rxf->rss_cfg.hash_mask, + base_addr + off); + } + + /* Configure RxF */ + writel(BNA_GET_PAGE_NUM( + LUT0_MEM_BLK_BASE_PG_NUM + (bna->port_num * 2), + RX_FNDB_RAM_BASE_OFFSET), + bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(bna->pcidev.pci_bar_kva, + RX_FNDB_RAM_BASE_OFFSET); + + rx_fndb_ram = (struct bna_rx_fndb_ram *)0; + + /* We always use RSS table 0 */ + off = (unsigned long)&rx_fndb_ram[rxf->rxf_id].rss_prop; + writel(rxf->ctrl_flags & BNA_RXF_CF_RSS_ENABLE, + base_addr + off); + + /* small large buffer enable/disable */ + off = (unsigned long)&rx_fndb_ram[rxf->rxf_id].size_routing_props; + writel((rxf->ctrl_flags & BNA_RXF_CF_SM_LG_RXQ) | 0x80, + base_addr + off); + + /* RIT offset, HDS forced offset, multicast RxQ Id */ + off = (unsigned long)&rx_fndb_ram[rxf->rxf_id].rit_hds_mcastq; + writel((rxf->rit_segment->rit_offset << 16) | + (rxf->forced_offset << 8) | + (rxf->hds_cfg.hdr_type & BNA_HDS_FORCED) | rxf->mcast_rxq_id, + base_addr + off); + + /* + * default vlan tag, default function enable, strip vlan bytes, + * HDS type, header size + */ + + off = (unsigned long)&rx_fndb_ram[rxf->rxf_id].control_flags; + writel(((u32)rxf->default_vlan_tag << 16) | + (rxf->ctrl_flags & + (BNA_RXF_CF_DEFAULT_VLAN | + BNA_RXF_CF_DEFAULT_FUNCTION_ENABLE | + BNA_RXF_CF_VLAN_STRIP)) | + (rxf->hds_cfg.hdr_type & ~BNA_HDS_FORCED) | + rxf->hds_cfg.header_size, + base_addr + off); +} + +void +__rxf_vlan_filter_set(struct bna_rxf *rxf, enum bna_status status) +{ + struct bna *bna = rxf->rx->bna; + int i; + + writel(BNA_GET_PAGE_NUM(LUT0_MEM_BLK_BASE_PG_NUM + + (bna->port_num * 2), VLAN_RAM_BASE_OFFSET), + bna->regs.page_addr); + + if (status == BNA_STATUS_T_ENABLED) { + /* enable VLAN filtering on this function */ + for (i = 0; i <= BFI_MAX_VLAN / 32; i++) { + writel(rxf->vlan_filter_table[i], + BNA_GET_VLAN_MEM_ENTRY_ADDR + (bna->pcidev.pci_bar_kva, rxf->rxf_id, + i * 32)); + } + } else { + /* disable VLAN filtering on this function */ + for (i = 0; i <= BFI_MAX_VLAN / 32; i++) { + writel(0xffffffff, + BNA_GET_VLAN_MEM_ENTRY_ADDR + (bna->pcidev.pci_bar_kva, rxf->rxf_id, + i * 32)); + } + } +} + +static void +__rxf_rit_set(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + struct bna_rit_mem *rit_mem; + int i; + void __iomem *base_addr; + unsigned long off; + + base_addr = BNA_GET_MEM_BASE_ADDR(bna->pcidev.pci_bar_kva, + FUNCTION_TO_RXQ_TRANSLATE); + + rit_mem = (struct bna_rit_mem *)0; + + writel(BNA_GET_PAGE_NUM(RXA0_MEM_BLK_BASE_PG_NUM + bna->port_num, + FUNCTION_TO_RXQ_TRANSLATE), + bna->regs.page_addr); + + for (i = 0; i < rxf->rit_segment->rit_size; i++) { + off = (unsigned long)&rit_mem[i + rxf->rit_segment->rit_offset]; + writel(rxf->rit_segment->rit[i].large_rxq_id << 6 | + rxf->rit_segment->rit[i].small_rxq_id, + base_addr + off); + } +} + +static void +__bna_rxf_stat_clr(struct bna_rxf *rxf) +{ + struct bfi_ll_stats_req ll_req; + u32 bm[2] = {0, 0}; + + if (rxf->rxf_id < 32) + bm[0] = 1 << rxf->rxf_id; + else + bm[1] = 1 << (rxf->rxf_id - 32); + + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_STATS_CLEAR_REQ, 0); + ll_req.stats_mask = 0; + ll_req.txf_id_mask[0] = 0; + ll_req.txf_id_mask[1] = 0; + + ll_req.rxf_id_mask[0] = htonl(bm[0]); + ll_req.rxf_id_mask[1] = htonl(bm[1]); + + bna_mbox_qe_fill(&rxf->mbox_qe, &ll_req, sizeof(ll_req), + bna_rxf_cb_stats_cleared, rxf); + bna_mbox_send(rxf->rx->bna, &rxf->mbox_qe); +} + +static void +rxf_enable(struct bna_rxf *rxf) +{ + if (rxf->rxf_oper_state == BNA_RXF_OPER_STATE_PAUSED) + bfa_fsm_send_event(rxf, RXF_E_STARTED); + else { + rxf->rxf_flags |= BNA_RXF_FL_RXF_ENABLED; + __rxf_enable(rxf); + } +} + +static void +rxf_cb_enabled(void *arg, int status) +{ + struct bna_rxf *rxf = (struct bna_rxf *)arg; + + bfa_q_qe_init(&rxf->mbox_qe.qe); + bfa_fsm_send_event(rxf, RXF_E_STARTED); +} + +static void +rxf_disable(struct bna_rxf *rxf) +{ + if (rxf->rxf_oper_state == BNA_RXF_OPER_STATE_PAUSED) + bfa_fsm_send_event(rxf, RXF_E_STOPPED); + else + rxf->rxf_flags &= ~BNA_RXF_FL_RXF_ENABLED; + __rxf_disable(rxf); +} + +static void +rxf_cb_disabled(void *arg, int status) +{ + struct bna_rxf *rxf = (struct bna_rxf *)arg; + + bfa_q_qe_init(&rxf->mbox_qe.qe); + bfa_fsm_send_event(rxf, RXF_E_STOPPED); +} + +void +rxf_cb_cam_fltr_mbox_cmd(void *arg, int status) +{ + struct bna_rxf *rxf = (struct bna_rxf *)arg; + + bfa_q_qe_init(&rxf->mbox_qe.qe); + + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_RESP); +} + +static void +bna_rxf_cb_stats_cleared(void *arg, int status) +{ + struct bna_rxf *rxf = (struct bna_rxf *)arg; + + bfa_q_qe_init(&rxf->mbox_qe.qe); + bfa_fsm_send_event(rxf, RXF_E_STAT_CLEARED); +} + +void +rxf_cam_mbox_cmd(struct bna_rxf *rxf, u8 cmd, + const struct bna_mac *mac_addr) +{ + struct bfi_ll_mac_addr_req req; + + bfi_h2i_set(req.mh, BFI_MC_LL, cmd, 0); + + req.rxf_id = rxf->rxf_id; + memcpy(&req.mac_addr, (void *)&mac_addr->addr, ETH_ALEN); + + bna_mbox_qe_fill(&rxf->mbox_qe, &req, sizeof(req), + rxf_cb_cam_fltr_mbox_cmd, rxf); + + bna_mbox_send(rxf->rx->bna, &rxf->mbox_qe); +} + +static int +rxf_process_packet_filter_mcast(struct bna_rxf *rxf) +{ + struct bna_mac *mac = NULL; + struct list_head *qe; + + /* Add multicast entries */ + if (!list_empty(&rxf->mcast_pending_add_q)) { + bfa_q_deq(&rxf->mcast_pending_add_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_ADD_REQ, mac); + list_add_tail(&mac->qe, &rxf->mcast_active_q); + return 1; + } + + /* Delete multicast entries previousely added */ + if (!list_empty(&rxf->mcast_pending_del_q)) { + bfa_q_deq(&rxf->mcast_pending_del_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_DEL_REQ, mac); + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + return 1; + } + + return 0; +} + +static int +rxf_process_packet_filter_vlan(struct bna_rxf *rxf) +{ + /* Apply the VLAN filter */ + if (rxf->rxf_flags & BNA_RXF_FL_VLAN_CONFIG_PENDING) { + rxf->rxf_flags &= ~BNA_RXF_FL_VLAN_CONFIG_PENDING; + if (!(rxf->rxmode_active & BNA_RXMODE_PROMISC) && + !(rxf->rxmode_active & BNA_RXMODE_DEFAULT)) + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + } + + /* Apply RSS configuration */ + if (rxf->rxf_flags & BNA_RXF_FL_RSS_CONFIG_PENDING) { + rxf->rxf_flags &= ~BNA_RXF_FL_RSS_CONFIG_PENDING; + if (rxf->rss_status == BNA_STATUS_T_DISABLED) { + /* RSS is being disabled */ + rxf->ctrl_flags &= ~BNA_RXF_CF_RSS_ENABLE; + __rxf_rit_set(rxf); + __rxf_config_set(rxf); + } else { + /* RSS is being enabled or reconfigured */ + rxf->ctrl_flags |= BNA_RXF_CF_RSS_ENABLE; + __rxf_rit_set(rxf); + __rxf_config_set(rxf); + } + } + + return 0; +} + +/** + * Processes pending ucast, mcast entry addition/deletion and issues mailbox + * command. Also processes pending filter configuration - promiscuous mode, + * default mode, allmutli mode and issues mailbox command or directly applies + * to h/w + */ +static int +rxf_process_packet_filter(struct bna_rxf *rxf) +{ + /* Set the default MAC first */ + if (rxf->ucast_pending_set > 0) { + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_UCAST_SET_REQ, + rxf->ucast_active_mac); + rxf->ucast_pending_set--; + return 1; + } + + if (rxf_process_packet_filter_ucast(rxf)) + return 1; + + if (rxf_process_packet_filter_mcast(rxf)) + return 1; + + if (rxf_process_packet_filter_promisc(rxf)) + return 1; + + if (rxf_process_packet_filter_default(rxf)) + return 1; + + if (rxf_process_packet_filter_allmulti(rxf)) + return 1; + + if (rxf_process_packet_filter_vlan(rxf)) + return 1; + + return 0; +} + +static int +rxf_clear_packet_filter_mcast(struct bna_rxf *rxf) +{ + struct bna_mac *mac = NULL; + struct list_head *qe; + + /* 3. delete pending mcast entries */ + if (!list_empty(&rxf->mcast_pending_del_q)) { + bfa_q_deq(&rxf->mcast_pending_del_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_DEL_REQ, mac); + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + return 1; + } + + /* 4. clear active mcast entries; move them to pending_add_q */ + if (!list_empty(&rxf->mcast_active_q)) { + bfa_q_deq(&rxf->mcast_active_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_DEL_REQ, mac); + list_add_tail(&mac->qe, &rxf->mcast_pending_add_q); + return 1; + } + + return 0; +} + +/** + * In the rxf stop path, processes pending ucast/mcast delete queue and issues + * the mailbox command. Moves the active ucast/mcast entries to pending add q, + * so that they are added to CAM again in the rxf start path. Moves the current + * filter settings - promiscuous, default, allmutli - to pending filter + * configuration + */ +static int +rxf_clear_packet_filter(struct bna_rxf *rxf) +{ + if (rxf_clear_packet_filter_ucast(rxf)) + return 1; + + if (rxf_clear_packet_filter_mcast(rxf)) + return 1; + + /* 5. clear active default MAC in the CAM */ + if (rxf->ucast_pending_set > 0) + rxf->ucast_pending_set = 0; + + if (rxf_clear_packet_filter_promisc(rxf)) + return 1; + + if (rxf_clear_packet_filter_default(rxf)) + return 1; + + if (rxf_clear_packet_filter_allmulti(rxf)) + return 1; + + return 0; +} + +static void +rxf_reset_packet_filter_mcast(struct bna_rxf *rxf) +{ + struct list_head *qe; + struct bna_mac *mac; + + /* 3. Move active mcast entries to pending_add_q */ + while (!list_empty(&rxf->mcast_active_q)) { + bfa_q_deq(&rxf->mcast_active_q, &qe); + bfa_q_qe_init(qe); + list_add_tail(qe, &rxf->mcast_pending_add_q); + } + + /* 4. Throw away delete pending mcast entries */ + while (!list_empty(&rxf->mcast_pending_del_q)) { + bfa_q_deq(&rxf->mcast_pending_del_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + } +} + +/** + * In the rxf fail path, throws away the ucast/mcast entries pending for + * deletion, moves all active ucast/mcast entries to pending queue so that + * they are added back to CAM in the rxf start path. Also moves the current + * filter configuration to pending filter configuration. + */ +static void +rxf_reset_packet_filter(struct bna_rxf *rxf) +{ + rxf_reset_packet_filter_ucast(rxf); + + rxf_reset_packet_filter_mcast(rxf); + + /* 5. Turn off ucast set flag */ + rxf->ucast_pending_set = 0; + + rxf_reset_packet_filter_promisc(rxf); + + rxf_reset_packet_filter_default(rxf); + + rxf_reset_packet_filter_allmulti(rxf); +} + +void +bna_rxf_init(struct bna_rxf *rxf, + struct bna_rx *rx, + struct bna_rx_config *q_config) +{ + struct list_head *qe; + struct bna_rxp *rxp; + + /* rxf_id is initialized during rx_mod init */ + rxf->rx = rx; + + INIT_LIST_HEAD(&rxf->ucast_pending_add_q); + INIT_LIST_HEAD(&rxf->ucast_pending_del_q); + rxf->ucast_pending_set = 0; + INIT_LIST_HEAD(&rxf->ucast_active_q); + rxf->ucast_active_mac = NULL; + + INIT_LIST_HEAD(&rxf->mcast_pending_add_q); + INIT_LIST_HEAD(&rxf->mcast_pending_del_q); + INIT_LIST_HEAD(&rxf->mcast_active_q); + + bfa_q_qe_init(&rxf->mbox_qe.qe); + + if (q_config->vlan_strip_status == BNA_STATUS_T_ENABLED) + rxf->ctrl_flags |= BNA_RXF_CF_VLAN_STRIP; + + rxf->rxf_oper_state = (q_config->paused) ? + BNA_RXF_OPER_STATE_PAUSED : BNA_RXF_OPER_STATE_RUNNING; + + bna_rxf_adv_init(rxf, rx, q_config); + + rxf->rit_segment = bna_rit_mod_seg_get(&rxf->rx->bna->rit_mod, + q_config->num_paths); + + list_for_each(qe, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe; + if (q_config->rxp_type == BNA_RXP_SINGLE) + rxf->mcast_rxq_id = rxp->rxq.single.only->rxq_id; + else + rxf->mcast_rxq_id = rxp->rxq.slr.large->rxq_id; + break; + } + + rxf->vlan_filter_status = BNA_STATUS_T_DISABLED; + memset(rxf->vlan_filter_table, 0, + (sizeof(u32) * ((BFI_MAX_VLAN + 1) / 32))); + + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); +} + +void +bna_rxf_uninit(struct bna_rxf *rxf) +{ + struct bna_mac *mac; + + bna_rit_mod_seg_put(&rxf->rx->bna->rit_mod, rxf->rit_segment); + rxf->rit_segment = NULL; + + rxf->ucast_pending_set = 0; + + while (!list_empty(&rxf->ucast_pending_add_q)) { + bfa_q_deq(&rxf->ucast_pending_add_q, &mac); + bfa_q_qe_init(&mac->qe); + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, mac); + } + + if (rxf->ucast_active_mac) { + bfa_q_qe_init(&rxf->ucast_active_mac->qe); + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, + rxf->ucast_active_mac); + rxf->ucast_active_mac = NULL; + } + + while (!list_empty(&rxf->mcast_pending_add_q)) { + bfa_q_deq(&rxf->mcast_pending_add_q, &mac); + bfa_q_qe_init(&mac->qe); + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + } + + rxf->rx = NULL; +} + +void +bna_rxf_start(struct bna_rxf *rxf) +{ + rxf->start_cbfn = bna_rx_cb_rxf_started; + rxf->start_cbarg = rxf->rx; + rxf->rxf_flags &= ~BNA_RXF_FL_FAILED; + bfa_fsm_send_event(rxf, RXF_E_START); +} + +void +bna_rxf_stop(struct bna_rxf *rxf) +{ + rxf->stop_cbfn = bna_rx_cb_rxf_stopped; + rxf->stop_cbarg = rxf->rx; + bfa_fsm_send_event(rxf, RXF_E_STOP); +} + +void +bna_rxf_fail(struct bna_rxf *rxf) +{ + rxf->rxf_flags |= BNA_RXF_FL_FAILED; + bfa_fsm_send_event(rxf, RXF_E_FAIL); +} + +int +bna_rxf_state_get(struct bna_rxf *rxf) +{ + return bfa_sm_to_state(rxf_sm_table, rxf->fsm); +} + +enum bna_cb_status +bna_rx_ucast_set(struct bna_rx *rx, u8 *ucmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + + if (rxf->ucast_active_mac == NULL) { + rxf->ucast_active_mac = + bna_ucam_mod_mac_get(&rxf->rx->bna->ucam_mod); + if (rxf->ucast_active_mac == NULL) + return BNA_CB_UCAST_CAM_FULL; + bfa_q_qe_init(&rxf->ucast_active_mac->qe); + } + + memcpy(rxf->ucast_active_mac->addr, ucmac, ETH_ALEN); + rxf->ucast_pending_set++; + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + + return BNA_CB_SUCCESS; +} + +enum bna_cb_status +bna_rx_mcast_add(struct bna_rx *rx, u8 *addr, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head *qe; + struct bna_mac *mac; + + /* Check if already added */ + list_for_each(qe, &rxf->mcast_active_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + /* Check if pending addition */ + list_for_each(qe, &rxf->mcast_pending_add_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + mac = bna_mcam_mod_mac_get(&rxf->rx->bna->mcam_mod); + if (mac == NULL) + return BNA_CB_MCAST_LIST_FULL; + bfa_q_qe_init(&mac->qe); + memcpy(mac->addr, addr, ETH_ALEN); + list_add_tail(&mac->qe, &rxf->mcast_pending_add_q); + + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + + return BNA_CB_SUCCESS; +} + +enum bna_cb_status +bna_rx_mcast_del(struct bna_rx *rx, u8 *addr, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head *qe; + struct bna_mac *mac; + + list_for_each(qe, &rxf->mcast_pending_add_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + list_del(qe); + bfa_q_qe_init(qe); + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + list_for_each(qe, &rxf->mcast_active_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + list_del(qe); + bfa_q_qe_init(qe); + list_add_tail(qe, &rxf->mcast_pending_del_q); + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + return BNA_CB_SUCCESS; + } + } + + return BNA_CB_INVALID_MAC; +} + +enum bna_cb_status +bna_rx_mcast_listset(struct bna_rx *rx, int count, u8 *mclist, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head list_head; + struct list_head *qe; + u8 *mcaddr; + struct bna_mac *mac; + struct bna_mac *mac1; + int skip; + int delete; + int need_hw_config = 0; + int i; + + /* Allocate nodes */ + INIT_LIST_HEAD(&list_head); + for (i = 0, mcaddr = mclist; i < count; i++) { + mac = bna_mcam_mod_mac_get(&rxf->rx->bna->mcam_mod); + if (mac == NULL) + goto err_return; + bfa_q_qe_init(&mac->qe); + memcpy(mac->addr, mcaddr, ETH_ALEN); + list_add_tail(&mac->qe, &list_head); + + mcaddr += ETH_ALEN; + } + + /* Schedule for addition */ + while (!list_empty(&list_head)) { + bfa_q_deq(&list_head, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + + skip = 0; + + /* Skip if already added */ + list_for_each(qe, &rxf->mcast_active_q) { + mac1 = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac1->addr, mac->addr)) { + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, + mac); + skip = 1; + break; + } + } + + if (skip) + continue; + + /* Skip if pending addition */ + list_for_each(qe, &rxf->mcast_pending_add_q) { + mac1 = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac1->addr, mac->addr)) { + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, + mac); + skip = 1; + break; + } + } + + if (skip) + continue; + + need_hw_config = 1; + list_add_tail(&mac->qe, &rxf->mcast_pending_add_q); + } + + /** + * Delete the entries that are in the pending_add_q but not + * in the new list + */ + while (!list_empty(&rxf->mcast_pending_add_q)) { + bfa_q_deq(&rxf->mcast_pending_add_q, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + for (i = 0, mcaddr = mclist, delete = 1; i < count; i++) { + if (BNA_MAC_IS_EQUAL(mcaddr, mac->addr)) { + delete = 0; + break; + } + mcaddr += ETH_ALEN; + } + if (delete) + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + else + list_add_tail(&mac->qe, &list_head); + } + while (!list_empty(&list_head)) { + bfa_q_deq(&list_head, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + list_add_tail(&mac->qe, &rxf->mcast_pending_add_q); + } + + /** + * Schedule entries for deletion that are in the active_q but not + * in the new list + */ + while (!list_empty(&rxf->mcast_active_q)) { + bfa_q_deq(&rxf->mcast_active_q, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + for (i = 0, mcaddr = mclist, delete = 1; i < count; i++) { + if (BNA_MAC_IS_EQUAL(mcaddr, mac->addr)) { + delete = 0; + break; + } + mcaddr += ETH_ALEN; + } + if (delete) { + list_add_tail(&mac->qe, &rxf->mcast_pending_del_q); + need_hw_config = 1; + } else { + list_add_tail(&mac->qe, &list_head); + } + } + while (!list_empty(&list_head)) { + bfa_q_deq(&list_head, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + list_add_tail(&mac->qe, &rxf->mcast_active_q); + } + + if (need_hw_config) { + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + } else if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + + return BNA_CB_SUCCESS; + +err_return: + while (!list_empty(&list_head)) { + bfa_q_deq(&list_head, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + } + + return BNA_CB_MCAST_LIST_FULL; +} + +void +bna_rx_vlan_add(struct bna_rx *rx, int vlan_id) +{ + struct bna_rxf *rxf = &rx->rxf; + int index = (vlan_id >> 5); + int bit = (1 << (vlan_id & 0x1F)); + + rxf->vlan_filter_table[index] |= bit; + if (rxf->vlan_filter_status == BNA_STATUS_T_ENABLED) { + rxf->rxf_flags |= BNA_RXF_FL_VLAN_CONFIG_PENDING; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + } +} + +void +bna_rx_vlan_del(struct bna_rx *rx, int vlan_id) +{ + struct bna_rxf *rxf = &rx->rxf; + int index = (vlan_id >> 5); + int bit = (1 << (vlan_id & 0x1F)); + + rxf->vlan_filter_table[index] &= ~bit; + if (rxf->vlan_filter_status == BNA_STATUS_T_ENABLED) { + rxf->rxf_flags |= BNA_RXF_FL_VLAN_CONFIG_PENDING; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + } +} + +/** + * RX + */ +#define RXQ_RCB_INIT(q, rxp, qdepth, bna, _id, unmapq_mem) do { \ + struct bna_doorbell_qset *_qset; \ + unsigned long off; \ + (q)->rcb->producer_index = (q)->rcb->consumer_index = 0; \ + (q)->rcb->q_depth = (qdepth); \ + (q)->rcb->unmap_q = unmapq_mem; \ + (q)->rcb->rxq = (q); \ + (q)->rcb->cq = &(rxp)->cq; \ + (q)->rcb->bnad = (bna)->bnad; \ + _qset = (struct bna_doorbell_qset *)0; \ + off = (unsigned long)&_qset[(q)->rxq_id].rxq[0]; \ + (q)->rcb->q_dbell = off + \ + BNA_GET_DOORBELL_BASE_ADDR((bna)->pcidev.pci_bar_kva); \ + (q)->rcb->id = _id; \ +} while (0) + +#define BNA_GET_RXQS(qcfg) (((qcfg)->rxp_type == BNA_RXP_SINGLE) ? \ + (qcfg)->num_paths : ((qcfg)->num_paths * 2)) + +#define SIZE_TO_PAGES(size) (((size) >> PAGE_SHIFT) + ((((size) &\ + (PAGE_SIZE - 1)) + (PAGE_SIZE - 1)) >> PAGE_SHIFT)) + +#define call_rx_stop_callback(rx, status) \ + if ((rx)->stop_cbfn) { \ + (*(rx)->stop_cbfn)((rx)->stop_cbarg, rx, (status)); \ + (rx)->stop_cbfn = NULL; \ + (rx)->stop_cbarg = NULL; \ + } + +/* + * Since rx_enable is synchronous callback, there is no start_cbfn required. + * Instead, we'll call bnad_rx_post(rxp) so that bnad can post the buffers + * for each rxpath. + */ + +#define call_rx_disable_cbfn(rx, status) \ + if ((rx)->disable_cbfn) { \ + (*(rx)->disable_cbfn)((rx)->disable_cbarg, \ + status); \ + (rx)->disable_cbfn = NULL; \ + (rx)->disable_cbarg = NULL; \ + } \ + +#define rxqs_reqd(type, num_rxqs) \ + (((type) == BNA_RXP_SINGLE) ? (num_rxqs) : ((num_rxqs) * 2)) + +#define rx_ib_fail(rx) \ +do { \ + struct bna_rxp *rxp; \ + struct list_head *qe; \ + list_for_each(qe, &(rx)->rxp_q) { \ + rxp = (struct bna_rxp *)qe; \ + bna_ib_fail(rxp->cq.ib); \ + } \ +} while (0) + +static void __bna_multi_rxq_stop(struct bna_rxp *, u32 *); +static void __bna_rxq_start(struct bna_rxq *rxq); +static void __bna_cq_start(struct bna_cq *cq); +static void bna_rit_create(struct bna_rx *rx); +static void bna_rx_cb_multi_rxq_stopped(void *arg, int status); +static void bna_rx_cb_rxq_stopped_all(void *arg); + +bfa_fsm_state_decl(bna_rx, stopped, + struct bna_rx, enum bna_rx_event); +bfa_fsm_state_decl(bna_rx, rxf_start_wait, + struct bna_rx, enum bna_rx_event); +bfa_fsm_state_decl(bna_rx, started, + struct bna_rx, enum bna_rx_event); +bfa_fsm_state_decl(bna_rx, rxf_stop_wait, + struct bna_rx, enum bna_rx_event); +bfa_fsm_state_decl(bna_rx, rxq_stop_wait, + struct bna_rx, enum bna_rx_event); + +static struct bfa_sm_table rx_sm_table[] = { + {BFA_SM(bna_rx_sm_stopped), BNA_RX_STOPPED}, + {BFA_SM(bna_rx_sm_rxf_start_wait), BNA_RX_RXF_START_WAIT}, + {BFA_SM(bna_rx_sm_started), BNA_RX_STARTED}, + {BFA_SM(bna_rx_sm_rxf_stop_wait), BNA_RX_RXF_STOP_WAIT}, + {BFA_SM(bna_rx_sm_rxq_stop_wait), BNA_RX_RXQ_STOP_WAIT}, +}; + +static void bna_rx_sm_stopped_entry(struct bna_rx *rx) +{ + struct bna_rxp *rxp; + struct list_head *qe_rxp; + + list_for_each(qe_rxp, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe_rxp; + rx->rx_cleanup_cbfn(rx->bna->bnad, rxp->cq.ccb); + } + + call_rx_stop_callback(rx, BNA_CB_SUCCESS); +} + +static void bna_rx_sm_stopped(struct bna_rx *rx, + enum bna_rx_event event) +{ + switch (event) { + case RX_E_START: + bfa_fsm_set_state(rx, bna_rx_sm_rxf_start_wait); + break; + case RX_E_STOP: + call_rx_stop_callback(rx, BNA_CB_SUCCESS); + break; + case RX_E_FAIL: + /* no-op */ + break; + default: + bfa_sm_fault(rx->bna, event); + break; + } + +} + +static void bna_rx_sm_rxf_start_wait_entry(struct bna_rx *rx) +{ + struct bna_rxp *rxp; + struct list_head *qe_rxp; + struct bna_rxq *q0 = NULL, *q1 = NULL; + + /* Setup the RIT */ + bna_rit_create(rx); + + list_for_each(qe_rxp, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe_rxp; + bna_ib_start(rxp->cq.ib); + GET_RXQS(rxp, q0, q1); + q0->buffer_size = bna_port_mtu_get(&rx->bna->port); + __bna_rxq_start(q0); + rx->rx_post_cbfn(rx->bna->bnad, q0->rcb); + if (q1) { + __bna_rxq_start(q1); + rx->rx_post_cbfn(rx->bna->bnad, q1->rcb); + } + __bna_cq_start(&rxp->cq); + } + + bna_rxf_start(&rx->rxf); +} + +static void bna_rx_sm_rxf_start_wait(struct bna_rx *rx, + enum bna_rx_event event) +{ + switch (event) { + case RX_E_STOP: + bfa_fsm_set_state(rx, bna_rx_sm_rxf_stop_wait); + break; + case RX_E_FAIL: + bfa_fsm_set_state(rx, bna_rx_sm_stopped); + rx_ib_fail(rx); + bna_rxf_fail(&rx->rxf); + break; + case RX_E_RXF_STARTED: + bfa_fsm_set_state(rx, bna_rx_sm_started); + break; + default: + bfa_sm_fault(rx->bna, event); + break; + } +} + +void +bna_rx_sm_started_entry(struct bna_rx *rx) +{ + struct bna_rxp *rxp; + struct list_head *qe_rxp; + + /* Start IB */ + list_for_each(qe_rxp, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe_rxp; + bna_ib_ack(&rxp->cq.ib->door_bell, 0); + } + + bna_llport_admin_up(&rx->bna->port.llport); +} + +void +bna_rx_sm_started(struct bna_rx *rx, enum bna_rx_event event) +{ + switch (event) { + case RX_E_FAIL: + bna_llport_admin_down(&rx->bna->port.llport); + bfa_fsm_set_state(rx, bna_rx_sm_stopped); + rx_ib_fail(rx); + bna_rxf_fail(&rx->rxf); + break; + case RX_E_STOP: + bna_llport_admin_down(&rx->bna->port.llport); + bfa_fsm_set_state(rx, bna_rx_sm_rxf_stop_wait); + break; + default: + bfa_sm_fault(rx->bna, event); + break; + } +} + +void +bna_rx_sm_rxf_stop_wait_entry(struct bna_rx *rx) +{ + bna_rxf_stop(&rx->rxf); +} + +void +bna_rx_sm_rxf_stop_wait(struct bna_rx *rx, enum bna_rx_event event) +{ + switch (event) { + case RX_E_RXF_STOPPED: + bfa_fsm_set_state(rx, bna_rx_sm_rxq_stop_wait); + break; + case RX_E_RXF_STARTED: + /** + * RxF was in the process of starting up when + * RXF_E_STOP was issued. Ignore this event + */ + break; + case RX_E_FAIL: + bfa_fsm_set_state(rx, bna_rx_sm_stopped); + rx_ib_fail(rx); + bna_rxf_fail(&rx->rxf); + break; + default: + bfa_sm_fault(rx->bna, event); + break; + } + +} + +void +bna_rx_sm_rxq_stop_wait_entry(struct bna_rx *rx) +{ + struct bna_rxp *rxp = NULL; + struct bna_rxq *q0 = NULL; + struct bna_rxq *q1 = NULL; + struct list_head *qe; + u32 rxq_mask[2] = {0, 0}; + + /* Only one call to multi-rxq-stop for all RXPs in this RX */ + bfa_wc_up(&rx->rxq_stop_wc); + list_for_each(qe, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe; + GET_RXQS(rxp, q0, q1); + if (q0->rxq_id < 32) + rxq_mask[0] |= ((u32)1 << q0->rxq_id); + else + rxq_mask[1] |= ((u32)1 << (q0->rxq_id - 32)); + if (q1) { + if (q1->rxq_id < 32) + rxq_mask[0] |= ((u32)1 << q1->rxq_id); + else + rxq_mask[1] |= ((u32) + 1 << (q1->rxq_id - 32)); + } + } + + __bna_multi_rxq_stop(rxp, rxq_mask); +} + +void +bna_rx_sm_rxq_stop_wait(struct bna_rx *rx, enum bna_rx_event event) +{ + struct bna_rxp *rxp = NULL; + struct list_head *qe; + + switch (event) { + case RX_E_RXQ_STOPPED: + list_for_each(qe, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe; + bna_ib_stop(rxp->cq.ib); + } + /* Fall through */ + case RX_E_FAIL: + bfa_fsm_set_state(rx, bna_rx_sm_stopped); + break; + default: + bfa_sm_fault(rx->bna, event); + break; + } +} + +void +__bna_multi_rxq_stop(struct bna_rxp *rxp, u32 * rxq_id_mask) +{ + struct bfi_ll_q_stop_req ll_req; + + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_RXQ_STOP_REQ, 0); + ll_req.q_id_mask[0] = htonl(rxq_id_mask[0]); + ll_req.q_id_mask[1] = htonl(rxq_id_mask[1]); + bna_mbox_qe_fill(&rxp->mbox_qe, &ll_req, sizeof(ll_req), + bna_rx_cb_multi_rxq_stopped, rxp); + bna_mbox_send(rxp->rx->bna, &rxp->mbox_qe); +} + +void +__bna_rxq_start(struct bna_rxq *rxq) +{ + struct bna_rxtx_q_mem *q_mem; + struct bna_rxq_mem rxq_cfg, *rxq_mem; + struct bna_dma_addr cur_q_addr; + /* struct bna_doorbell_qset *qset; */ + struct bna_qpt *qpt; + u32 pg_num; + struct bna *bna = rxq->rx->bna; + void __iomem *base_addr; + unsigned long off; + + qpt = &rxq->qpt; + cur_q_addr = *((struct bna_dma_addr *)(qpt->kv_qpt_ptr)); + + rxq_cfg.pg_tbl_addr_lo = qpt->hw_qpt_ptr.lsb; + rxq_cfg.pg_tbl_addr_hi = qpt->hw_qpt_ptr.msb; + rxq_cfg.cur_q_entry_lo = cur_q_addr.lsb; + rxq_cfg.cur_q_entry_hi = cur_q_addr.msb; + + rxq_cfg.pg_cnt_n_prd_ptr = ((u32)qpt->page_count << 16) | 0x0; + rxq_cfg.entry_n_pg_size = ((u32)(BFI_RXQ_WI_SIZE >> 2) << 16) | + (qpt->page_size >> 2); + rxq_cfg.sg_n_cq_n_cns_ptr = + ((u32)(rxq->rxp->cq.cq_id & 0xff) << 16) | 0x0; + rxq_cfg.buf_sz_n_q_state = ((u32)rxq->buffer_size << 16) | + BNA_Q_IDLE_STATE; + rxq_cfg.next_qid = 0x0 | (0x3 << 8); + + /* Write the page number register */ + pg_num = BNA_GET_PAGE_NUM(HQM0_BLK_PG_NUM + bna->port_num, + HQM_RXTX_Q_RAM_BASE_OFFSET); + writel(pg_num, bna->regs.page_addr); + + /* Write to h/w */ + base_addr = BNA_GET_MEM_BASE_ADDR(bna->pcidev.pci_bar_kva, + HQM_RXTX_Q_RAM_BASE_OFFSET); + + q_mem = (struct bna_rxtx_q_mem *)0; + rxq_mem = &q_mem[rxq->rxq_id].rxq; + + off = (unsigned long)&rxq_mem->pg_tbl_addr_lo; + writel(htonl(rxq_cfg.pg_tbl_addr_lo), base_addr + off); + + off = (unsigned long)&rxq_mem->pg_tbl_addr_hi; + writel(htonl(rxq_cfg.pg_tbl_addr_hi), base_addr + off); + + off = (unsigned long)&rxq_mem->cur_q_entry_lo; + writel(htonl(rxq_cfg.cur_q_entry_lo), base_addr + off); + + off = (unsigned long)&rxq_mem->cur_q_entry_hi; + writel(htonl(rxq_cfg.cur_q_entry_hi), base_addr + off); + + off = (unsigned long)&rxq_mem->pg_cnt_n_prd_ptr; + writel(rxq_cfg.pg_cnt_n_prd_ptr, base_addr + off); + + off = (unsigned long)&rxq_mem->entry_n_pg_size; + writel(rxq_cfg.entry_n_pg_size, base_addr + off); + + off = (unsigned long)&rxq_mem->sg_n_cq_n_cns_ptr; + writel(rxq_cfg.sg_n_cq_n_cns_ptr, base_addr + off); + + off = (unsigned long)&rxq_mem->buf_sz_n_q_state; + writel(rxq_cfg.buf_sz_n_q_state, base_addr + off); + + off = (unsigned long)&rxq_mem->next_qid; + writel(rxq_cfg.next_qid, base_addr + off); + + rxq->rcb->producer_index = 0; + rxq->rcb->consumer_index = 0; +} + +void +__bna_cq_start(struct bna_cq *cq) +{ + struct bna_cq_mem cq_cfg, *cq_mem; + const struct bna_qpt *qpt; + struct bna_dma_addr cur_q_addr; + u32 pg_num; + struct bna *bna = cq->rx->bna; + void __iomem *base_addr; + unsigned long off; + + qpt = &cq->qpt; + cur_q_addr = *((struct bna_dma_addr *)(qpt->kv_qpt_ptr)); + + /* + * Fill out structure, to be subsequently written + * to hardware + */ + cq_cfg.pg_tbl_addr_lo = qpt->hw_qpt_ptr.lsb; + cq_cfg.pg_tbl_addr_hi = qpt->hw_qpt_ptr.msb; + cq_cfg.cur_q_entry_lo = cur_q_addr.lsb; + cq_cfg.cur_q_entry_hi = cur_q_addr.msb; + + cq_cfg.pg_cnt_n_prd_ptr = (qpt->page_count << 16) | 0x0; + cq_cfg.entry_n_pg_size = + ((u32)(BFI_CQ_WI_SIZE >> 2) << 16) | (qpt->page_size >> 2); + cq_cfg.int_blk_n_cns_ptr = ((((u32)cq->ib_seg_offset) << 24) | + ((u32)(cq->ib->ib_id & 0xff) << 16) | 0x0); + cq_cfg.q_state = BNA_Q_IDLE_STATE; + + /* Write the page number register */ + pg_num = BNA_GET_PAGE_NUM(HQM0_BLK_PG_NUM + bna->port_num, + HQM_CQ_RAM_BASE_OFFSET); + + writel(pg_num, bna->regs.page_addr); + + /* H/W write */ + base_addr = BNA_GET_MEM_BASE_ADDR(bna->pcidev.pci_bar_kva, + HQM_CQ_RAM_BASE_OFFSET); + + cq_mem = (struct bna_cq_mem *)0; + + off = (unsigned long)&cq_mem[cq->cq_id].pg_tbl_addr_lo; + writel(htonl(cq_cfg.pg_tbl_addr_lo), base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].pg_tbl_addr_hi; + writel(htonl(cq_cfg.pg_tbl_addr_hi), base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].cur_q_entry_lo; + writel(htonl(cq_cfg.cur_q_entry_lo), base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].cur_q_entry_hi; + writel(htonl(cq_cfg.cur_q_entry_hi), base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].pg_cnt_n_prd_ptr; + writel(cq_cfg.pg_cnt_n_prd_ptr, base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].entry_n_pg_size; + writel(cq_cfg.entry_n_pg_size, base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].int_blk_n_cns_ptr; + writel(cq_cfg.int_blk_n_cns_ptr, base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].q_state; + writel(cq_cfg.q_state, base_addr + off); + + cq->ccb->producer_index = 0; + *(cq->ccb->hw_producer_index) = 0; +} + +void +bna_rit_create(struct bna_rx *rx) +{ + struct list_head *qe_rxp; + struct bna *bna; + struct bna_rxp *rxp; + struct bna_rxq *q0 = NULL; + struct bna_rxq *q1 = NULL; + int offset; + + bna = rx->bna; + + offset = 0; + list_for_each(qe_rxp, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe_rxp; + GET_RXQS(rxp, q0, q1); + rx->rxf.rit_segment->rit[offset].large_rxq_id = q0->rxq_id; + rx->rxf.rit_segment->rit[offset].small_rxq_id = + (q1 ? q1->rxq_id : 0); + offset++; + } +} + +int +_rx_can_satisfy(struct bna_rx_mod *rx_mod, + struct bna_rx_config *rx_cfg) +{ + if ((rx_mod->rx_free_count == 0) || + (rx_mod->rxp_free_count == 0) || + (rx_mod->rxq_free_count == 0)) + return 0; + + if (rx_cfg->rxp_type == BNA_RXP_SINGLE) { + if ((rx_mod->rxp_free_count < rx_cfg->num_paths) || + (rx_mod->rxq_free_count < rx_cfg->num_paths)) + return 0; + } else { + if ((rx_mod->rxp_free_count < rx_cfg->num_paths) || + (rx_mod->rxq_free_count < (2 * rx_cfg->num_paths))) + return 0; + } + + if (!bna_rit_mod_can_satisfy(&rx_mod->bna->rit_mod, rx_cfg->num_paths)) + return 0; + + return 1; +} + +struct bna_rxq * +_get_free_rxq(struct bna_rx_mod *rx_mod) +{ + struct bna_rxq *rxq = NULL; + struct list_head *qe = NULL; + + bfa_q_deq(&rx_mod->rxq_free_q, &qe); + if (qe) { + rx_mod->rxq_free_count--; + rxq = (struct bna_rxq *)qe; + } + return rxq; +} + +void +_put_free_rxq(struct bna_rx_mod *rx_mod, struct bna_rxq *rxq) +{ + bfa_q_qe_init(&rxq->qe); + list_add_tail(&rxq->qe, &rx_mod->rxq_free_q); + rx_mod->rxq_free_count++; +} + +struct bna_rxp * +_get_free_rxp(struct bna_rx_mod *rx_mod) +{ + struct list_head *qe = NULL; + struct bna_rxp *rxp = NULL; + + bfa_q_deq(&rx_mod->rxp_free_q, &qe); + if (qe) { + rx_mod->rxp_free_count--; + + rxp = (struct bna_rxp *)qe; + } + + return rxp; +} + +void +_put_free_rxp(struct bna_rx_mod *rx_mod, struct bna_rxp *rxp) +{ + bfa_q_qe_init(&rxp->qe); + list_add_tail(&rxp->qe, &rx_mod->rxp_free_q); + rx_mod->rxp_free_count++; +} + +struct bna_rx * +_get_free_rx(struct bna_rx_mod *rx_mod) +{ + struct list_head *qe = NULL; + struct bna_rx *rx = NULL; + + bfa_q_deq(&rx_mod->rx_free_q, &qe); + if (qe) { + rx_mod->rx_free_count--; + + rx = (struct bna_rx *)qe; + bfa_q_qe_init(qe); + list_add_tail(&rx->qe, &rx_mod->rx_active_q); + } + + return rx; +} + +void +_put_free_rx(struct bna_rx_mod *rx_mod, struct bna_rx *rx) +{ + bfa_q_qe_init(&rx->qe); + list_add_tail(&rx->qe, &rx_mod->rx_free_q); + rx_mod->rx_free_count++; +} + +void +_rx_init(struct bna_rx *rx, struct bna *bna) +{ + rx->bna = bna; + rx->rx_flags = 0; + + INIT_LIST_HEAD(&rx->rxp_q); + + rx->rxq_stop_wc.wc_resume = bna_rx_cb_rxq_stopped_all; + rx->rxq_stop_wc.wc_cbarg = rx; + rx->rxq_stop_wc.wc_count = 0; + + rx->stop_cbfn = NULL; + rx->stop_cbarg = NULL; +} + +void +_rxp_add_rxqs(struct bna_rxp *rxp, + struct bna_rxq *q0, + struct bna_rxq *q1) +{ + switch (rxp->type) { + case BNA_RXP_SINGLE: + rxp->rxq.single.only = q0; + rxp->rxq.single.reserved = NULL; + break; + case BNA_RXP_SLR: + rxp->rxq.slr.large = q0; + rxp->rxq.slr.small = q1; + break; + case BNA_RXP_HDS: + rxp->rxq.hds.data = q0; + rxp->rxq.hds.hdr = q1; + break; + default: + break; + } +} + +void +_rxq_qpt_init(struct bna_rxq *rxq, + struct bna_rxp *rxp, + u32 page_count, + u32 page_size, + struct bna_mem_descr *qpt_mem, + struct bna_mem_descr *swqpt_mem, + struct bna_mem_descr *page_mem) +{ + int i; + + rxq->qpt.hw_qpt_ptr.lsb = qpt_mem->dma.lsb; + rxq->qpt.hw_qpt_ptr.msb = qpt_mem->dma.msb; + rxq->qpt.kv_qpt_ptr = qpt_mem->kva; + rxq->qpt.page_count = page_count; + rxq->qpt.page_size = page_size; + + rxq->rcb->sw_qpt = (void **) swqpt_mem->kva; + + for (i = 0; i < rxq->qpt.page_count; i++) { + rxq->rcb->sw_qpt[i] = page_mem[i].kva; + ((struct bna_dma_addr *)rxq->qpt.kv_qpt_ptr)[i].lsb = + page_mem[i].dma.lsb; + ((struct bna_dma_addr *)rxq->qpt.kv_qpt_ptr)[i].msb = + page_mem[i].dma.msb; + + } +} + +void +_rxp_cqpt_setup(struct bna_rxp *rxp, + u32 page_count, + u32 page_size, + struct bna_mem_descr *qpt_mem, + struct bna_mem_descr *swqpt_mem, + struct bna_mem_descr *page_mem) +{ + int i; + + rxp->cq.qpt.hw_qpt_ptr.lsb = qpt_mem->dma.lsb; + rxp->cq.qpt.hw_qpt_ptr.msb = qpt_mem->dma.msb; + rxp->cq.qpt.kv_qpt_ptr = qpt_mem->kva; + rxp->cq.qpt.page_count = page_count; + rxp->cq.qpt.page_size = page_size; + + rxp->cq.ccb->sw_qpt = (void **) swqpt_mem->kva; + + for (i = 0; i < rxp->cq.qpt.page_count; i++) { + rxp->cq.ccb->sw_qpt[i] = page_mem[i].kva; + + ((struct bna_dma_addr *)rxp->cq.qpt.kv_qpt_ptr)[i].lsb = + page_mem[i].dma.lsb; + ((struct bna_dma_addr *)rxp->cq.qpt.kv_qpt_ptr)[i].msb = + page_mem[i].dma.msb; + + } +} + +void +_rx_add_rxp(struct bna_rx *rx, struct bna_rxp *rxp) +{ + list_add_tail(&rxp->qe, &rx->rxp_q); +} + +void +_init_rxmod_queues(struct bna_rx_mod *rx_mod) +{ + INIT_LIST_HEAD(&rx_mod->rx_free_q); + INIT_LIST_HEAD(&rx_mod->rxq_free_q); + INIT_LIST_HEAD(&rx_mod->rxp_free_q); + INIT_LIST_HEAD(&rx_mod->rx_active_q); + + rx_mod->rx_free_count = 0; + rx_mod->rxq_free_count = 0; + rx_mod->rxp_free_count = 0; +} + +void +_rx_ctor(struct bna_rx *rx, int id) +{ + bfa_q_qe_init(&rx->qe); + INIT_LIST_HEAD(&rx->rxp_q); + rx->bna = NULL; + + rx->rxf.rxf_id = id; + + /* FIXME: mbox_qe ctor()?? */ + bfa_q_qe_init(&rx->mbox_qe.qe); + + rx->stop_cbfn = NULL; + rx->stop_cbarg = NULL; +} + +void +bna_rx_cb_multi_rxq_stopped(void *arg, int status) +{ + struct bna_rxp *rxp = (struct bna_rxp *)arg; + + bfa_wc_down(&rxp->rx->rxq_stop_wc); +} + +void +bna_rx_cb_rxq_stopped_all(void *arg) +{ + struct bna_rx *rx = (struct bna_rx *)arg; + + bfa_fsm_send_event(rx, RX_E_RXQ_STOPPED); +} + +void +bna_rx_mod_cb_rx_stopped(void *arg, struct bna_rx *rx, + enum bna_cb_status status) +{ + struct bna_rx_mod *rx_mod = (struct bna_rx_mod *)arg; + + bfa_wc_down(&rx_mod->rx_stop_wc); +} + +void +bna_rx_mod_cb_rx_stopped_all(void *arg) +{ + struct bna_rx_mod *rx_mod = (struct bna_rx_mod *)arg; + + if (rx_mod->stop_cbfn) + rx_mod->stop_cbfn(&rx_mod->bna->port, BNA_CB_SUCCESS); + rx_mod->stop_cbfn = NULL; +} + +void +bna_rx_start(struct bna_rx *rx) +{ + rx->rx_flags |= BNA_RX_F_PORT_ENABLED; + if (rx->rx_flags & BNA_RX_F_ENABLE) + bfa_fsm_send_event(rx, RX_E_START); +} + +void +bna_rx_stop(struct bna_rx *rx) +{ + rx->rx_flags &= ~BNA_RX_F_PORT_ENABLED; + if (rx->fsm == (bfa_fsm_t) bna_rx_sm_stopped) + bna_rx_mod_cb_rx_stopped(&rx->bna->rx_mod, rx, BNA_CB_SUCCESS); + else { + rx->stop_cbfn = bna_rx_mod_cb_rx_stopped; + rx->stop_cbarg = &rx->bna->rx_mod; + bfa_fsm_send_event(rx, RX_E_STOP); + } +} + +void +bna_rx_fail(struct bna_rx *rx) +{ + /* Indicate port is not enabled, and failed */ + rx->rx_flags &= ~BNA_RX_F_PORT_ENABLED; + rx->rx_flags |= BNA_RX_F_PORT_FAILED; + bfa_fsm_send_event(rx, RX_E_FAIL); +} + +void +bna_rx_cb_rxf_started(struct bna_rx *rx, enum bna_cb_status status) +{ + bfa_fsm_send_event(rx, RX_E_RXF_STARTED); + if (rx->rxf.rxf_id < 32) + rx->bna->rx_mod.rxf_bmap[0] |= ((u32)1 << rx->rxf.rxf_id); + else + rx->bna->rx_mod.rxf_bmap[1] |= ((u32) + 1 << (rx->rxf.rxf_id - 32)); +} + +void +bna_rx_cb_rxf_stopped(struct bna_rx *rx, enum bna_cb_status status) +{ + bfa_fsm_send_event(rx, RX_E_RXF_STOPPED); + if (rx->rxf.rxf_id < 32) + rx->bna->rx_mod.rxf_bmap[0] &= ~(u32)1 << rx->rxf.rxf_id; + else + rx->bna->rx_mod.rxf_bmap[1] &= ~(u32) + 1 << (rx->rxf.rxf_id - 32); +} + +void +bna_rx_mod_start(struct bna_rx_mod *rx_mod, enum bna_rx_type type) +{ + struct bna_rx *rx; + struct list_head *qe; + + rx_mod->flags |= BNA_RX_MOD_F_PORT_STARTED; + if (type == BNA_RX_T_LOOPBACK) + rx_mod->flags |= BNA_RX_MOD_F_PORT_LOOPBACK; + + list_for_each(qe, &rx_mod->rx_active_q) { + rx = (struct bna_rx *)qe; + if (rx->type == type) + bna_rx_start(rx); + } +} + +void +bna_rx_mod_stop(struct bna_rx_mod *rx_mod, enum bna_rx_type type) +{ + struct bna_rx *rx; + struct list_head *qe; + + rx_mod->flags &= ~BNA_RX_MOD_F_PORT_STARTED; + rx_mod->flags &= ~BNA_RX_MOD_F_PORT_LOOPBACK; + + rx_mod->stop_cbfn = bna_port_cb_rx_stopped; + + /** + * Before calling bna_rx_stop(), increment rx_stop_wc as many times + * as we are going to call bna_rx_stop + */ + list_for_each(qe, &rx_mod->rx_active_q) { + rx = (struct bna_rx *)qe; + if (rx->type == type) + bfa_wc_up(&rx_mod->rx_stop_wc); + } + + if (rx_mod->rx_stop_wc.wc_count == 0) { + rx_mod->stop_cbfn(&rx_mod->bna->port, BNA_CB_SUCCESS); + rx_mod->stop_cbfn = NULL; + return; + } + + list_for_each(qe, &rx_mod->rx_active_q) { + rx = (struct bna_rx *)qe; + if (rx->type == type) + bna_rx_stop(rx); + } +} + +void +bna_rx_mod_fail(struct bna_rx_mod *rx_mod) +{ + struct bna_rx *rx; + struct list_head *qe; + + rx_mod->flags &= ~BNA_RX_MOD_F_PORT_STARTED; + rx_mod->flags &= ~BNA_RX_MOD_F_PORT_LOOPBACK; + + list_for_each(qe, &rx_mod->rx_active_q) { + rx = (struct bna_rx *)qe; + bna_rx_fail(rx); + } +} + +void bna_rx_mod_init(struct bna_rx_mod *rx_mod, struct bna *bna, + struct bna_res_info *res_info) +{ + int index; + struct bna_rx *rx_ptr; + struct bna_rxp *rxp_ptr; + struct bna_rxq *rxq_ptr; + + rx_mod->bna = bna; + rx_mod->flags = 0; + + rx_mod->rx = (struct bna_rx *) + res_info[BNA_RES_MEM_T_RX_ARRAY].res_u.mem_info.mdl[0].kva; + rx_mod->rxp = (struct bna_rxp *) + res_info[BNA_RES_MEM_T_RXP_ARRAY].res_u.mem_info.mdl[0].kva; + rx_mod->rxq = (struct bna_rxq *) + res_info[BNA_RES_MEM_T_RXQ_ARRAY].res_u.mem_info.mdl[0].kva; + + /* Initialize the queues */ + _init_rxmod_queues(rx_mod); + + /* Build RX queues */ + for (index = 0; index < BFI_MAX_RXQ; index++) { + rx_ptr = &rx_mod->rx[index]; + _rx_ctor(rx_ptr, index); + list_add_tail(&rx_ptr->qe, &rx_mod->rx_free_q); + rx_mod->rx_free_count++; + } + + /* build RX-path queue */ + for (index = 0; index < BFI_MAX_RXQ; index++) { + rxp_ptr = &rx_mod->rxp[index]; + rxp_ptr->cq.cq_id = index; + bfa_q_qe_init(&rxp_ptr->qe); + list_add_tail(&rxp_ptr->qe, &rx_mod->rxp_free_q); + rx_mod->rxp_free_count++; + } + + /* build RXQ queue */ + for (index = 0; index < BFI_MAX_RXQ; index++) { + rxq_ptr = &rx_mod->rxq[index]; + rxq_ptr->rxq_id = index; + + bfa_q_qe_init(&rxq_ptr->qe); + list_add_tail(&rxq_ptr->qe, &rx_mod->rxq_free_q); + rx_mod->rxq_free_count++; + } + + rx_mod->rx_stop_wc.wc_resume = bna_rx_mod_cb_rx_stopped_all; + rx_mod->rx_stop_wc.wc_cbarg = rx_mod; + rx_mod->rx_stop_wc.wc_count = 0; +} + +void +bna_rx_mod_uninit(struct bna_rx_mod *rx_mod) +{ + struct list_head *qe; + int i; + + i = 0; + list_for_each(qe, &rx_mod->rx_free_q) + i++; + + i = 0; + list_for_each(qe, &rx_mod->rxp_free_q) + i++; + + i = 0; + list_for_each(qe, &rx_mod->rxq_free_q) + i++; + + rx_mod->bna = NULL; +} + +int +bna_rx_state_get(struct bna_rx *rx) +{ + return bfa_sm_to_state(rx_sm_table, rx->fsm); +} + +void +bna_rx_res_req(struct bna_rx_config *q_cfg, struct bna_res_info *res_info) +{ + u32 cq_size, hq_size, dq_size; + u32 cpage_count, hpage_count, dpage_count; + struct bna_mem_info *mem_info; + u32 cq_depth; + u32 hq_depth; + u32 dq_depth; + + dq_depth = q_cfg->q_depth; + hq_depth = ((q_cfg->rxp_type == BNA_RXP_SINGLE) ? 0 : q_cfg->q_depth); + cq_depth = dq_depth + hq_depth; + + BNA_TO_POWER_OF_2_HIGH(cq_depth); + cq_size = cq_depth * BFI_CQ_WI_SIZE; + cq_size = ALIGN(cq_size, PAGE_SIZE); + cpage_count = SIZE_TO_PAGES(cq_size); + + BNA_TO_POWER_OF_2_HIGH(dq_depth); + dq_size = dq_depth * BFI_RXQ_WI_SIZE; + dq_size = ALIGN(dq_size, PAGE_SIZE); + dpage_count = SIZE_TO_PAGES(dq_size); + + if (BNA_RXP_SINGLE != q_cfg->rxp_type) { + BNA_TO_POWER_OF_2_HIGH(hq_depth); + hq_size = hq_depth * BFI_RXQ_WI_SIZE; + hq_size = ALIGN(hq_size, PAGE_SIZE); + hpage_count = SIZE_TO_PAGES(hq_size); + } else { + hpage_count = 0; + } + + /* CCB structures */ + res_info[BNA_RX_RES_MEM_T_CCB].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_CCB].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = sizeof(struct bna_ccb); + mem_info->num = q_cfg->num_paths; + + /* RCB structures */ + res_info[BNA_RX_RES_MEM_T_RCB].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_RCB].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = sizeof(struct bna_rcb); + mem_info->num = BNA_GET_RXQS(q_cfg); + + /* Completion QPT */ + res_info[BNA_RX_RES_MEM_T_CQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_CQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = cpage_count * sizeof(struct bna_dma_addr); + mem_info->num = q_cfg->num_paths; + + /* Completion s/w QPT */ + res_info[BNA_RX_RES_MEM_T_CSWQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_CSWQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = cpage_count * sizeof(void *); + mem_info->num = q_cfg->num_paths; + + /* Completion QPT pages */ + res_info[BNA_RX_RES_MEM_T_CQPT_PAGE].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_CQPT_PAGE].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = PAGE_SIZE; + mem_info->num = cpage_count * q_cfg->num_paths; + + /* Data QPTs */ + res_info[BNA_RX_RES_MEM_T_DQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_DQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = dpage_count * sizeof(struct bna_dma_addr); + mem_info->num = q_cfg->num_paths; + + /* Data s/w QPTs */ + res_info[BNA_RX_RES_MEM_T_DSWQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_DSWQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = dpage_count * sizeof(void *); + mem_info->num = q_cfg->num_paths; + + /* Data QPT pages */ + res_info[BNA_RX_RES_MEM_T_DPAGE].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_DPAGE].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = PAGE_SIZE; + mem_info->num = dpage_count * q_cfg->num_paths; + + /* Hdr QPTs */ + res_info[BNA_RX_RES_MEM_T_HQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_HQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = hpage_count * sizeof(struct bna_dma_addr); + mem_info->num = (hpage_count ? q_cfg->num_paths : 0); + + /* Hdr s/w QPTs */ + res_info[BNA_RX_RES_MEM_T_HSWQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_HSWQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = hpage_count * sizeof(void *); + mem_info->num = (hpage_count ? q_cfg->num_paths : 0); + + /* Hdr QPT pages */ + res_info[BNA_RX_RES_MEM_T_HPAGE].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_HPAGE].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = (hpage_count ? PAGE_SIZE : 0); + mem_info->num = (hpage_count ? (hpage_count * q_cfg->num_paths) : 0); + + /* RX Interrupts */ + res_info[BNA_RX_RES_T_INTR].res_type = BNA_RES_T_INTR; + res_info[BNA_RX_RES_T_INTR].res_u.intr_info.intr_type = BNA_INTR_T_MSIX; + res_info[BNA_RX_RES_T_INTR].res_u.intr_info.num = q_cfg->num_paths; +} + +struct bna_rx * +bna_rx_create(struct bna *bna, struct bnad *bnad, + struct bna_rx_config *rx_cfg, + struct bna_rx_event_cbfn *rx_cbfn, + struct bna_res_info *res_info, + void *priv) +{ + struct bna_rx_mod *rx_mod = &bna->rx_mod; + struct bna_rx *rx; + struct bna_rxp *rxp; + struct bna_rxq *q0; + struct bna_rxq *q1; + struct bna_intr_info *intr_info; + u32 page_count; + struct bna_mem_descr *ccb_mem; + struct bna_mem_descr *rcb_mem; + struct bna_mem_descr *unmapq_mem; + struct bna_mem_descr *cqpt_mem; + struct bna_mem_descr *cswqpt_mem; + struct bna_mem_descr *cpage_mem; + struct bna_mem_descr *hqpt_mem; /* Header/Small Q qpt */ + struct bna_mem_descr *dqpt_mem; /* Data/Large Q qpt */ + struct bna_mem_descr *hsqpt_mem; /* s/w qpt for hdr */ + struct bna_mem_descr *dsqpt_mem; /* s/w qpt for data */ + struct bna_mem_descr *hpage_mem; /* hdr page mem */ + struct bna_mem_descr *dpage_mem; /* data page mem */ + int i, cpage_idx = 0, dpage_idx = 0, hpage_idx = 0, ret; + int dpage_count, hpage_count, rcb_idx; + struct bna_ib_config ibcfg; + /* Fail if we don't have enough RXPs, RXQs */ + if (!_rx_can_satisfy(rx_mod, rx_cfg)) + return NULL; + + /* Initialize resource pointers */ + intr_info = &res_info[BNA_RX_RES_T_INTR].res_u.intr_info; + ccb_mem = &res_info[BNA_RX_RES_MEM_T_CCB].res_u.mem_info.mdl[0]; + rcb_mem = &res_info[BNA_RX_RES_MEM_T_RCB].res_u.mem_info.mdl[0]; + unmapq_mem = &res_info[BNA_RX_RES_MEM_T_UNMAPQ].res_u.mem_info.mdl[0]; + cqpt_mem = &res_info[BNA_RX_RES_MEM_T_CQPT].res_u.mem_info.mdl[0]; + cswqpt_mem = &res_info[BNA_RX_RES_MEM_T_CSWQPT].res_u.mem_info.mdl[0]; + cpage_mem = &res_info[BNA_RX_RES_MEM_T_CQPT_PAGE].res_u.mem_info.mdl[0]; + hqpt_mem = &res_info[BNA_RX_RES_MEM_T_HQPT].res_u.mem_info.mdl[0]; + dqpt_mem = &res_info[BNA_RX_RES_MEM_T_DQPT].res_u.mem_info.mdl[0]; + hsqpt_mem = &res_info[BNA_RX_RES_MEM_T_HSWQPT].res_u.mem_info.mdl[0]; + dsqpt_mem = &res_info[BNA_RX_RES_MEM_T_DSWQPT].res_u.mem_info.mdl[0]; + hpage_mem = &res_info[BNA_RX_RES_MEM_T_HPAGE].res_u.mem_info.mdl[0]; + dpage_mem = &res_info[BNA_RX_RES_MEM_T_DPAGE].res_u.mem_info.mdl[0]; + + /* Compute q depth & page count */ + page_count = res_info[BNA_RX_RES_MEM_T_CQPT_PAGE].res_u.mem_info.num / + rx_cfg->num_paths; + + dpage_count = res_info[BNA_RX_RES_MEM_T_DPAGE].res_u.mem_info.num / + rx_cfg->num_paths; + + hpage_count = res_info[BNA_RX_RES_MEM_T_HPAGE].res_u.mem_info.num / + rx_cfg->num_paths; + /* Get RX pointer */ + rx = _get_free_rx(rx_mod); + _rx_init(rx, bna); + rx->priv = priv; + rx->type = rx_cfg->rx_type; + + rx->rcb_setup_cbfn = rx_cbfn->rcb_setup_cbfn; + rx->rcb_destroy_cbfn = rx_cbfn->rcb_destroy_cbfn; + rx->ccb_setup_cbfn = rx_cbfn->ccb_setup_cbfn; + rx->ccb_destroy_cbfn = rx_cbfn->ccb_destroy_cbfn; + /* Following callbacks are mandatory */ + rx->rx_cleanup_cbfn = rx_cbfn->rx_cleanup_cbfn; + rx->rx_post_cbfn = rx_cbfn->rx_post_cbfn; + + if (rx->bna->rx_mod.flags & BNA_RX_MOD_F_PORT_STARTED) { + switch (rx->type) { + case BNA_RX_T_REGULAR: + if (!(rx->bna->rx_mod.flags & + BNA_RX_MOD_F_PORT_LOOPBACK)) + rx->rx_flags |= BNA_RX_F_PORT_ENABLED; + break; + case BNA_RX_T_LOOPBACK: + if (rx->bna->rx_mod.flags & BNA_RX_MOD_F_PORT_LOOPBACK) + rx->rx_flags |= BNA_RX_F_PORT_ENABLED; + break; + } + } + + for (i = 0, rcb_idx = 0; i < rx_cfg->num_paths; i++) { + rxp = _get_free_rxp(rx_mod); + rxp->type = rx_cfg->rxp_type; + rxp->rx = rx; + rxp->cq.rx = rx; + + /* Get required RXQs, and queue them to rx-path */ + q0 = _get_free_rxq(rx_mod); + if (BNA_RXP_SINGLE == rx_cfg->rxp_type) + q1 = NULL; + else + q1 = _get_free_rxq(rx_mod); + + /* Initialize IB */ + if (1 == intr_info->num) { + rxp->cq.ib = bna_ib_get(&bna->ib_mod, + intr_info->intr_type, + intr_info->idl[0].vector); + rxp->vector = intr_info->idl[0].vector; + } else { + rxp->cq.ib = bna_ib_get(&bna->ib_mod, + intr_info->intr_type, + intr_info->idl[i].vector); + + /* Map the MSI-x vector used for this RXP */ + rxp->vector = intr_info->idl[i].vector; + } + + rxp->cq.ib_seg_offset = bna_ib_reserve_idx(rxp->cq.ib); + + ibcfg.coalescing_timeo = BFI_RX_COALESCING_TIMEO; + ibcfg.interpkt_count = BFI_RX_INTERPKT_COUNT; + ibcfg.interpkt_timeo = BFI_RX_INTERPKT_TIMEO; + ibcfg.ctrl_flags = BFI_IB_CF_INT_ENABLE; + + ret = bna_ib_config(rxp->cq.ib, &ibcfg); + + /* Link rxqs to rxp */ + _rxp_add_rxqs(rxp, q0, q1); + + /* Link rxp to rx */ + _rx_add_rxp(rx, rxp); + + q0->rx = rx; + q0->rxp = rxp; + + /* Initialize RCB for the large / data q */ + q0->rcb = (struct bna_rcb *) rcb_mem[rcb_idx].kva; + RXQ_RCB_INIT(q0, rxp, rx_cfg->q_depth, bna, 0, + (void *)unmapq_mem[rcb_idx].kva); + rcb_idx++; + (q0)->rx_packets = (q0)->rx_bytes = 0; + (q0)->rx_packets_with_error = (q0)->rxbuf_alloc_failed = 0; + + /* Initialize RXQs */ + _rxq_qpt_init(q0, rxp, dpage_count, PAGE_SIZE, + &dqpt_mem[i], &dsqpt_mem[i], &dpage_mem[dpage_idx]); + q0->rcb->page_idx = dpage_idx; + q0->rcb->page_count = dpage_count; + dpage_idx += dpage_count; + + /* Call bnad to complete rcb setup */ + if (rx->rcb_setup_cbfn) + rx->rcb_setup_cbfn(bnad, q0->rcb); + + if (q1) { + q1->rx = rx; + q1->rxp = rxp; + + q1->rcb = (struct bna_rcb *) rcb_mem[rcb_idx].kva; + RXQ_RCB_INIT(q1, rxp, rx_cfg->q_depth, bna, 1, + (void *)unmapq_mem[rcb_idx].kva); + rcb_idx++; + (q1)->buffer_size = (rx_cfg)->small_buff_size; + (q1)->rx_packets = (q1)->rx_bytes = 0; + (q1)->rx_packets_with_error = + (q1)->rxbuf_alloc_failed = 0; + + _rxq_qpt_init(q1, rxp, hpage_count, PAGE_SIZE, + &hqpt_mem[i], &hsqpt_mem[i], + &hpage_mem[hpage_idx]); + q1->rcb->page_idx = hpage_idx; + q1->rcb->page_count = hpage_count; + hpage_idx += hpage_count; + + /* Call bnad to complete rcb setup */ + if (rx->rcb_setup_cbfn) + rx->rcb_setup_cbfn(bnad, q1->rcb); + } + /* Setup RXP::CQ */ + rxp->cq.ccb = (struct bna_ccb *) ccb_mem[i].kva; + _rxp_cqpt_setup(rxp, page_count, PAGE_SIZE, + &cqpt_mem[i], &cswqpt_mem[i], &cpage_mem[cpage_idx]); + rxp->cq.ccb->page_idx = cpage_idx; + rxp->cq.ccb->page_count = page_count; + cpage_idx += page_count; + + rxp->cq.ccb->pkt_rate.small_pkt_cnt = 0; + rxp->cq.ccb->pkt_rate.large_pkt_cnt = 0; + + rxp->cq.ccb->producer_index = 0; + rxp->cq.ccb->q_depth = rx_cfg->q_depth + + ((rx_cfg->rxp_type == BNA_RXP_SINGLE) ? + 0 : rx_cfg->q_depth); + rxp->cq.ccb->i_dbell = &rxp->cq.ib->door_bell; + rxp->cq.ccb->rcb[0] = q0->rcb; + if (q1) + rxp->cq.ccb->rcb[1] = q1->rcb; + rxp->cq.ccb->cq = &rxp->cq; + rxp->cq.ccb->bnad = bna->bnad; + rxp->cq.ccb->hw_producer_index = + ((volatile u32 *)rxp->cq.ib->ib_seg_host_addr_kva + + (rxp->cq.ib_seg_offset * BFI_IBIDX_SIZE)); + *(rxp->cq.ccb->hw_producer_index) = 0; + rxp->cq.ccb->intr_type = intr_info->intr_type; + rxp->cq.ccb->intr_vector = (intr_info->num == 1) ? + intr_info->idl[0].vector : + intr_info->idl[i].vector; + rxp->cq.ccb->rx_coalescing_timeo = + rxp->cq.ib->ib_config.coalescing_timeo; + rxp->cq.ccb->id = i; + + /* Call bnad to complete CCB setup */ + if (rx->ccb_setup_cbfn) + rx->ccb_setup_cbfn(bnad, rxp->cq.ccb); + + } /* for each rx-path */ + + bna_rxf_init(&rx->rxf, rx, rx_cfg); + + bfa_fsm_set_state(rx, bna_rx_sm_stopped); + + return rx; +} + +void +bna_rx_destroy(struct bna_rx *rx) +{ + struct bna_rx_mod *rx_mod = &rx->bna->rx_mod; + struct bna_ib_mod *ib_mod = &rx->bna->ib_mod; + struct bna_rxq *q0 = NULL; + struct bna_rxq *q1 = NULL; + struct bna_rxp *rxp; + struct list_head *qe; + + bna_rxf_uninit(&rx->rxf); + + while (!list_empty(&rx->rxp_q)) { + bfa_q_deq(&rx->rxp_q, &rxp); + GET_RXQS(rxp, q0, q1); + /* Callback to bnad for destroying RCB */ + if (rx->rcb_destroy_cbfn) + rx->rcb_destroy_cbfn(rx->bna->bnad, q0->rcb); + q0->rcb = NULL; + q0->rxp = NULL; + q0->rx = NULL; + _put_free_rxq(rx_mod, q0); + if (q1) { + /* Callback to bnad for destroying RCB */ + if (rx->rcb_destroy_cbfn) + rx->rcb_destroy_cbfn(rx->bna->bnad, q1->rcb); + q1->rcb = NULL; + q1->rxp = NULL; + q1->rx = NULL; + _put_free_rxq(rx_mod, q1); + } + rxp->rxq.slr.large = NULL; + rxp->rxq.slr.small = NULL; + if (rxp->cq.ib) { + if (rxp->cq.ib_seg_offset != 0xff) + bna_ib_release_idx(rxp->cq.ib, + rxp->cq.ib_seg_offset); + bna_ib_put(ib_mod, rxp->cq.ib); + rxp->cq.ib = NULL; + } + /* Callback to bnad for destroying CCB */ + if (rx->ccb_destroy_cbfn) + rx->ccb_destroy_cbfn(rx->bna->bnad, rxp->cq.ccb); + rxp->cq.ccb = NULL; + rxp->rx = NULL; + _put_free_rxp(rx_mod, rxp); + } + + list_for_each(qe, &rx_mod->rx_active_q) { + if (qe == &rx->qe) { + list_del(&rx->qe); + bfa_q_qe_init(&rx->qe); + break; + } + } + + rx->bna = NULL; + rx->priv = NULL; + _put_free_rx(rx_mod, rx); +} + +void +bna_rx_enable(struct bna_rx *rx) +{ + if (rx->fsm != (bfa_sm_t)bna_rx_sm_stopped) + return; + + rx->rx_flags |= BNA_RX_F_ENABLE; + if (rx->rx_flags & BNA_RX_F_PORT_ENABLED) + bfa_fsm_send_event(rx, RX_E_START); +} + +void +bna_rx_disable(struct bna_rx *rx, enum bna_cleanup_type type, + void (*cbfn)(void *, struct bna_rx *, + enum bna_cb_status)) +{ + if (type == BNA_SOFT_CLEANUP) { + /* h/w should not be accessed. Treat we're stopped */ + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + } else { + rx->stop_cbfn = cbfn; + rx->stop_cbarg = rx->bna->bnad; + + rx->rx_flags &= ~BNA_RX_F_ENABLE; + + bfa_fsm_send_event(rx, RX_E_STOP); + } +} + +/** + * TX + */ +#define call_tx_stop_cbfn(tx, status)\ +do {\ + if ((tx)->stop_cbfn)\ + (tx)->stop_cbfn((tx)->stop_cbarg, (tx), status);\ + (tx)->stop_cbfn = NULL;\ + (tx)->stop_cbarg = NULL;\ +} while (0) + +#define call_tx_prio_change_cbfn(tx, status)\ +do {\ + if ((tx)->prio_change_cbfn)\ + (tx)->prio_change_cbfn((tx)->bna->bnad, (tx), status);\ + (tx)->prio_change_cbfn = NULL;\ +} while (0) + +static void bna_tx_mod_cb_tx_stopped(void *tx_mod, struct bna_tx *tx, + enum bna_cb_status status); +static void bna_tx_cb_txq_stopped(void *arg, int status); +static void bna_tx_cb_stats_cleared(void *arg, int status); +static void __bna_tx_stop(struct bna_tx *tx); +static void __bna_tx_start(struct bna_tx *tx); +static void __bna_txf_stat_clr(struct bna_tx *tx); + +enum bna_tx_event { + TX_E_START = 1, + TX_E_STOP = 2, + TX_E_FAIL = 3, + TX_E_TXQ_STOPPED = 4, + TX_E_PRIO_CHANGE = 5, + TX_E_STAT_CLEARED = 6, +}; + +enum bna_tx_state { + BNA_TX_STOPPED = 1, + BNA_TX_STARTED = 2, + BNA_TX_TXQ_STOP_WAIT = 3, + BNA_TX_PRIO_STOP_WAIT = 4, + BNA_TX_STAT_CLR_WAIT = 5, +}; + +bfa_fsm_state_decl(bna_tx, stopped, struct bna_tx, + enum bna_tx_event); +bfa_fsm_state_decl(bna_tx, started, struct bna_tx, + enum bna_tx_event); +bfa_fsm_state_decl(bna_tx, txq_stop_wait, struct bna_tx, + enum bna_tx_event); +bfa_fsm_state_decl(bna_tx, prio_stop_wait, struct bna_tx, + enum bna_tx_event); +bfa_fsm_state_decl(bna_tx, stat_clr_wait, struct bna_tx, + enum bna_tx_event); + +static struct bfa_sm_table tx_sm_table[] = { + {BFA_SM(bna_tx_sm_stopped), BNA_TX_STOPPED}, + {BFA_SM(bna_tx_sm_started), BNA_TX_STARTED}, + {BFA_SM(bna_tx_sm_txq_stop_wait), BNA_TX_TXQ_STOP_WAIT}, + {BFA_SM(bna_tx_sm_prio_stop_wait), BNA_TX_PRIO_STOP_WAIT}, + {BFA_SM(bna_tx_sm_stat_clr_wait), BNA_TX_STAT_CLR_WAIT}, +}; + +static void +bna_tx_sm_stopped_entry(struct bna_tx *tx) +{ + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + (tx->tx_cleanup_cbfn)(tx->bna->bnad, txq->tcb); + } + + call_tx_stop_cbfn(tx, BNA_CB_SUCCESS); +} + +static void +bna_tx_sm_stopped(struct bna_tx *tx, enum bna_tx_event event) +{ + switch (event) { + case TX_E_START: + bfa_fsm_set_state(tx, bna_tx_sm_started); + break; + + case TX_E_STOP: + bfa_fsm_set_state(tx, bna_tx_sm_stopped); + break; + + case TX_E_FAIL: + /* No-op */ + break; + + case TX_E_PRIO_CHANGE: + call_tx_prio_change_cbfn(tx, BNA_CB_SUCCESS); + break; + + case TX_E_TXQ_STOPPED: + /** + * This event is received due to flushing of mbox when + * device fails + */ + /* No-op */ + break; + + default: + bfa_sm_fault(tx->bna, event); + } +} + +static void +bna_tx_sm_started_entry(struct bna_tx *tx) +{ + struct bna_txq *txq; + struct list_head *qe; + + __bna_tx_start(tx); + + /* Start IB */ + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_ack(&txq->ib->door_bell, 0); + } +} + +static void +bna_tx_sm_started(struct bna_tx *tx, enum bna_tx_event event) +{ + struct bna_txq *txq; + struct list_head *qe; + + switch (event) { + case TX_E_STOP: + bfa_fsm_set_state(tx, bna_tx_sm_txq_stop_wait); + __bna_tx_stop(tx); + break; + + case TX_E_FAIL: + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_fail(txq->ib); + (tx->tx_stall_cbfn)(tx->bna->bnad, txq->tcb); + } + bfa_fsm_set_state(tx, bna_tx_sm_stopped); + break; + + case TX_E_PRIO_CHANGE: + bfa_fsm_set_state(tx, bna_tx_sm_prio_stop_wait); + break; + + default: + bfa_sm_fault(tx->bna, event); + } +} + +static void +bna_tx_sm_txq_stop_wait_entry(struct bna_tx *tx) +{ +} + +static void +bna_tx_sm_txq_stop_wait(struct bna_tx *tx, enum bna_tx_event event) +{ + struct bna_txq *txq; + struct list_head *qe; + + switch (event) { + case TX_E_FAIL: + bfa_fsm_set_state(tx, bna_tx_sm_stopped); + break; + + case TX_E_TXQ_STOPPED: + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_stop(txq->ib); + } + bfa_fsm_set_state(tx, bna_tx_sm_stat_clr_wait); + break; + + case TX_E_PRIO_CHANGE: + /* No-op */ + break; + + default: + bfa_sm_fault(tx->bna, event); + } +} + +static void +bna_tx_sm_prio_stop_wait_entry(struct bna_tx *tx) +{ + __bna_tx_stop(tx); +} + +static void +bna_tx_sm_prio_stop_wait(struct bna_tx *tx, enum bna_tx_event event) +{ + struct bna_txq *txq; + struct list_head *qe; + + switch (event) { + case TX_E_STOP: + bfa_fsm_set_state(tx, bna_tx_sm_txq_stop_wait); + break; + + case TX_E_FAIL: + call_tx_prio_change_cbfn(tx, BNA_CB_FAIL); + bfa_fsm_set_state(tx, bna_tx_sm_stopped); + break; + + case TX_E_TXQ_STOPPED: + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_stop(txq->ib); + (tx->tx_cleanup_cbfn)(tx->bna->bnad, txq->tcb); + } + call_tx_prio_change_cbfn(tx, BNA_CB_SUCCESS); + bfa_fsm_set_state(tx, bna_tx_sm_started); + break; + + case TX_E_PRIO_CHANGE: + /* No-op */ + break; + + default: + bfa_sm_fault(tx->bna, event); + } +} + +static void +bna_tx_sm_stat_clr_wait_entry(struct bna_tx *tx) +{ + __bna_txf_stat_clr(tx); +} + +static void +bna_tx_sm_stat_clr_wait(struct bna_tx *tx, enum bna_tx_event event) +{ + switch (event) { + case TX_E_FAIL: + case TX_E_STAT_CLEARED: + bfa_fsm_set_state(tx, bna_tx_sm_stopped); + break; + + default: + bfa_sm_fault(tx->bna, event); + } +} + +static void +__bna_txq_start(struct bna_tx *tx, struct bna_txq *txq) +{ + struct bna_rxtx_q_mem *q_mem; + struct bna_txq_mem txq_cfg; + struct bna_txq_mem *txq_mem; + struct bna_dma_addr cur_q_addr; + u32 pg_num; + void __iomem *base_addr; + unsigned long off; + + /* Fill out structure, to be subsequently written to hardware */ + txq_cfg.pg_tbl_addr_lo = txq->qpt.hw_qpt_ptr.lsb; + txq_cfg.pg_tbl_addr_hi = txq->qpt.hw_qpt_ptr.msb; + cur_q_addr = *((struct bna_dma_addr *)(txq->qpt.kv_qpt_ptr)); + txq_cfg.cur_q_entry_lo = cur_q_addr.lsb; + txq_cfg.cur_q_entry_hi = cur_q_addr.msb; + + txq_cfg.pg_cnt_n_prd_ptr = (txq->qpt.page_count << 16) | 0x0; + + txq_cfg.entry_n_pg_size = ((u32)(BFI_TXQ_WI_SIZE >> 2) << 16) | + (txq->qpt.page_size >> 2); + txq_cfg.int_blk_n_cns_ptr = ((((u32)txq->ib_seg_offset) << 24) | + ((u32)(txq->ib->ib_id & 0xff) << 16) | 0x0); + + txq_cfg.cns_ptr2_n_q_state = BNA_Q_IDLE_STATE; + txq_cfg.nxt_qid_n_fid_n_pri = (((tx->txf.txf_id & 0x3f) << 3) | + (txq->priority & 0x3)); + txq_cfg.wvc_n_cquota_n_rquota = + ((((u32)BFI_TX_MAX_WRR_QUOTA & 0xfff) << 12) | + (BFI_TX_MAX_WRR_QUOTA & 0xfff)); + + /* Setup the page and write to H/W */ + + pg_num = BNA_GET_PAGE_NUM(HQM0_BLK_PG_NUM + tx->bna->port_num, + HQM_RXTX_Q_RAM_BASE_OFFSET); + writel(pg_num, tx->bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(tx->bna->pcidev.pci_bar_kva, + HQM_RXTX_Q_RAM_BASE_OFFSET); + q_mem = (struct bna_rxtx_q_mem *)0; + txq_mem = &q_mem[txq->txq_id].txq; + + /* + * The following 4 lines, is a hack b'cos the H/W needs to read + * these DMA addresses as little endian + */ + + off = (unsigned long)&txq_mem->pg_tbl_addr_lo; + writel(htonl(txq_cfg.pg_tbl_addr_lo), base_addr + off); + + off = (unsigned long)&txq_mem->pg_tbl_addr_hi; + writel(htonl(txq_cfg.pg_tbl_addr_hi), base_addr + off); + + off = (unsigned long)&txq_mem->cur_q_entry_lo; + writel(htonl(txq_cfg.cur_q_entry_lo), base_addr + off); + + off = (unsigned long)&txq_mem->cur_q_entry_hi; + writel(htonl(txq_cfg.cur_q_entry_hi), base_addr + off); + + off = (unsigned long)&txq_mem->pg_cnt_n_prd_ptr; + writel(txq_cfg.pg_cnt_n_prd_ptr, base_addr + off); + + off = (unsigned long)&txq_mem->entry_n_pg_size; + writel(txq_cfg.entry_n_pg_size, base_addr + off); + + off = (unsigned long)&txq_mem->int_blk_n_cns_ptr; + writel(txq_cfg.int_blk_n_cns_ptr, base_addr + off); + + off = (unsigned long)&txq_mem->cns_ptr2_n_q_state; + writel(txq_cfg.cns_ptr2_n_q_state, base_addr + off); + + off = (unsigned long)&txq_mem->nxt_qid_n_fid_n_pri; + writel(txq_cfg.nxt_qid_n_fid_n_pri, base_addr + off); + + off = (unsigned long)&txq_mem->wvc_n_cquota_n_rquota; + writel(txq_cfg.wvc_n_cquota_n_rquota, base_addr + off); + + txq->tcb->producer_index = 0; + txq->tcb->consumer_index = 0; + *(txq->tcb->hw_consumer_index) = 0; + +} + +static void +__bna_txq_stop(struct bna_tx *tx, struct bna_txq *txq) +{ + struct bfi_ll_q_stop_req ll_req; + u32 bit_mask[2] = {0, 0}; + if (txq->txq_id < 32) + bit_mask[0] = (u32)1 << txq->txq_id; + else + bit_mask[1] = (u32)1 << (txq->txq_id - 32); + + memset(&ll_req, 0, sizeof(ll_req)); + ll_req.mh.msg_class = BFI_MC_LL; + ll_req.mh.msg_id = BFI_LL_H2I_TXQ_STOP_REQ; + ll_req.mh.mtag.h2i.lpu_id = 0; + ll_req.q_id_mask[0] = htonl(bit_mask[0]); + ll_req.q_id_mask[1] = htonl(bit_mask[1]); + + bna_mbox_qe_fill(&tx->mbox_qe, &ll_req, sizeof(ll_req), + bna_tx_cb_txq_stopped, tx); + + bna_mbox_send(tx->bna, &tx->mbox_qe); +} + +static void +__bna_txf_start(struct bna_tx *tx) +{ + struct bna_tx_fndb_ram *tx_fndb; + struct bna_txf *txf = &tx->txf; + void __iomem *base_addr; + unsigned long off; + + writel(BNA_GET_PAGE_NUM(LUT0_MEM_BLK_BASE_PG_NUM + + (tx->bna->port_num * 2), TX_FNDB_RAM_BASE_OFFSET), + tx->bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(tx->bna->pcidev.pci_bar_kva, + TX_FNDB_RAM_BASE_OFFSET); + + tx_fndb = (struct bna_tx_fndb_ram *)0; + off = (unsigned long)&tx_fndb[txf->txf_id].vlan_n_ctrl_flags; + + writel(((u32)txf->vlan << 16) | txf->ctrl_flags, + base_addr + off); + + if (tx->txf.txf_id < 32) + tx->bna->tx_mod.txf_bmap[0] |= ((u32)1 << tx->txf.txf_id); + else + tx->bna->tx_mod.txf_bmap[1] |= ((u32) + 1 << (tx->txf.txf_id - 32)); +} + +static void +__bna_txf_stop(struct bna_tx *tx) +{ + struct bna_tx_fndb_ram *tx_fndb; + u32 page_num; + u32 ctl_flags; + struct bna_txf *txf = &tx->txf; + void __iomem *base_addr; + unsigned long off; + + /* retrieve the running txf_flags & turn off enable bit */ + page_num = BNA_GET_PAGE_NUM(LUT0_MEM_BLK_BASE_PG_NUM + + (tx->bna->port_num * 2), TX_FNDB_RAM_BASE_OFFSET); + writel(page_num, tx->bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(tx->bna->pcidev.pci_bar_kva, + TX_FNDB_RAM_BASE_OFFSET); + tx_fndb = (struct bna_tx_fndb_ram *)0; + off = (unsigned long)&tx_fndb[txf->txf_id].vlan_n_ctrl_flags; + + ctl_flags = readl(base_addr + off); + ctl_flags &= ~BFI_TXF_CF_ENABLE; + + writel(ctl_flags, base_addr + off); + + if (tx->txf.txf_id < 32) + tx->bna->tx_mod.txf_bmap[0] &= ~((u32)1 << tx->txf.txf_id); + else + tx->bna->tx_mod.txf_bmap[0] &= ~((u32) + 1 << (tx->txf.txf_id - 32)); +} + +static void +__bna_txf_stat_clr(struct bna_tx *tx) +{ + struct bfi_ll_stats_req ll_req; + u32 txf_bmap[2] = {0, 0}; + if (tx->txf.txf_id < 32) + txf_bmap[0] = ((u32)1 << tx->txf.txf_id); + else + txf_bmap[1] = ((u32)1 << (tx->txf.txf_id - 32)); + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_STATS_CLEAR_REQ, 0); + ll_req.stats_mask = 0; + ll_req.rxf_id_mask[0] = 0; + ll_req.rxf_id_mask[1] = 0; + ll_req.txf_id_mask[0] = htonl(txf_bmap[0]); + ll_req.txf_id_mask[1] = htonl(txf_bmap[1]); + + bna_mbox_qe_fill(&tx->mbox_qe, &ll_req, sizeof(ll_req), + bna_tx_cb_stats_cleared, tx); + bna_mbox_send(tx->bna, &tx->mbox_qe); +} + +static void +__bna_tx_start(struct bna_tx *tx) +{ + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_start(txq->ib); + __bna_txq_start(tx, txq); + } + + __bna_txf_start(tx); + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + txq->tcb->priority = txq->priority; + (tx->tx_resume_cbfn)(tx->bna->bnad, txq->tcb); + } +} + +static void +__bna_tx_stop(struct bna_tx *tx) +{ + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + (tx->tx_stall_cbfn)(tx->bna->bnad, txq->tcb); + } + + __bna_txf_stop(tx); + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bfa_wc_up(&tx->txq_stop_wc); + } + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + __bna_txq_stop(tx, txq); + } +} + +static void +bna_txq_qpt_setup(struct bna_txq *txq, int page_count, int page_size, + struct bna_mem_descr *qpt_mem, + struct bna_mem_descr *swqpt_mem, + struct bna_mem_descr *page_mem) +{ + int i; + + txq->qpt.hw_qpt_ptr.lsb = qpt_mem->dma.lsb; + txq->qpt.hw_qpt_ptr.msb = qpt_mem->dma.msb; + txq->qpt.kv_qpt_ptr = qpt_mem->kva; + txq->qpt.page_count = page_count; + txq->qpt.page_size = page_size; + + txq->tcb->sw_qpt = (void **) swqpt_mem->kva; + + for (i = 0; i < page_count; i++) { + txq->tcb->sw_qpt[i] = page_mem[i].kva; + + ((struct bna_dma_addr *)txq->qpt.kv_qpt_ptr)[i].lsb = + page_mem[i].dma.lsb; + ((struct bna_dma_addr *)txq->qpt.kv_qpt_ptr)[i].msb = + page_mem[i].dma.msb; + + } +} + +static void +bna_tx_free(struct bna_tx *tx) +{ + struct bna_tx_mod *tx_mod = &tx->bna->tx_mod; + struct bna_txq *txq; + struct bna_ib_mod *ib_mod = &tx->bna->ib_mod; + struct list_head *qe; + + while (!list_empty(&tx->txq_q)) { + bfa_q_deq(&tx->txq_q, &txq); + bfa_q_qe_init(&txq->qe); + if (txq->ib) { + if (txq->ib_seg_offset != -1) + bna_ib_release_idx(txq->ib, + txq->ib_seg_offset); + bna_ib_put(ib_mod, txq->ib); + txq->ib = NULL; + } + txq->tcb = NULL; + txq->tx = NULL; + list_add_tail(&txq->qe, &tx_mod->txq_free_q); + } + + list_for_each(qe, &tx_mod->tx_active_q) { + if (qe == &tx->qe) { + list_del(&tx->qe); + bfa_q_qe_init(&tx->qe); + break; + } + } + + tx->bna = NULL; + tx->priv = NULL; + list_add_tail(&tx->qe, &tx_mod->tx_free_q); +} + +static void +bna_tx_cb_txq_stopped(void *arg, int status) +{ + struct bna_tx *tx = (struct bna_tx *)arg; + + bfa_q_qe_init(&tx->mbox_qe.qe); + bfa_wc_down(&tx->txq_stop_wc); +} + +static void +bna_tx_cb_txq_stopped_all(void *arg) +{ + struct bna_tx *tx = (struct bna_tx *)arg; + + bfa_fsm_send_event(tx, TX_E_TXQ_STOPPED); +} + +static void +bna_tx_cb_stats_cleared(void *arg, int status) +{ + struct bna_tx *tx = (struct bna_tx *)arg; + + bfa_q_qe_init(&tx->mbox_qe.qe); + + bfa_fsm_send_event(tx, TX_E_STAT_CLEARED); +} + +static void +bna_tx_start(struct bna_tx *tx) +{ + tx->flags |= BNA_TX_F_PORT_STARTED; + if (tx->flags & BNA_TX_F_ENABLED) + bfa_fsm_send_event(tx, TX_E_START); +} + +static void +bna_tx_stop(struct bna_tx *tx) +{ + tx->stop_cbfn = bna_tx_mod_cb_tx_stopped; + tx->stop_cbarg = &tx->bna->tx_mod; + + tx->flags &= ~BNA_TX_F_PORT_STARTED; + bfa_fsm_send_event(tx, TX_E_STOP); +} + +static void +bna_tx_fail(struct bna_tx *tx) +{ + tx->flags &= ~BNA_TX_F_PORT_STARTED; + bfa_fsm_send_event(tx, TX_E_FAIL); +} + +void +bna_tx_prio_changed(struct bna_tx *tx, int prio) +{ + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + txq->priority = prio; + } + + bfa_fsm_send_event(tx, TX_E_PRIO_CHANGE); +} + +static void +bna_tx_cee_link_status(struct bna_tx *tx, int cee_link) +{ + if (cee_link) + tx->flags |= BNA_TX_F_PRIO_LOCK; + else + tx->flags &= ~BNA_TX_F_PRIO_LOCK; +} + +static void +bna_tx_mod_cb_tx_stopped(void *arg, struct bna_tx *tx, + enum bna_cb_status status) +{ + struct bna_tx_mod *tx_mod = (struct bna_tx_mod *)arg; + + bfa_wc_down(&tx_mod->tx_stop_wc); +} + +static void +bna_tx_mod_cb_tx_stopped_all(void *arg) +{ + struct bna_tx_mod *tx_mod = (struct bna_tx_mod *)arg; + + if (tx_mod->stop_cbfn) + tx_mod->stop_cbfn(&tx_mod->bna->port, BNA_CB_SUCCESS); + tx_mod->stop_cbfn = NULL; +} + +void +bna_tx_res_req(int num_txq, int txq_depth, struct bna_res_info *res_info) +{ + u32 q_size; + u32 page_count; + struct bna_mem_info *mem_info; + + res_info[BNA_TX_RES_MEM_T_TCB].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_TX_RES_MEM_T_TCB].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = sizeof(struct bna_tcb); + mem_info->num = num_txq; + + q_size = txq_depth * BFI_TXQ_WI_SIZE; + q_size = ALIGN(q_size, PAGE_SIZE); + page_count = q_size >> PAGE_SHIFT; + + res_info[BNA_TX_RES_MEM_T_QPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_TX_RES_MEM_T_QPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = page_count * sizeof(struct bna_dma_addr); + mem_info->num = num_txq; + + res_info[BNA_TX_RES_MEM_T_SWQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_TX_RES_MEM_T_SWQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = page_count * sizeof(void *); + mem_info->num = num_txq; + + res_info[BNA_TX_RES_MEM_T_PAGE].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_TX_RES_MEM_T_PAGE].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = PAGE_SIZE; + mem_info->num = num_txq * page_count; + + res_info[BNA_TX_RES_INTR_T_TXCMPL].res_type = BNA_RES_T_INTR; + res_info[BNA_TX_RES_INTR_T_TXCMPL].res_u.intr_info.intr_type = + BNA_INTR_T_MSIX; + res_info[BNA_TX_RES_INTR_T_TXCMPL].res_u.intr_info.num = num_txq; +} + +struct bna_tx * +bna_tx_create(struct bna *bna, struct bnad *bnad, + struct bna_tx_config *tx_cfg, + struct bna_tx_event_cbfn *tx_cbfn, + struct bna_res_info *res_info, void *priv) +{ + struct bna_intr_info *intr_info; + struct bna_tx_mod *tx_mod = &bna->tx_mod; + struct bna_tx *tx; + struct bna_txq *txq; + struct list_head *qe; + struct bna_ib_mod *ib_mod = &bna->ib_mod; + struct bna_doorbell_qset *qset; + struct bna_ib_config ib_config; + int page_count; + int page_size; + int page_idx; + int i; + unsigned long off; + + intr_info = &res_info[BNA_TX_RES_INTR_T_TXCMPL].res_u.intr_info; + page_count = (res_info[BNA_TX_RES_MEM_T_PAGE].res_u.mem_info.num) / + tx_cfg->num_txq; + page_size = res_info[BNA_TX_RES_MEM_T_PAGE].res_u.mem_info.len; + + /** + * Get resources + */ + + if ((intr_info->num != 1) && (intr_info->num != tx_cfg->num_txq)) + return NULL; + + /* Tx */ + + if (list_empty(&tx_mod->tx_free_q)) + return NULL; + bfa_q_deq(&tx_mod->tx_free_q, &tx); + bfa_q_qe_init(&tx->qe); + + /* TxQs */ + + INIT_LIST_HEAD(&tx->txq_q); + for (i = 0; i < tx_cfg->num_txq; i++) { + if (list_empty(&tx_mod->txq_free_q)) + goto err_return; + + bfa_q_deq(&tx_mod->txq_free_q, &txq); + bfa_q_qe_init(&txq->qe); + list_add_tail(&txq->qe, &tx->txq_q); + txq->ib = NULL; + txq->ib_seg_offset = -1; + txq->tx = tx; + } + + /* IBs */ + i = 0; + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + + if (intr_info->num == 1) + txq->ib = bna_ib_get(ib_mod, intr_info->intr_type, + intr_info->idl[0].vector); + else + txq->ib = bna_ib_get(ib_mod, intr_info->intr_type, + intr_info->idl[i].vector); + + if (txq->ib == NULL) + goto err_return; + + txq->ib_seg_offset = bna_ib_reserve_idx(txq->ib); + if (txq->ib_seg_offset == -1) + goto err_return; + + i++; + } + + /* + * Initialize + */ + + /* Tx */ + + tx->tcb_setup_cbfn = tx_cbfn->tcb_setup_cbfn; + tx->tcb_destroy_cbfn = tx_cbfn->tcb_destroy_cbfn; + /* Following callbacks are mandatory */ + tx->tx_stall_cbfn = tx_cbfn->tx_stall_cbfn; + tx->tx_resume_cbfn = tx_cbfn->tx_resume_cbfn; + tx->tx_cleanup_cbfn = tx_cbfn->tx_cleanup_cbfn; + + list_add_tail(&tx->qe, &tx_mod->tx_active_q); + tx->bna = bna; + tx->priv = priv; + tx->txq_stop_wc.wc_resume = bna_tx_cb_txq_stopped_all; + tx->txq_stop_wc.wc_cbarg = tx; + tx->txq_stop_wc.wc_count = 0; + + tx->type = tx_cfg->tx_type; + + tx->flags = 0; + if (tx->bna->tx_mod.flags & BNA_TX_MOD_F_PORT_STARTED) { + switch (tx->type) { + case BNA_TX_T_REGULAR: + if (!(tx->bna->tx_mod.flags & + BNA_TX_MOD_F_PORT_LOOPBACK)) + tx->flags |= BNA_TX_F_PORT_STARTED; + break; + case BNA_TX_T_LOOPBACK: + if (tx->bna->tx_mod.flags & BNA_TX_MOD_F_PORT_LOOPBACK) + tx->flags |= BNA_TX_F_PORT_STARTED; + break; + } + } + if (tx->bna->tx_mod.cee_link) + tx->flags |= BNA_TX_F_PRIO_LOCK; + + /* TxQ */ + + i = 0; + page_idx = 0; + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + txq->priority = tx_mod->priority; + txq->tcb = (struct bna_tcb *) + res_info[BNA_TX_RES_MEM_T_TCB].res_u.mem_info.mdl[i].kva; + txq->tx_packets = 0; + txq->tx_bytes = 0; + + /* IB */ + + ib_config.coalescing_timeo = BFI_TX_COALESCING_TIMEO; + ib_config.interpkt_timeo = 0; /* Not used */ + ib_config.interpkt_count = BFI_TX_INTERPKT_COUNT; + ib_config.ctrl_flags = (BFI_IB_CF_INTER_PKT_DMA | + BFI_IB_CF_INT_ENABLE | + BFI_IB_CF_COALESCING_MODE); + bna_ib_config(txq->ib, &ib_config); + + /* TCB */ + + txq->tcb->producer_index = 0; + txq->tcb->consumer_index = 0; + txq->tcb->hw_consumer_index = (volatile u32 *) + ((volatile u8 *)txq->ib->ib_seg_host_addr_kva + + (txq->ib_seg_offset * BFI_IBIDX_SIZE)); + *(txq->tcb->hw_consumer_index) = 0; + txq->tcb->q_depth = tx_cfg->txq_depth; + txq->tcb->unmap_q = (void *) + res_info[BNA_TX_RES_MEM_T_UNMAPQ].res_u.mem_info.mdl[i].kva; + qset = (struct bna_doorbell_qset *)0; + off = (unsigned long)&qset[txq->txq_id].txq[0]; + txq->tcb->q_dbell = off + + BNA_GET_DOORBELL_BASE_ADDR(bna->pcidev.pci_bar_kva); + txq->tcb->i_dbell = &txq->ib->door_bell; + txq->tcb->intr_type = intr_info->intr_type; + txq->tcb->intr_vector = (intr_info->num == 1) ? + intr_info->idl[0].vector : + intr_info->idl[i].vector; + txq->tcb->txq = txq; + txq->tcb->bnad = bnad; + txq->tcb->id = i; + + /* QPT, SWQPT, Pages */ + bna_txq_qpt_setup(txq, page_count, page_size, + &res_info[BNA_TX_RES_MEM_T_QPT].res_u.mem_info.mdl[i], + &res_info[BNA_TX_RES_MEM_T_SWQPT].res_u.mem_info.mdl[i], + &res_info[BNA_TX_RES_MEM_T_PAGE]. + res_u.mem_info.mdl[page_idx]); + txq->tcb->page_idx = page_idx; + txq->tcb->page_count = page_count; + page_idx += page_count; + + /* Callback to bnad for setting up TCB */ + if (tx->tcb_setup_cbfn) + (tx->tcb_setup_cbfn)(bna->bnad, txq->tcb); + + i++; + } + + /* TxF */ + + tx->txf.ctrl_flags = BFI_TXF_CF_ENABLE | BFI_TXF_CF_VLAN_WI_BASED; + tx->txf.vlan = 0; + + /* Mbox element */ + bfa_q_qe_init(&tx->mbox_qe.qe); + + bfa_fsm_set_state(tx, bna_tx_sm_stopped); + + return tx; + +err_return: + bna_tx_free(tx); + return NULL; +} + +void +bna_tx_destroy(struct bna_tx *tx) +{ + /* Callback to bnad for destroying TCB */ + if (tx->tcb_destroy_cbfn) { + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + (tx->tcb_destroy_cbfn)(tx->bna->bnad, txq->tcb); + } + } + + bna_tx_free(tx); +} + +void +bna_tx_enable(struct bna_tx *tx) +{ + if (tx->fsm != (bfa_sm_t)bna_tx_sm_stopped) + return; + + tx->flags |= BNA_TX_F_ENABLED; + + if (tx->flags & BNA_TX_F_PORT_STARTED) + bfa_fsm_send_event(tx, TX_E_START); +} + +void +bna_tx_disable(struct bna_tx *tx, enum bna_cleanup_type type, + void (*cbfn)(void *, struct bna_tx *, enum bna_cb_status)) +{ + if (type == BNA_SOFT_CLEANUP) { + (*cbfn)(tx->bna->bnad, tx, BNA_CB_SUCCESS); + return; + } + + tx->stop_cbfn = cbfn; + tx->stop_cbarg = tx->bna->bnad; + + tx->flags &= ~BNA_TX_F_ENABLED; + + bfa_fsm_send_event(tx, TX_E_STOP); +} + +int +bna_tx_state_get(struct bna_tx *tx) +{ + return bfa_sm_to_state(tx_sm_table, tx->fsm); +} + +void +bna_tx_mod_init(struct bna_tx_mod *tx_mod, struct bna *bna, + struct bna_res_info *res_info) +{ + int i; + + tx_mod->bna = bna; + tx_mod->flags = 0; + + tx_mod->tx = (struct bna_tx *) + res_info[BNA_RES_MEM_T_TX_ARRAY].res_u.mem_info.mdl[0].kva; + tx_mod->txq = (struct bna_txq *) + res_info[BNA_RES_MEM_T_TXQ_ARRAY].res_u.mem_info.mdl[0].kva; + + INIT_LIST_HEAD(&tx_mod->tx_free_q); + INIT_LIST_HEAD(&tx_mod->tx_active_q); + + INIT_LIST_HEAD(&tx_mod->txq_free_q); + + for (i = 0; i < BFI_MAX_TXQ; i++) { + tx_mod->tx[i].txf.txf_id = i; + bfa_q_qe_init(&tx_mod->tx[i].qe); + list_add_tail(&tx_mod->tx[i].qe, &tx_mod->tx_free_q); + + tx_mod->txq[i].txq_id = i; + bfa_q_qe_init(&tx_mod->txq[i].qe); + list_add_tail(&tx_mod->txq[i].qe, &tx_mod->txq_free_q); + } + + tx_mod->tx_stop_wc.wc_resume = bna_tx_mod_cb_tx_stopped_all; + tx_mod->tx_stop_wc.wc_cbarg = tx_mod; + tx_mod->tx_stop_wc.wc_count = 0; +} + +void +bna_tx_mod_uninit(struct bna_tx_mod *tx_mod) +{ + struct list_head *qe; + int i; + + i = 0; + list_for_each(qe, &tx_mod->tx_free_q) + i++; + + i = 0; + list_for_each(qe, &tx_mod->txq_free_q) + i++; + + tx_mod->bna = NULL; +} + +void +bna_tx_mod_start(struct bna_tx_mod *tx_mod, enum bna_tx_type type) +{ + struct bna_tx *tx; + struct list_head *qe; + + tx_mod->flags |= BNA_TX_MOD_F_PORT_STARTED; + if (type == BNA_TX_T_LOOPBACK) + tx_mod->flags |= BNA_TX_MOD_F_PORT_LOOPBACK; + + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + if (tx->type == type) + bna_tx_start(tx); + } +} + +void +bna_tx_mod_stop(struct bna_tx_mod *tx_mod, enum bna_tx_type type) +{ + struct bna_tx *tx; + struct list_head *qe; + + tx_mod->flags &= ~BNA_TX_MOD_F_PORT_STARTED; + tx_mod->flags &= ~BNA_TX_MOD_F_PORT_LOOPBACK; + + tx_mod->stop_cbfn = bna_port_cb_tx_stopped; + + /** + * Before calling bna_tx_stop(), increment tx_stop_wc as many times + * as we are going to call bna_tx_stop + */ + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + if (tx->type == type) + bfa_wc_up(&tx_mod->tx_stop_wc); + } + + if (tx_mod->tx_stop_wc.wc_count == 0) { + tx_mod->stop_cbfn(&tx_mod->bna->port, BNA_CB_SUCCESS); + tx_mod->stop_cbfn = NULL; + return; + } + + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + if (tx->type == type) + bna_tx_stop(tx); + } +} + +void +bna_tx_mod_fail(struct bna_tx_mod *tx_mod) +{ + struct bna_tx *tx; + struct list_head *qe; + + tx_mod->flags &= ~BNA_TX_MOD_F_PORT_STARTED; + tx_mod->flags &= ~BNA_TX_MOD_F_PORT_LOOPBACK; + + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + bna_tx_fail(tx); + } +} + +void +bna_tx_mod_prio_changed(struct bna_tx_mod *tx_mod, int prio) +{ + struct bna_tx *tx; + struct list_head *qe; + + if (prio != tx_mod->priority) { + tx_mod->priority = prio; + + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + bna_tx_prio_changed(tx, prio); + } + } +} + +void +bna_tx_mod_cee_link_status(struct bna_tx_mod *tx_mod, int cee_link) +{ + struct bna_tx *tx; + struct list_head *qe; + + tx_mod->cee_link = cee_link; + + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + bna_tx_cee_link_status(tx, cee_link); + } +} diff --git a/drivers/net/bna/bna_types.h b/drivers/net/bna/bna_types.h new file mode 100644 index 000000000000..6877310f6ef4 --- /dev/null +++ b/drivers/net/bna/bna_types.h @@ -0,0 +1,1128 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#ifndef __BNA_TYPES_H__ +#define __BNA_TYPES_H__ + +#include "cna.h" +#include "bna_hw.h" +#include "bfa_cee.h" + +/** + * + * Forward declarations + * + */ + +struct bna_txq; +struct bna_tx; +struct bna_rxq; +struct bna_cq; +struct bna_rx; +struct bna_rxf; +struct bna_port; +struct bna; +struct bnad; + +/** + * + * Enums, primitive data types + * + */ + +enum bna_status { + BNA_STATUS_T_DISABLED = 0, + BNA_STATUS_T_ENABLED = 1 +}; + +enum bna_cleanup_type { + BNA_HARD_CLEANUP = 0, + BNA_SOFT_CLEANUP = 1 +}; + +enum bna_cb_status { + BNA_CB_SUCCESS = 0, + BNA_CB_FAIL = 1, + BNA_CB_INTERRUPT = 2, + BNA_CB_BUSY = 3, + BNA_CB_INVALID_MAC = 4, + BNA_CB_MCAST_LIST_FULL = 5, + BNA_CB_UCAST_CAM_FULL = 6, + BNA_CB_WAITING = 7, + BNA_CB_NOT_EXEC = 8 +}; + +enum bna_res_type { + BNA_RES_T_MEM = 1, + BNA_RES_T_INTR = 2 +}; + +enum bna_mem_type { + BNA_MEM_T_KVA = 1, + BNA_MEM_T_DMA = 2 +}; + +enum bna_intr_type { + BNA_INTR_T_INTX = 1, + BNA_INTR_T_MSIX = 2 +}; + +enum bna_res_req_type { + BNA_RES_MEM_T_COM = 0, + BNA_RES_MEM_T_ATTR = 1, + BNA_RES_MEM_T_FWTRC = 2, + BNA_RES_MEM_T_STATS = 3, + BNA_RES_MEM_T_SWSTATS = 4, + BNA_RES_MEM_T_IBIDX = 5, + BNA_RES_MEM_T_IB_ARRAY = 6, + BNA_RES_MEM_T_INTR_ARRAY = 7, + BNA_RES_MEM_T_IDXSEG_ARRAY = 8, + BNA_RES_MEM_T_TX_ARRAY = 9, + BNA_RES_MEM_T_TXQ_ARRAY = 10, + BNA_RES_MEM_T_RX_ARRAY = 11, + BNA_RES_MEM_T_RXP_ARRAY = 12, + BNA_RES_MEM_T_RXQ_ARRAY = 13, + BNA_RES_MEM_T_UCMAC_ARRAY = 14, + BNA_RES_MEM_T_MCMAC_ARRAY = 15, + BNA_RES_MEM_T_RIT_ENTRY = 16, + BNA_RES_MEM_T_RIT_SEGMENT = 17, + BNA_RES_INTR_T_MBOX = 18, + BNA_RES_T_MAX +}; + +enum bna_tx_res_req_type { + BNA_TX_RES_MEM_T_TCB = 0, + BNA_TX_RES_MEM_T_UNMAPQ = 1, + BNA_TX_RES_MEM_T_QPT = 2, + BNA_TX_RES_MEM_T_SWQPT = 3, + BNA_TX_RES_MEM_T_PAGE = 4, + BNA_TX_RES_INTR_T_TXCMPL = 5, + BNA_TX_RES_T_MAX, +}; + +enum bna_rx_mem_type { + BNA_RX_RES_MEM_T_CCB = 0, /* CQ context */ + BNA_RX_RES_MEM_T_RCB = 1, /* CQ context */ + BNA_RX_RES_MEM_T_UNMAPQ = 2, /* UnmapQ for RxQs */ + BNA_RX_RES_MEM_T_CQPT = 3, /* CQ QPT */ + BNA_RX_RES_MEM_T_CSWQPT = 4, /* S/W QPT */ + BNA_RX_RES_MEM_T_CQPT_PAGE = 5, /* CQPT page */ + BNA_RX_RES_MEM_T_HQPT = 6, /* RX QPT */ + BNA_RX_RES_MEM_T_DQPT = 7, /* RX QPT */ + BNA_RX_RES_MEM_T_HSWQPT = 8, /* RX s/w QPT */ + BNA_RX_RES_MEM_T_DSWQPT = 9, /* RX s/w QPT */ + BNA_RX_RES_MEM_T_DPAGE = 10, /* RX s/w QPT */ + BNA_RX_RES_MEM_T_HPAGE = 11, /* RX s/w QPT */ + BNA_RX_RES_T_INTR = 12, /* Rx interrupts */ + BNA_RX_RES_T_MAX = 13 +}; + +enum bna_mbox_state { + BNA_MBOX_FREE = 0, + BNA_MBOX_POSTED = 1 +}; + +enum bna_tx_type { + BNA_TX_T_REGULAR = 0, + BNA_TX_T_LOOPBACK = 1, +}; + +enum bna_tx_flags { + BNA_TX_F_PORT_STARTED = 1, + BNA_TX_F_ENABLED = 2, + BNA_TX_F_PRIO_LOCK = 4, +}; + +enum bna_tx_mod_flags { + BNA_TX_MOD_F_PORT_STARTED = 1, + BNA_TX_MOD_F_PORT_LOOPBACK = 2, +}; + +enum bna_rx_type { + BNA_RX_T_REGULAR = 0, + BNA_RX_T_LOOPBACK = 1, +}; + +enum bna_rxp_type { + BNA_RXP_SINGLE = 1, + BNA_RXP_SLR = 2, + BNA_RXP_HDS = 3 +}; + +enum bna_rxmode { + BNA_RXMODE_PROMISC = 1, + BNA_RXMODE_DEFAULT = 2, + BNA_RXMODE_ALLMULTI = 4 +}; + +enum bna_rx_event { + RX_E_START = 1, + RX_E_STOP = 2, + RX_E_FAIL = 3, + RX_E_RXF_STARTED = 4, + RX_E_RXF_STOPPED = 5, + RX_E_RXQ_STOPPED = 6, +}; + +enum bna_rx_state { + BNA_RX_STOPPED = 1, + BNA_RX_RXF_START_WAIT = 2, + BNA_RX_STARTED = 3, + BNA_RX_RXF_STOP_WAIT = 4, + BNA_RX_RXQ_STOP_WAIT = 5, +}; + +enum bna_rx_flags { + BNA_RX_F_ENABLE = 0x01, /* bnad enabled rxf */ + BNA_RX_F_PORT_ENABLED = 0x02, /* Port object is enabled */ + BNA_RX_F_PORT_FAILED = 0x04, /* Port in failed state */ +}; + +enum bna_rx_mod_flags { + BNA_RX_MOD_F_PORT_STARTED = 1, + BNA_RX_MOD_F_PORT_LOOPBACK = 2, +}; + +enum bna_rxf_oper_state { + BNA_RXF_OPER_STATE_RUNNING = 0x01, /* rxf operational */ + BNA_RXF_OPER_STATE_PAUSED = 0x02, /* rxf in PAUSED state */ +}; + +enum bna_rxf_flags { + BNA_RXF_FL_STOP_PENDING = 0x01, + BNA_RXF_FL_FAILED = 0x02, + BNA_RXF_FL_RSS_CONFIG_PENDING = 0x04, + BNA_RXF_FL_OPERSTATE_CHANGED = 0x08, + BNA_RXF_FL_RXF_ENABLED = 0x10, + BNA_RXF_FL_VLAN_CONFIG_PENDING = 0x20, +}; + +enum bna_rxf_event { + RXF_E_START = 1, + RXF_E_STOP = 2, + RXF_E_FAIL = 3, + RXF_E_CAM_FLTR_MOD = 4, + RXF_E_STARTED = 5, + RXF_E_STOPPED = 6, + RXF_E_CAM_FLTR_RESP = 7, + RXF_E_PAUSE = 8, + RXF_E_RESUME = 9, + RXF_E_STAT_CLEARED = 10, +}; + +enum bna_rxf_state { + BNA_RXF_STOPPED = 1, + BNA_RXF_START_WAIT = 2, + BNA_RXF_CAM_FLTR_MOD_WAIT = 3, + BNA_RXF_STARTED = 4, + BNA_RXF_CAM_FLTR_CLR_WAIT = 5, + BNA_RXF_STOP_WAIT = 6, + BNA_RXF_PAUSE_WAIT = 7, + BNA_RXF_RESUME_WAIT = 8, + BNA_RXF_STAT_CLR_WAIT = 9, +}; + +enum bna_port_type { + BNA_PORT_T_REGULAR = 0, + BNA_PORT_T_LOOPBACK_INTERNAL = 1, + BNA_PORT_T_LOOPBACK_EXTERNAL = 2, +}; + +enum bna_link_status { + BNA_LINK_DOWN = 0, + BNA_LINK_UP = 1, + BNA_CEE_UP = 2 +}; + +enum bna_llport_flags { + BNA_LLPORT_F_ENABLED = 1, + BNA_LLPORT_F_RX_ENABLED = 2 +}; + +enum bna_port_flags { + BNA_PORT_F_DEVICE_READY = 1, + BNA_PORT_F_ENABLED = 2, + BNA_PORT_F_PAUSE_CHANGED = 4, + BNA_PORT_F_MTU_CHANGED = 8 +}; + +enum bna_pkt_rates { + BNA_PKT_RATE_10K = 10000, + BNA_PKT_RATE_20K = 20000, + BNA_PKT_RATE_30K = 30000, + BNA_PKT_RATE_40K = 40000, + BNA_PKT_RATE_50K = 50000, + BNA_PKT_RATE_60K = 60000, + BNA_PKT_RATE_70K = 70000, + BNA_PKT_RATE_80K = 80000, +}; + +enum bna_dim_load_types { + BNA_LOAD_T_HIGH_4 = 0, /* 80K <= r */ + BNA_LOAD_T_HIGH_3 = 1, /* 60K <= r < 80K */ + BNA_LOAD_T_HIGH_2 = 2, /* 50K <= r < 60K */ + BNA_LOAD_T_HIGH_1 = 3, /* 40K <= r < 50K */ + BNA_LOAD_T_LOW_1 = 4, /* 30K <= r < 40K */ + BNA_LOAD_T_LOW_2 = 5, /* 20K <= r < 30K */ + BNA_LOAD_T_LOW_3 = 6, /* 10K <= r < 20K */ + BNA_LOAD_T_LOW_4 = 7, /* r < 10K */ + BNA_LOAD_T_MAX = 8 +}; + +enum bna_dim_bias_types { + BNA_BIAS_T_SMALL = 0, /* small pkts > (large pkts * 2) */ + BNA_BIAS_T_LARGE = 1, /* Not BNA_BIAS_T_SMALL */ + BNA_BIAS_T_MAX = 2 +}; + +struct bna_mac { + /* This should be the first one */ + struct list_head qe; + u8 addr[ETH_ALEN]; +}; + +struct bna_mem_descr { + u32 len; + void *kva; + struct bna_dma_addr dma; +}; + +struct bna_mem_info { + enum bna_mem_type mem_type; + u32 len; + u32 num; + u32 align_sz; /* 0/1 = no alignment */ + struct bna_mem_descr *mdl; + void *cookie; /* For bnad to unmap dma later */ +}; + +struct bna_intr_descr { + int vector; +}; + +struct bna_intr_info { + enum bna_intr_type intr_type; + int num; + struct bna_intr_descr *idl; +}; + +union bna_res_u { + struct bna_mem_info mem_info; + struct bna_intr_info intr_info; +}; + +struct bna_res_info { + enum bna_res_type res_type; + union bna_res_u res_u; +}; + +/* HW QPT */ +struct bna_qpt { + struct bna_dma_addr hw_qpt_ptr; + void *kv_qpt_ptr; + u32 page_count; + u32 page_size; +}; + +/** + * + * Device + * + */ + +struct bna_device { + bfa_fsm_t fsm; + struct bfa_ioc ioc; + + enum bna_intr_type intr_type; + int vector; + + void (*ready_cbfn)(struct bnad *bnad, enum bna_cb_status status); + struct bnad *ready_cbarg; + + void (*stop_cbfn)(struct bnad *bnad, enum bna_cb_status status); + struct bnad *stop_cbarg; + + struct bna *bna; +}; + +/** + * + * Mail box + * + */ + +struct bna_mbox_qe { + /* This should be the first one */ + struct list_head qe; + + struct bfa_mbox_cmd cmd; + u32 cmd_len; + /* Callback for port, tx, rx, rxf */ + void (*cbfn)(void *arg, int status); + void *cbarg; +}; + +struct bna_mbox_mod { + enum bna_mbox_state state; + struct list_head posted_q; + u32 msg_pending; + u32 msg_ctr; + struct bna *bna; +}; + +/** + * + * Port + * + */ + +/* Pause configuration */ +struct bna_pause_config { + enum bna_status tx_pause; + enum bna_status rx_pause; +}; + +struct bna_llport { + bfa_fsm_t fsm; + enum bna_llport_flags flags; + + enum bna_port_type type; + + enum bna_link_status link_status; + + int admin_up_count; + + void (*stop_cbfn)(struct bna_port *, enum bna_cb_status); + + struct bna_mbox_qe mbox_qe; + + struct bna *bna; +}; + +struct bna_port { + bfa_fsm_t fsm; + enum bna_port_flags flags; + + enum bna_port_type type; + + struct bna_llport llport; + + struct bna_pause_config pause_config; + u8 priority; + int mtu; + + /* Callback for bna_port_disable(), port_stop() */ + void (*stop_cbfn)(void *, enum bna_cb_status); + void *stop_cbarg; + + /* Callback for bna_port_pause_config() */ + void (*pause_cbfn)(struct bnad *, enum bna_cb_status); + + /* Callback for bna_port_mtu_set() */ + void (*mtu_cbfn)(struct bnad *, enum bna_cb_status); + + void (*link_cbfn)(struct bnad *, enum bna_link_status); + + struct bfa_wc chld_stop_wc; + + struct bna_mbox_qe mbox_qe; + + struct bna *bna; +}; + +/** + * + * Interrupt Block + * + */ + +/* IB index segment structure */ +struct bna_ibidx_seg { + /* This should be the first one */ + struct list_head qe; + + u8 ib_seg_size; + u8 ib_idx_tbl_offset; +}; + +/* Interrupt structure */ +struct bna_intr { + /* This should be the first one */ + struct list_head qe; + int ref_count; + + enum bna_intr_type intr_type; + int vector; + + struct bna_ib *ib; +}; + +/* Doorbell structure */ +struct bna_ib_dbell { + void *__iomem doorbell_addr; + u32 doorbell_ack; +}; + +/* Interrupt timer configuration */ +struct bna_ib_config { + u8 coalescing_timeo; /* Unit is 5usec. */ + + int interpkt_count; + int interpkt_timeo; + + enum ib_flags ctrl_flags; +}; + +/* IB structure */ +struct bna_ib { + /* This should be the first one */ + struct list_head qe; + + int ib_id; + + int ref_count; + int start_count; + + struct bna_dma_addr ib_seg_host_addr; + void *ib_seg_host_addr_kva; + u32 idx_mask; /* Size >= BNA_IBIDX_MAX_SEGSIZE */ + + struct bna_ibidx_seg *idx_seg; + + struct bna_ib_dbell door_bell; + + struct bna_intr *intr; + + struct bna_ib_config ib_config; + + struct bna *bna; +}; + +/* IB module - keeps track of IBs and interrupts */ +struct bna_ib_mod { + struct bna_ib *ib; /* BFI_MAX_IB entries */ + struct bna_intr *intr; /* BFI_MAX_IB entries */ + struct bna_ibidx_seg *idx_seg; /* BNA_IBIDX_TOTAL_SEGS */ + + struct list_head ib_free_q; + + struct list_head ibidx_seg_pool[BFI_IBIDX_TOTAL_POOLS]; + + struct list_head intr_free_q; + struct list_head intr_active_q; + + struct bna *bna; +}; + +/** + * + * Tx object + * + */ + +/* Tx datapath control structure */ +#define BNA_Q_NAME_SIZE 16 +struct bna_tcb { + /* Fast path */ + void **sw_qpt; + void *unmap_q; + u32 producer_index; + u32 consumer_index; + volatile u32 *hw_consumer_index; + u32 q_depth; + void *__iomem q_dbell; + struct bna_ib_dbell *i_dbell; + int page_idx; + int page_count; + /* Control path */ + struct bna_txq *txq; + struct bnad *bnad; + enum bna_intr_type intr_type; + int intr_vector; + u8 priority; /* Current priority */ + unsigned long flags; /* Used by bnad as required */ + int id; + char name[BNA_Q_NAME_SIZE]; +}; + +/* TxQ QPT and configuration */ +struct bna_txq { + /* This should be the first one */ + struct list_head qe; + + int txq_id; + + u8 priority; + + struct bna_qpt qpt; + struct bna_tcb *tcb; + struct bna_ib *ib; + int ib_seg_offset; + + struct bna_tx *tx; + + u64 tx_packets; + u64 tx_bytes; +}; + +/* TxF structure (hardware Tx Function) */ +struct bna_txf { + int txf_id; + enum txf_flags ctrl_flags; + u16 vlan; +}; + +/* Tx object */ +struct bna_tx { + /* This should be the first one */ + struct list_head qe; + + bfa_fsm_t fsm; + enum bna_tx_flags flags; + + enum bna_tx_type type; + + struct list_head txq_q; + struct bna_txf txf; + + /* Tx event handlers */ + void (*tcb_setup_cbfn)(struct bnad *, struct bna_tcb *); + void (*tcb_destroy_cbfn)(struct bnad *, struct bna_tcb *); + void (*tx_stall_cbfn)(struct bnad *, struct bna_tcb *); + void (*tx_resume_cbfn)(struct bnad *, struct bna_tcb *); + void (*tx_cleanup_cbfn)(struct bnad *, struct bna_tcb *); + + /* callback for bna_tx_disable(), bna_tx_stop() */ + void (*stop_cbfn)(void *arg, struct bna_tx *tx, + enum bna_cb_status status); + void *stop_cbarg; + + /* callback for bna_tx_prio_set() */ + void (*prio_change_cbfn)(struct bnad *bnad, struct bna_tx *tx, + enum bna_cb_status status); + + struct bfa_wc txq_stop_wc; + + struct bna_mbox_qe mbox_qe; + + struct bna *bna; + void *priv; /* bnad's cookie */ +}; + +struct bna_tx_config { + int num_txq; + int txq_depth; + enum bna_tx_type tx_type; +}; + +struct bna_tx_event_cbfn { + /* Optional */ + void (*tcb_setup_cbfn)(struct bnad *, struct bna_tcb *); + void (*tcb_destroy_cbfn)(struct bnad *, struct bna_tcb *); + /* Mandatory */ + void (*tx_stall_cbfn)(struct bnad *, struct bna_tcb *); + void (*tx_resume_cbfn)(struct bnad *, struct bna_tcb *); + void (*tx_cleanup_cbfn)(struct bnad *, struct bna_tcb *); +}; + +/* Tx module - keeps track of free, active tx objects */ +struct bna_tx_mod { + struct bna_tx *tx; /* BFI_MAX_TXQ entries */ + struct bna_txq *txq; /* BFI_MAX_TXQ entries */ + + struct list_head tx_free_q; + struct list_head tx_active_q; + + struct list_head txq_free_q; + + /* callback for bna_tx_mod_stop() */ + void (*stop_cbfn)(struct bna_port *port, + enum bna_cb_status status); + + struct bfa_wc tx_stop_wc; + + enum bna_tx_mod_flags flags; + + int priority; + int cee_link; + + u32 txf_bmap[2]; + + struct bna *bna; +}; + +/** + * + * Receive Indirection Table + * + */ + +/* One row of RIT table */ +struct bna_rit_entry { + u8 large_rxq_id; /* used for either large or data buffers */ + u8 small_rxq_id; /* used for either small or header buffers */ +}; + +/* RIT segment */ +struct bna_rit_segment { + struct list_head qe; + + u32 rit_offset; + u32 rit_size; + /** + * max_rit_size: Varies per RIT segment depending on how RIT is + * partitioned + */ + u32 max_rit_size; + + struct bna_rit_entry *rit; +}; + +struct bna_rit_mod { + struct bna_rit_entry *rit; + struct bna_rit_segment *rit_segment; + + struct list_head rit_seg_pool[BFI_RIT_SEG_TOTAL_POOLS]; +}; + +/** + * + * Rx object + * + */ + +/* Rx datapath control structure */ +struct bna_rcb { + /* Fast path */ + void **sw_qpt; + void *unmap_q; + u32 producer_index; + u32 consumer_index; + u32 q_depth; + void *__iomem q_dbell; + int page_idx; + int page_count; + /* Control path */ + struct bna_rxq *rxq; + struct bna_cq *cq; + struct bnad *bnad; + unsigned long flags; + int id; +}; + +/* RxQ structure - QPT, configuration */ +struct bna_rxq { + struct list_head qe; + int rxq_id; + + int buffer_size; + int q_depth; + + struct bna_qpt qpt; + struct bna_rcb *rcb; + + struct bna_rxp *rxp; + struct bna_rx *rx; + + u64 rx_packets; + u64 rx_bytes; + u64 rx_packets_with_error; + u64 rxbuf_alloc_failed; +}; + +/* RxQ pair */ +union bna_rxq_u { + struct { + struct bna_rxq *hdr; + struct bna_rxq *data; + } hds; + struct { + struct bna_rxq *small; + struct bna_rxq *large; + } slr; + struct { + struct bna_rxq *only; + struct bna_rxq *reserved; + } single; +}; + +/* Packet rate for Dynamic Interrupt Moderation */ +struct bna_pkt_rate { + u32 small_pkt_cnt; + u32 large_pkt_cnt; +}; + +/* Completion control structure */ +struct bna_ccb { + /* Fast path */ + void **sw_qpt; + u32 producer_index; + volatile u32 *hw_producer_index; + u32 q_depth; + struct bna_ib_dbell *i_dbell; + struct bna_rcb *rcb[2]; + void *ctrl; /* For bnad */ + struct bna_pkt_rate pkt_rate; + int page_idx; + int page_count; + + /* Control path */ + struct bna_cq *cq; + struct bnad *bnad; + enum bna_intr_type intr_type; + int intr_vector; + u8 rx_coalescing_timeo; /* For NAPI */ + int id; + char name[BNA_Q_NAME_SIZE]; +}; + +/* CQ QPT, configuration */ +struct bna_cq { + int cq_id; + + struct bna_qpt qpt; + struct bna_ccb *ccb; + + struct bna_ib *ib; + u8 ib_seg_offset; + + struct bna_rx *rx; +}; + +struct bna_rss_config { + enum rss_hash_type hash_type; + u8 hash_mask; + u32 toeplitz_hash_key[BFI_RSS_HASH_KEY_LEN]; +}; + +struct bna_hds_config { + enum hds_header_type hdr_type; + int header_size; +}; + +/* This structure is used during RX creation */ +struct bna_rx_config { + enum bna_rx_type rx_type; + int num_paths; + enum bna_rxp_type rxp_type; + int paused; + int q_depth; + /* + * Small/Large (or Header/Data) buffer size to be configured + * for SLR and HDS queue type. Large buffer size comes from + * port->mtu. + */ + int small_buff_size; + + enum bna_status rss_status; + struct bna_rss_config rss_config; + + enum bna_status hds_status; + struct bna_hds_config hds_config; + + enum bna_status vlan_strip_status; +}; + +/* Rx Path structure - one per MSIX vector/CPU */ +struct bna_rxp { + /* This should be the first one */ + struct list_head qe; + + enum bna_rxp_type type; + union bna_rxq_u rxq; + struct bna_cq cq; + + struct bna_rx *rx; + + /* MSI-x vector number for configuring RSS */ + int vector; + + struct bna_mbox_qe mbox_qe; +}; + +/* HDS configuration structure */ +struct bna_rxf_hds { + enum hds_header_type hdr_type; + int header_size; +}; + +/* RSS configuration structure */ +struct bna_rxf_rss { + enum rss_hash_type hash_type; + u8 hash_mask; + u32 toeplitz_hash_key[BFI_RSS_HASH_KEY_LEN]; +}; + +/* RxF structure (hardware Rx Function) */ +struct bna_rxf { + bfa_fsm_t fsm; + int rxf_id; + enum rxf_flags ctrl_flags; + u16 default_vlan_tag; + enum bna_rxf_oper_state rxf_oper_state; + enum bna_status hds_status; + struct bna_rxf_hds hds_cfg; + enum bna_status rss_status; + struct bna_rxf_rss rss_cfg; + struct bna_rit_segment *rit_segment; + struct bna_rx *rx; + u32 forced_offset; + struct bna_mbox_qe mbox_qe; + int mcast_rxq_id; + + /* callback for bna_rxf_start() */ + void (*start_cbfn) (struct bna_rx *rx, enum bna_cb_status status); + struct bna_rx *start_cbarg; + + /* callback for bna_rxf_stop() */ + void (*stop_cbfn) (struct bna_rx *rx, enum bna_cb_status status); + struct bna_rx *stop_cbarg; + + /* callback for bna_rxf_receive_enable() / bna_rxf_receive_disable() */ + void (*oper_state_cbfn) (struct bnad *bnad, struct bna_rx *rx, + enum bna_cb_status status); + struct bnad *oper_state_cbarg; + + /** + * callback for: + * bna_rxf_ucast_set() + * bna_rxf_{ucast/mcast}_add(), + * bna_rxf_{ucast/mcast}_del(), + * bna_rxf_mode_set() + */ + void (*cam_fltr_cbfn)(struct bnad *bnad, struct bna_rx *rx, + enum bna_cb_status status); + struct bnad *cam_fltr_cbarg; + + enum bna_rxf_flags rxf_flags; + + /* List of unicast addresses yet to be applied to h/w */ + struct list_head ucast_pending_add_q; + struct list_head ucast_pending_del_q; + int ucast_pending_set; + /* ucast addresses applied to the h/w */ + struct list_head ucast_active_q; + struct bna_mac *ucast_active_mac; + + /* List of multicast addresses yet to be applied to h/w */ + struct list_head mcast_pending_add_q; + struct list_head mcast_pending_del_q; + /* multicast addresses applied to the h/w */ + struct list_head mcast_active_q; + + /* Rx modes yet to be applied to h/w */ + enum bna_rxmode rxmode_pending; + enum bna_rxmode rxmode_pending_bitmask; + /* Rx modes applied to h/w */ + enum bna_rxmode rxmode_active; + + enum bna_status vlan_filter_status; + u32 vlan_filter_table[(BFI_MAX_VLAN + 1) / 32]; +}; + +/* Rx object */ +struct bna_rx { + /* This should be the first one */ + struct list_head qe; + + bfa_fsm_t fsm; + + enum bna_rx_type type; + + /* list-head for RX path objects */ + struct list_head rxp_q; + + struct bna_rxf rxf; + + enum bna_rx_flags rx_flags; + + struct bna_mbox_qe mbox_qe; + + struct bfa_wc rxq_stop_wc; + + /* Rx event handlers */ + void (*rcb_setup_cbfn)(struct bnad *, struct bna_rcb *); + void (*rcb_destroy_cbfn)(struct bnad *, struct bna_rcb *); + void (*ccb_setup_cbfn)(struct bnad *, struct bna_ccb *); + void (*ccb_destroy_cbfn)(struct bnad *, struct bna_ccb *); + void (*rx_cleanup_cbfn)(struct bnad *, struct bna_ccb *); + void (*rx_post_cbfn)(struct bnad *, struct bna_rcb *); + + /* callback for bna_rx_disable(), bna_rx_stop() */ + void (*stop_cbfn)(void *arg, struct bna_rx *rx, + enum bna_cb_status status); + void *stop_cbarg; + + struct bna *bna; + void *priv; /* bnad's cookie */ +}; + +struct bna_rx_event_cbfn { + /* Optional */ + void (*rcb_setup_cbfn)(struct bnad *, struct bna_rcb *); + void (*rcb_destroy_cbfn)(struct bnad *, struct bna_rcb *); + void (*ccb_setup_cbfn)(struct bnad *, struct bna_ccb *); + void (*ccb_destroy_cbfn)(struct bnad *, struct bna_ccb *); + /* Mandatory */ + void (*rx_cleanup_cbfn)(struct bnad *, struct bna_ccb *); + void (*rx_post_cbfn)(struct bnad *, struct bna_rcb *); +}; + +/* Rx module - keeps track of free, active rx objects */ +struct bna_rx_mod { + struct bna *bna; /* back pointer to parent */ + struct bna_rx *rx; /* BFI_MAX_RXQ entries */ + struct bna_rxp *rxp; /* BFI_MAX_RXQ entries */ + struct bna_rxq *rxq; /* BFI_MAX_RXQ entries */ + + struct list_head rx_free_q; + struct list_head rx_active_q; + int rx_free_count; + + struct list_head rxp_free_q; + int rxp_free_count; + + struct list_head rxq_free_q; + int rxq_free_count; + + enum bna_rx_mod_flags flags; + + /* callback for bna_rx_mod_stop() */ + void (*stop_cbfn)(struct bna_port *port, + enum bna_cb_status status); + + struct bfa_wc rx_stop_wc; + u32 dim_vector[BNA_LOAD_T_MAX][BNA_BIAS_T_MAX]; + u32 rxf_bmap[2]; +}; + +/** + * + * CAM + * + */ + +struct bna_ucam_mod { + struct bna_mac *ucmac; /* BFI_MAX_UCMAC entries */ + struct list_head free_q; + + struct bna *bna; +}; + +struct bna_mcam_mod { + struct bna_mac *mcmac; /* BFI_MAX_MCMAC entries */ + struct list_head free_q; + + struct bna *bna; +}; + +/** + * + * Statistics + * + */ + +struct bna_tx_stats { + int tx_state; + int tx_flags; + int num_txqs; + u32 txq_bmap[2]; + int txf_id; +}; + +struct bna_rx_stats { + int rx_state; + int rx_flags; + int num_rxps; + int num_rxqs; + u32 rxq_bmap[2]; + u32 cq_bmap[2]; + int rxf_id; + int rxf_state; + int rxf_oper_state; + int num_active_ucast; + int num_active_mcast; + int rxmode_active; + int vlan_filter_status; + u32 vlan_filter_table[(BFI_MAX_VLAN + 1) / 32]; + int rss_status; + int hds_status; +}; + +struct bna_sw_stats { + int device_state; + int port_state; + int port_flags; + int llport_state; + int priority; + int num_active_tx; + int num_active_rx; + struct bna_tx_stats tx_stats[BFI_MAX_TXQ]; + struct bna_rx_stats rx_stats[BFI_MAX_RXQ]; +}; + +struct bna_stats { + u32 txf_bmap[2]; + u32 rxf_bmap[2]; + struct bfi_ll_stats *hw_stats; + struct bna_sw_stats *sw_stats; +}; + +/** + * + * BNA + * + */ + +struct bna { + struct bfa_pcidev pcidev; + + int port_num; + + struct bna_chip_regs regs; + + struct bna_dma_addr hw_stats_dma; + struct bna_stats stats; + + struct bna_device device; + struct bfa_cee cee; + + struct bna_mbox_mod mbox_mod; + + struct bna_port port; + + struct bna_tx_mod tx_mod; + + struct bna_rx_mod rx_mod; + + struct bna_ib_mod ib_mod; + + struct bna_ucam_mod ucam_mod; + struct bna_mcam_mod mcam_mod; + + struct bna_rit_mod rit_mod; + + int rxf_default_id; + int rxf_promisc_id; + + struct bna_mbox_qe mbox_qe; + + struct bnad *bnad; +}; + +#endif /* __BNA_TYPES_H__ */ diff --git a/drivers/net/bna/bnad.c b/drivers/net/bna/bnad.c new file mode 100644 index 000000000000..491d148f88ae --- /dev/null +++ b/drivers/net/bna/bnad.c @@ -0,0 +1,3270 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bnad.h" +#include "bna.h" +#include "cna.h" + +DEFINE_MUTEX(bnad_fwimg_mutex); + +/* + * Module params + */ +static uint bnad_msix_disable; +module_param(bnad_msix_disable, uint, 0444); +MODULE_PARM_DESC(bnad_msix_disable, "Disable MSIX mode"); + +static uint bnad_ioc_auto_recover = 1; +module_param(bnad_ioc_auto_recover, uint, 0444); +MODULE_PARM_DESC(bnad_ioc_auto_recover, "Enable / Disable auto recovery"); + +/* + * Global variables + */ +u32 bnad_rxqs_per_cq = 2; + +const u8 bnad_bcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +/* + * Local MACROS + */ +#define BNAD_TX_UNMAPQ_DEPTH (bnad->txq_depth * 2) + +#define BNAD_RX_UNMAPQ_DEPTH (bnad->rxq_depth) + +#define BNAD_GET_MBOX_IRQ(_bnad) \ + (((_bnad)->cfg_flags & BNAD_CF_MSIX) ? \ + ((_bnad)->msix_table[(_bnad)->msix_num - 1].vector) : \ + ((_bnad)->pcidev->irq)) + +#define BNAD_FILL_UNMAPQ_MEM_REQ(_res_info, _num, _depth) \ +do { \ + (_res_info)->res_type = BNA_RES_T_MEM; \ + (_res_info)->res_u.mem_info.mem_type = BNA_MEM_T_KVA; \ + (_res_info)->res_u.mem_info.num = (_num); \ + (_res_info)->res_u.mem_info.len = \ + sizeof(struct bnad_unmap_q) + \ + (sizeof(struct bnad_skb_unmap) * ((_depth) - 1)); \ +} while (0) + +/* + * Reinitialize completions in CQ, once Rx is taken down + */ +static void +bnad_cq_cmpl_init(struct bnad *bnad, struct bna_ccb *ccb) +{ + struct bna_cq_entry *cmpl, *next_cmpl; + unsigned int wi_range, wis = 0, ccb_prod = 0; + int i; + + BNA_CQ_QPGE_PTR_GET(ccb_prod, ccb->sw_qpt, cmpl, + wi_range); + + for (i = 0; i < ccb->q_depth; i++) { + wis++; + if (likely(--wi_range)) + next_cmpl = cmpl + 1; + else { + BNA_QE_INDX_ADD(ccb_prod, wis, ccb->q_depth); + wis = 0; + BNA_CQ_QPGE_PTR_GET(ccb_prod, ccb->sw_qpt, + next_cmpl, wi_range); + } + cmpl->valid = 0; + cmpl = next_cmpl; + } +} + +/* + * Frees all pending Tx Bufs + * At this point no activity is expected on the Q, + * so DMA unmap & freeing is fine. + */ +static void +bnad_free_all_txbufs(struct bnad *bnad, + struct bna_tcb *tcb) +{ + u16 unmap_cons; + struct bnad_unmap_q *unmap_q = tcb->unmap_q; + struct bnad_skb_unmap *unmap_array; + struct sk_buff *skb = NULL; + int i; + + unmap_array = unmap_q->unmap_array; + + unmap_cons = 0; + while (unmap_cons < unmap_q->q_depth) { + skb = unmap_array[unmap_cons].skb; + if (!skb) { + unmap_cons++; + continue; + } + unmap_array[unmap_cons].skb = NULL; + + pci_unmap_single(bnad->pcidev, + pci_unmap_addr(&unmap_array[unmap_cons], + dma_addr), skb_headlen(skb), + PCI_DMA_TODEVICE); + + pci_unmap_addr_set(&unmap_array[unmap_cons], dma_addr, 0); + unmap_cons++; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + pci_unmap_page(bnad->pcidev, + pci_unmap_addr(&unmap_array[unmap_cons], + dma_addr), + skb_shinfo(skb)->frags[i].size, + PCI_DMA_TODEVICE); + pci_unmap_addr_set(&unmap_array[unmap_cons], dma_addr, + 0); + unmap_cons++; + } + dev_kfree_skb_any(skb); + } +} + +/* Data Path Handlers */ + +/* + * bnad_free_txbufs : Frees the Tx bufs on Tx completion + * Can be called in a) Interrupt context + * b) Sending context + * c) Tasklet context + */ +static u32 +bnad_free_txbufs(struct bnad *bnad, + struct bna_tcb *tcb) +{ + u32 sent_packets = 0, sent_bytes = 0; + u16 wis, unmap_cons, updated_hw_cons; + struct bnad_unmap_q *unmap_q = tcb->unmap_q; + struct bnad_skb_unmap *unmap_array; + struct sk_buff *skb; + int i; + + /* + * Just return if TX is stopped. This check is useful + * when bnad_free_txbufs() runs out of a tasklet scheduled + * before bnad_cb_tx_cleanup() cleared BNAD_RF_TX_STARTED bit + * but this routine runs actually after the cleanup has been + * executed. + */ + if (!test_bit(BNAD_RF_TX_STARTED, &bnad->run_flags)) + return 0; + + updated_hw_cons = *(tcb->hw_consumer_index); + + wis = BNA_Q_INDEX_CHANGE(tcb->consumer_index, + updated_hw_cons, tcb->q_depth); + + BUG_ON(!(wis <= BNA_QE_IN_USE_CNT(tcb, tcb->q_depth))); + + unmap_array = unmap_q->unmap_array; + unmap_cons = unmap_q->consumer_index; + + prefetch(&unmap_array[unmap_cons + 1]); + while (wis) { + skb = unmap_array[unmap_cons].skb; + + unmap_array[unmap_cons].skb = NULL; + + sent_packets++; + sent_bytes += skb->len; + wis -= BNA_TXQ_WI_NEEDED(1 + skb_shinfo(skb)->nr_frags); + + pci_unmap_single(bnad->pcidev, + pci_unmap_addr(&unmap_array[unmap_cons], + dma_addr), skb_headlen(skb), + PCI_DMA_TODEVICE); + pci_unmap_addr_set(&unmap_array[unmap_cons], dma_addr, 0); + BNA_QE_INDX_ADD(unmap_cons, 1, unmap_q->q_depth); + + prefetch(&unmap_array[unmap_cons + 1]); + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + prefetch(&unmap_array[unmap_cons + 1]); + + pci_unmap_page(bnad->pcidev, + pci_unmap_addr(&unmap_array[unmap_cons], + dma_addr), + skb_shinfo(skb)->frags[i].size, + PCI_DMA_TODEVICE); + pci_unmap_addr_set(&unmap_array[unmap_cons], dma_addr, + 0); + BNA_QE_INDX_ADD(unmap_cons, 1, unmap_q->q_depth); + } + dev_kfree_skb_any(skb); + } + + /* Update consumer pointers. */ + tcb->consumer_index = updated_hw_cons; + unmap_q->consumer_index = unmap_cons; + + tcb->txq->tx_packets += sent_packets; + tcb->txq->tx_bytes += sent_bytes; + + return sent_packets; +} + +/* Tx Free Tasklet function */ +/* Frees for all the tcb's in all the Tx's */ +/* + * Scheduled from sending context, so that + * the fat Tx lock is not held for too long + * in the sending context. + */ +static void +bnad_tx_free_tasklet(unsigned long bnad_ptr) +{ + struct bnad *bnad = (struct bnad *)bnad_ptr; + struct bna_tcb *tcb; + u32 acked; + int i, j; + + for (i = 0; i < bnad->num_tx; i++) { + for (j = 0; j < bnad->num_txq_per_tx; j++) { + tcb = bnad->tx_info[i].tcb[j]; + if (!tcb) + continue; + if (((u16) (*tcb->hw_consumer_index) != + tcb->consumer_index) && + (!test_and_set_bit(BNAD_TXQ_FREE_SENT, + &tcb->flags))) { + acked = bnad_free_txbufs(bnad, tcb); + bna_ib_ack(tcb->i_dbell, acked); + smp_mb__before_clear_bit(); + clear_bit(BNAD_TXQ_FREE_SENT, &tcb->flags); + } + } + } +} + +static u32 +bnad_tx(struct bnad *bnad, struct bna_tcb *tcb) +{ + struct net_device *netdev = bnad->netdev; + u32 sent; + + if (test_and_set_bit(BNAD_TXQ_FREE_SENT, &tcb->flags)) + return 0; + + sent = bnad_free_txbufs(bnad, tcb); + if (sent) { + if (netif_queue_stopped(netdev) && + netif_carrier_ok(netdev) && + BNA_QE_FREE_CNT(tcb, tcb->q_depth) >= + BNAD_NETIF_WAKE_THRESHOLD) { + netif_wake_queue(netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_wakeup); + } + bna_ib_ack(tcb->i_dbell, sent); + } else + bna_ib_ack(tcb->i_dbell, 0); + + smp_mb__before_clear_bit(); + clear_bit(BNAD_TXQ_FREE_SENT, &tcb->flags); + + return sent; +} + +/* MSIX Tx Completion Handler */ +static irqreturn_t +bnad_msix_tx(int irq, void *data) +{ + struct bna_tcb *tcb = (struct bna_tcb *)data; + struct bnad *bnad = tcb->bnad; + + bnad_tx(bnad, tcb); + + return IRQ_HANDLED; +} + +static void +bnad_reset_rcb(struct bnad *bnad, struct bna_rcb *rcb) +{ + struct bnad_unmap_q *unmap_q = rcb->unmap_q; + + rcb->producer_index = 0; + rcb->consumer_index = 0; + + unmap_q->producer_index = 0; + unmap_q->consumer_index = 0; +} + +static void +bnad_free_rxbufs(struct bnad *bnad, struct bna_rcb *rcb) +{ + struct bnad_unmap_q *unmap_q; + struct sk_buff *skb; + + unmap_q = rcb->unmap_q; + while (BNA_QE_IN_USE_CNT(unmap_q, unmap_q->q_depth)) { + skb = unmap_q->unmap_array[unmap_q->consumer_index].skb; + BUG_ON(!(skb)); + unmap_q->unmap_array[unmap_q->consumer_index].skb = NULL; + pci_unmap_single(bnad->pcidev, pci_unmap_addr(&unmap_q-> + unmap_array[unmap_q->consumer_index], + dma_addr), rcb->rxq->buffer_size + + NET_IP_ALIGN, PCI_DMA_FROMDEVICE); + dev_kfree_skb(skb); + BNA_QE_INDX_ADD(unmap_q->consumer_index, 1, unmap_q->q_depth); + BNA_QE_INDX_ADD(rcb->consumer_index, 1, rcb->q_depth); + } + + bnad_reset_rcb(bnad, rcb); +} + +static void +bnad_alloc_n_post_rxbufs(struct bnad *bnad, struct bna_rcb *rcb) +{ + u16 to_alloc, alloced, unmap_prod, wi_range; + struct bnad_unmap_q *unmap_q = rcb->unmap_q; + struct bnad_skb_unmap *unmap_array; + struct bna_rxq_entry *rxent; + struct sk_buff *skb; + dma_addr_t dma_addr; + + alloced = 0; + to_alloc = + BNA_QE_FREE_CNT(unmap_q, unmap_q->q_depth); + + unmap_array = unmap_q->unmap_array; + unmap_prod = unmap_q->producer_index; + + BNA_RXQ_QPGE_PTR_GET(unmap_prod, rcb->sw_qpt, rxent, wi_range); + + while (to_alloc--) { + if (!wi_range) { + BNA_RXQ_QPGE_PTR_GET(unmap_prod, rcb->sw_qpt, rxent, + wi_range); + } + skb = alloc_skb(rcb->rxq->buffer_size + NET_IP_ALIGN, + GFP_ATOMIC); + if (unlikely(!skb)) { + BNAD_UPDATE_CTR(bnad, rxbuf_alloc_failed); + goto finishing; + } + skb->dev = bnad->netdev; + skb_reserve(skb, NET_IP_ALIGN); + unmap_array[unmap_prod].skb = skb; + dma_addr = pci_map_single(bnad->pcidev, skb->data, + rcb->rxq->buffer_size, PCI_DMA_FROMDEVICE); + pci_unmap_addr_set(&unmap_array[unmap_prod], dma_addr, + dma_addr); + BNA_SET_DMA_ADDR(dma_addr, &rxent->host_addr); + BNA_QE_INDX_ADD(unmap_prod, 1, unmap_q->q_depth); + + rxent++; + wi_range--; + alloced++; + } + +finishing: + if (likely(alloced)) { + unmap_q->producer_index = unmap_prod; + rcb->producer_index = unmap_prod; + smp_mb(); + bna_rxq_prod_indx_doorbell(rcb); + } +} + +/* + * Locking is required in the enable path + * because it is called from a napi poll + * context, where the bna_lock is not held + * unlike the IRQ context. + */ +static void +bnad_enable_txrx_irqs(struct bnad *bnad) +{ + struct bna_tcb *tcb; + struct bna_ccb *ccb; + int i, j; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + for (i = 0; i < bnad->num_tx; i++) { + for (j = 0; j < bnad->num_txq_per_tx; j++) { + tcb = bnad->tx_info[i].tcb[j]; + bna_ib_coalescing_timer_set(tcb->i_dbell, + tcb->txq->ib->ib_config.coalescing_timeo); + bna_ib_ack(tcb->i_dbell, 0); + } + } + + for (i = 0; i < bnad->num_rx; i++) { + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + ccb = bnad->rx_info[i].rx_ctrl[j].ccb; + bnad_enable_rx_irq_unsafe(ccb); + } + } + spin_unlock_irqrestore(&bnad->bna_lock, flags); +} + +static inline void +bnad_refill_rxq(struct bnad *bnad, struct bna_rcb *rcb) +{ + struct bnad_unmap_q *unmap_q = rcb->unmap_q; + + if (!test_and_set_bit(BNAD_RXQ_REFILL, &rcb->flags)) { + if (BNA_QE_FREE_CNT(unmap_q, unmap_q->q_depth) + >> BNAD_RXQ_REFILL_THRESHOLD_SHIFT) + bnad_alloc_n_post_rxbufs(bnad, rcb); + smp_mb__before_clear_bit(); + clear_bit(BNAD_RXQ_REFILL, &rcb->flags); + } +} + +static u32 +bnad_poll_cq(struct bnad *bnad, struct bna_ccb *ccb, int budget) +{ + struct bna_cq_entry *cmpl, *next_cmpl; + struct bna_rcb *rcb = NULL; + unsigned int wi_range, packets = 0, wis = 0; + struct bnad_unmap_q *unmap_q; + struct sk_buff *skb; + u32 flags; + u32 qid0 = ccb->rcb[0]->rxq->rxq_id; + struct bna_pkt_rate *pkt_rt = &ccb->pkt_rate; + + prefetch(bnad->netdev); + BNA_CQ_QPGE_PTR_GET(ccb->producer_index, ccb->sw_qpt, cmpl, + wi_range); + BUG_ON(!(wi_range <= ccb->q_depth)); + while (cmpl->valid && packets < budget) { + packets++; + BNA_UPDATE_PKT_CNT(pkt_rt, ntohs(cmpl->length)); + + if (qid0 == cmpl->rxq_id) + rcb = ccb->rcb[0]; + else + rcb = ccb->rcb[1]; + + unmap_q = rcb->unmap_q; + + skb = unmap_q->unmap_array[unmap_q->consumer_index].skb; + BUG_ON(!(skb)); + unmap_q->unmap_array[unmap_q->consumer_index].skb = NULL; + pci_unmap_single(bnad->pcidev, + pci_unmap_addr(&unmap_q-> + unmap_array[unmap_q-> + consumer_index], + dma_addr), + rcb->rxq->buffer_size, + PCI_DMA_FROMDEVICE); + BNA_QE_INDX_ADD(unmap_q->consumer_index, 1, unmap_q->q_depth); + + /* Should be more efficient ? Performance ? */ + BNA_QE_INDX_ADD(rcb->consumer_index, 1, rcb->q_depth); + + wis++; + if (likely(--wi_range)) + next_cmpl = cmpl + 1; + else { + BNA_QE_INDX_ADD(ccb->producer_index, wis, ccb->q_depth); + wis = 0; + BNA_CQ_QPGE_PTR_GET(ccb->producer_index, ccb->sw_qpt, + next_cmpl, wi_range); + BUG_ON(!(wi_range <= ccb->q_depth)); + } + prefetch(next_cmpl); + + flags = ntohl(cmpl->flags); + if (unlikely + (flags & + (BNA_CQ_EF_MAC_ERROR | BNA_CQ_EF_FCS_ERROR | + BNA_CQ_EF_TOO_LONG))) { + dev_kfree_skb_any(skb); + rcb->rxq->rx_packets_with_error++; + goto next; + } + + skb_put(skb, ntohs(cmpl->length)); + if (likely + (bnad->rx_csum && + (((flags & BNA_CQ_EF_IPV4) && + (flags & BNA_CQ_EF_L3_CKSUM_OK)) || + (flags & BNA_CQ_EF_IPV6)) && + (flags & (BNA_CQ_EF_TCP | BNA_CQ_EF_UDP)) && + (flags & BNA_CQ_EF_L4_CKSUM_OK))) + skb->ip_summed = CHECKSUM_UNNECESSARY; + else + skb->ip_summed = CHECKSUM_NONE; + + rcb->rxq->rx_packets++; + rcb->rxq->rx_bytes += skb->len; + skb->protocol = eth_type_trans(skb, bnad->netdev); + + if (bnad->vlan_grp && (flags & BNA_CQ_EF_VLAN)) { + struct bnad_rx_ctrl *rx_ctrl = + (struct bnad_rx_ctrl *)ccb->ctrl; + if (skb->ip_summed == CHECKSUM_UNNECESSARY) + vlan_gro_receive(&rx_ctrl->napi, bnad->vlan_grp, + ntohs(cmpl->vlan_tag), skb); + else + vlan_hwaccel_receive_skb(skb, + bnad->vlan_grp, + ntohs(cmpl->vlan_tag)); + + } else { /* Not VLAN tagged/stripped */ + struct bnad_rx_ctrl *rx_ctrl = + (struct bnad_rx_ctrl *)ccb->ctrl; + if (skb->ip_summed == CHECKSUM_UNNECESSARY) + napi_gro_receive(&rx_ctrl->napi, skb); + else + netif_receive_skb(skb); + } + +next: + cmpl->valid = 0; + cmpl = next_cmpl; + } + + BNA_QE_INDX_ADD(ccb->producer_index, wis, ccb->q_depth); + + if (likely(ccb)) { + bna_ib_ack(ccb->i_dbell, packets); + bnad_refill_rxq(bnad, ccb->rcb[0]); + if (ccb->rcb[1]) + bnad_refill_rxq(bnad, ccb->rcb[1]); + } else + bna_ib_ack(ccb->i_dbell, 0); + + return packets; +} + +static void +bnad_disable_rx_irq(struct bnad *bnad, struct bna_ccb *ccb) +{ + bna_ib_coalescing_timer_set(ccb->i_dbell, 0); + bna_ib_ack(ccb->i_dbell, 0); +} + +static void +bnad_enable_rx_irq(struct bnad *bnad, struct bna_ccb *ccb) +{ + spin_lock_irq(&bnad->bna_lock); /* Because of polling context */ + bnad_enable_rx_irq_unsafe(ccb); + spin_unlock_irq(&bnad->bna_lock); +} + +static void +bnad_netif_rx_schedule_poll(struct bnad *bnad, struct bna_ccb *ccb) +{ + struct bnad_rx_ctrl *rx_ctrl = (struct bnad_rx_ctrl *)(ccb->ctrl); + if (likely(napi_schedule_prep((&rx_ctrl->napi)))) { + bnad_disable_rx_irq(bnad, ccb); + __napi_schedule((&rx_ctrl->napi)); + } + BNAD_UPDATE_CTR(bnad, netif_rx_schedule); +} + +/* MSIX Rx Path Handler */ +static irqreturn_t +bnad_msix_rx(int irq, void *data) +{ + struct bna_ccb *ccb = (struct bna_ccb *)data; + struct bnad *bnad = ccb->bnad; + + bnad_netif_rx_schedule_poll(bnad, ccb); + + return IRQ_HANDLED; +} + +/* Interrupt handlers */ + +/* Mbox Interrupt Handlers */ +static irqreturn_t +bnad_msix_mbox_handler(int irq, void *data) +{ + u32 intr_status; + unsigned long flags; + struct net_device *netdev = data; + struct bnad *bnad; + + bnad = netdev_priv(netdev); + + /* BNA_ISR_GET(bnad); Inc Ref count */ + spin_lock_irqsave(&bnad->bna_lock, flags); + + bna_intr_status_get(&bnad->bna, intr_status); + + if (BNA_IS_MBOX_ERR_INTR(intr_status)) + bna_mbox_handler(&bnad->bna, intr_status); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* BNAD_ISR_PUT(bnad); Dec Ref count */ + return IRQ_HANDLED; +} + +static irqreturn_t +bnad_isr(int irq, void *data) +{ + int i, j; + u32 intr_status; + unsigned long flags; + struct net_device *netdev = data; + struct bnad *bnad = netdev_priv(netdev); + struct bnad_rx_info *rx_info; + struct bnad_rx_ctrl *rx_ctrl; + + spin_lock_irqsave(&bnad->bna_lock, flags); + + bna_intr_status_get(&bnad->bna, intr_status); + if (!intr_status) { + spin_unlock_irqrestore(&bnad->bna_lock, flags); + return IRQ_NONE; + } + + if (BNA_IS_MBOX_ERR_INTR(intr_status)) { + bna_mbox_handler(&bnad->bna, intr_status); + if (!BNA_IS_INTX_DATA_INTR(intr_status)) { + spin_unlock_irqrestore(&bnad->bna_lock, flags); + goto done; + } + } + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Process data interrupts */ + for (i = 0; i < bnad->num_rx; i++) { + rx_info = &bnad->rx_info[i]; + if (!rx_info->rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + rx_ctrl = &rx_info->rx_ctrl[j]; + if (rx_ctrl->ccb) + bnad_netif_rx_schedule_poll(bnad, + rx_ctrl->ccb); + } + } +done: + return IRQ_HANDLED; +} + +/* + * Called in interrupt / callback context + * with bna_lock held, so cfg_flags access is OK + */ +static void +bnad_enable_mbox_irq(struct bnad *bnad) +{ + int irq = BNAD_GET_MBOX_IRQ(bnad); + + if (!(bnad->cfg_flags & BNAD_CF_MSIX)) + return; + + if (test_and_clear_bit(BNAD_RF_MBOX_IRQ_DISABLED, &bnad->run_flags)) + enable_irq(irq); + BNAD_UPDATE_CTR(bnad, mbox_intr_enabled); +} + +/* + * Called with bnad->bna_lock held b'cos of + * bnad->cfg_flags access. + */ +void +bnad_disable_mbox_irq(struct bnad *bnad) +{ + int irq = BNAD_GET_MBOX_IRQ(bnad); + + if (!(bnad->cfg_flags & BNAD_CF_MSIX)) + return; + + if (!test_and_set_bit(BNAD_RF_MBOX_IRQ_DISABLED, &bnad->run_flags)) + disable_irq_nosync(irq); + BNAD_UPDATE_CTR(bnad, mbox_intr_disabled); +} + +/* Control Path Handlers */ + +/* Callbacks */ +void +bnad_cb_device_enable_mbox_intr(struct bnad *bnad) +{ + bnad_enable_mbox_irq(bnad); +} + +void +bnad_cb_device_disable_mbox_intr(struct bnad *bnad) +{ + bnad_disable_mbox_irq(bnad); +} + +void +bnad_cb_device_enabled(struct bnad *bnad, enum bna_cb_status status) +{ + complete(&bnad->bnad_completions.ioc_comp); + bnad->bnad_completions.ioc_comp_status = status; +} + +void +bnad_cb_device_disabled(struct bnad *bnad, enum bna_cb_status status) +{ + complete(&bnad->bnad_completions.ioc_comp); + bnad->bnad_completions.ioc_comp_status = status; +} + +static void +bnad_cb_port_disabled(void *arg, enum bna_cb_status status) +{ + struct bnad *bnad = (struct bnad *)arg; + + complete(&bnad->bnad_completions.port_comp); + + netif_carrier_off(bnad->netdev); +} + +void +bnad_cb_port_link_status(struct bnad *bnad, + enum bna_link_status link_status) +{ + bool link_up = 0; + + link_up = (link_status == BNA_LINK_UP) || (link_status == BNA_CEE_UP); + + if (link_status == BNA_CEE_UP) { + set_bit(BNAD_RF_CEE_RUNNING, &bnad->run_flags); + BNAD_UPDATE_CTR(bnad, cee_up); + } else + clear_bit(BNAD_RF_CEE_RUNNING, &bnad->run_flags); + + if (link_up) { + if (!netif_carrier_ok(bnad->netdev)) { + pr_warn("bna: %s link up\n", + bnad->netdev->name); + netif_carrier_on(bnad->netdev); + BNAD_UPDATE_CTR(bnad, link_toggle); + if (test_bit(BNAD_RF_TX_STARTED, &bnad->run_flags)) { + /* Force an immediate Transmit Schedule */ + pr_info("bna: %s TX_STARTED\n", + bnad->netdev->name); + netif_wake_queue(bnad->netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_wakeup); + } else { + netif_stop_queue(bnad->netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_stop); + } + } + } else { + if (netif_carrier_ok(bnad->netdev)) { + pr_warn("bna: %s link down\n", + bnad->netdev->name); + netif_carrier_off(bnad->netdev); + BNAD_UPDATE_CTR(bnad, link_toggle); + } + } +} + +static void +bnad_cb_tx_disabled(void *arg, struct bna_tx *tx, + enum bna_cb_status status) +{ + struct bnad *bnad = (struct bnad *)arg; + + complete(&bnad->bnad_completions.tx_comp); +} + +static void +bnad_cb_tcb_setup(struct bnad *bnad, struct bna_tcb *tcb) +{ + struct bnad_tx_info *tx_info = + (struct bnad_tx_info *)tcb->txq->tx->priv; + struct bnad_unmap_q *unmap_q = tcb->unmap_q; + + tx_info->tcb[tcb->id] = tcb; + unmap_q->producer_index = 0; + unmap_q->consumer_index = 0; + unmap_q->q_depth = BNAD_TX_UNMAPQ_DEPTH; +} + +static void +bnad_cb_tcb_destroy(struct bnad *bnad, struct bna_tcb *tcb) +{ + struct bnad_tx_info *tx_info = + (struct bnad_tx_info *)tcb->txq->tx->priv; + + tx_info->tcb[tcb->id] = NULL; +} + +static void +bnad_cb_rcb_setup(struct bnad *bnad, struct bna_rcb *rcb) +{ + struct bnad_unmap_q *unmap_q = rcb->unmap_q; + + unmap_q->producer_index = 0; + unmap_q->consumer_index = 0; + unmap_q->q_depth = BNAD_RX_UNMAPQ_DEPTH; +} + +static void +bnad_cb_ccb_setup(struct bnad *bnad, struct bna_ccb *ccb) +{ + struct bnad_rx_info *rx_info = + (struct bnad_rx_info *)ccb->cq->rx->priv; + + rx_info->rx_ctrl[ccb->id].ccb = ccb; + ccb->ctrl = &rx_info->rx_ctrl[ccb->id]; +} + +static void +bnad_cb_ccb_destroy(struct bnad *bnad, struct bna_ccb *ccb) +{ + struct bnad_rx_info *rx_info = + (struct bnad_rx_info *)ccb->cq->rx->priv; + + rx_info->rx_ctrl[ccb->id].ccb = NULL; +} + +static void +bnad_cb_tx_stall(struct bnad *bnad, struct bna_tcb *tcb) +{ + struct bnad_tx_info *tx_info = + (struct bnad_tx_info *)tcb->txq->tx->priv; + + if (tx_info != &bnad->tx_info[0]) + return; + + clear_bit(BNAD_RF_TX_STARTED, &bnad->run_flags); + netif_stop_queue(bnad->netdev); + pr_info("bna: %s TX_STOPPED\n", bnad->netdev->name); +} + +static void +bnad_cb_tx_resume(struct bnad *bnad, struct bna_tcb *tcb) +{ + if (test_and_set_bit(BNAD_RF_TX_STARTED, &bnad->run_flags)) + return; + + if (netif_carrier_ok(bnad->netdev)) { + pr_info("bna: %s TX_STARTED\n", bnad->netdev->name); + netif_wake_queue(bnad->netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_wakeup); + } +} + +static void +bnad_cb_tx_cleanup(struct bnad *bnad, struct bna_tcb *tcb) +{ + struct bnad_unmap_q *unmap_q = tcb->unmap_q; + + if (!tcb || (!tcb->unmap_q)) + return; + + if (!unmap_q->unmap_array) + return; + + if (test_and_set_bit(BNAD_TXQ_FREE_SENT, &tcb->flags)) + return; + + bnad_free_all_txbufs(bnad, tcb); + + unmap_q->producer_index = 0; + unmap_q->consumer_index = 0; + + smp_mb__before_clear_bit(); + clear_bit(BNAD_TXQ_FREE_SENT, &tcb->flags); +} + +static void +bnad_cb_rx_cleanup(struct bnad *bnad, + struct bna_ccb *ccb) +{ + bnad_cq_cmpl_init(bnad, ccb); + + bnad_free_rxbufs(bnad, ccb->rcb[0]); + clear_bit(BNAD_RXQ_STARTED, &ccb->rcb[0]->flags); + + if (ccb->rcb[1]) { + bnad_free_rxbufs(bnad, ccb->rcb[1]); + clear_bit(BNAD_RXQ_STARTED, &ccb->rcb[1]->flags); + } +} + +static void +bnad_cb_rx_post(struct bnad *bnad, struct bna_rcb *rcb) +{ + struct bnad_unmap_q *unmap_q = rcb->unmap_q; + + set_bit(BNAD_RXQ_STARTED, &rcb->flags); + + /* Now allocate & post buffers for this RCB */ + /* !!Allocation in callback context */ + if (!test_and_set_bit(BNAD_RXQ_REFILL, &rcb->flags)) { + if (BNA_QE_FREE_CNT(unmap_q, unmap_q->q_depth) + >> BNAD_RXQ_REFILL_THRESHOLD_SHIFT) + bnad_alloc_n_post_rxbufs(bnad, rcb); + smp_mb__before_clear_bit(); + clear_bit(BNAD_RXQ_REFILL, &rcb->flags); + } +} + +static void +bnad_cb_rx_disabled(void *arg, struct bna_rx *rx, + enum bna_cb_status status) +{ + struct bnad *bnad = (struct bnad *)arg; + + complete(&bnad->bnad_completions.rx_comp); +} + +static void +bnad_cb_rx_mcast_add(struct bnad *bnad, struct bna_rx *rx, + enum bna_cb_status status) +{ + bnad->bnad_completions.mcast_comp_status = status; + complete(&bnad->bnad_completions.mcast_comp); +} + +void +bnad_cb_stats_get(struct bnad *bnad, enum bna_cb_status status, + struct bna_stats *stats) +{ + if (status == BNA_CB_SUCCESS) + BNAD_UPDATE_CTR(bnad, hw_stats_updates); + + if (!netif_running(bnad->netdev) || + !test_bit(BNAD_RF_STATS_TIMER_RUNNING, &bnad->run_flags)) + return; + + mod_timer(&bnad->stats_timer, + jiffies + msecs_to_jiffies(BNAD_STATS_TIMER_FREQ)); +} + +void +bnad_cb_stats_clr(struct bnad *bnad) +{ +} + +/* Resource allocation, free functions */ + +static void +bnad_mem_free(struct bnad *bnad, + struct bna_mem_info *mem_info) +{ + int i; + dma_addr_t dma_pa; + + if (mem_info->mdl == NULL) + return; + + for (i = 0; i < mem_info->num; i++) { + if (mem_info->mdl[i].kva != NULL) { + if (mem_info->mem_type == BNA_MEM_T_DMA) { + BNA_GET_DMA_ADDR(&(mem_info->mdl[i].dma), + dma_pa); + pci_free_consistent(bnad->pcidev, + mem_info->mdl[i].len, + mem_info->mdl[i].kva, dma_pa); + } else + kfree(mem_info->mdl[i].kva); + } + } + kfree(mem_info->mdl); + mem_info->mdl = NULL; +} + +static int +bnad_mem_alloc(struct bnad *bnad, + struct bna_mem_info *mem_info) +{ + int i; + dma_addr_t dma_pa; + + if ((mem_info->num == 0) || (mem_info->len == 0)) { + mem_info->mdl = NULL; + return 0; + } + + mem_info->mdl = kcalloc(mem_info->num, sizeof(struct bna_mem_descr), + GFP_KERNEL); + if (mem_info->mdl == NULL) + return -ENOMEM; + + if (mem_info->mem_type == BNA_MEM_T_DMA) { + for (i = 0; i < mem_info->num; i++) { + mem_info->mdl[i].len = mem_info->len; + mem_info->mdl[i].kva = + pci_alloc_consistent(bnad->pcidev, + mem_info->len, &dma_pa); + + if (mem_info->mdl[i].kva == NULL) + goto err_return; + + BNA_SET_DMA_ADDR(dma_pa, + &(mem_info->mdl[i].dma)); + } + } else { + for (i = 0; i < mem_info->num; i++) { + mem_info->mdl[i].len = mem_info->len; + mem_info->mdl[i].kva = kzalloc(mem_info->len, + GFP_KERNEL); + if (mem_info->mdl[i].kva == NULL) + goto err_return; + } + } + + return 0; + +err_return: + bnad_mem_free(bnad, mem_info); + return -ENOMEM; +} + +/* Free IRQ for Mailbox */ +static void +bnad_mbox_irq_free(struct bnad *bnad, + struct bna_intr_info *intr_info) +{ + int irq; + unsigned long flags; + + if (intr_info->idl == NULL) + return; + + spin_lock_irqsave(&bnad->bna_lock, flags); + + bnad_disable_mbox_irq(bnad); + + irq = BNAD_GET_MBOX_IRQ(bnad); + free_irq(irq, bnad->netdev); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + kfree(intr_info->idl); +} + +/* + * Allocates IRQ for Mailbox, but keep it disabled + * This will be enabled once we get the mbox enable callback + * from bna + */ +static int +bnad_mbox_irq_alloc(struct bnad *bnad, + struct bna_intr_info *intr_info) +{ + int err; + unsigned long flags; + u32 irq; + irq_handler_t irq_handler; + + /* Mbox should use only 1 vector */ + + intr_info->idl = kzalloc(sizeof(*(intr_info->idl)), GFP_KERNEL); + if (!intr_info->idl) + return -ENOMEM; + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (bnad->cfg_flags & BNAD_CF_MSIX) { + irq_handler = (irq_handler_t)bnad_msix_mbox_handler; + irq = bnad->msix_table[bnad->msix_num - 1].vector; + flags = 0; + intr_info->intr_type = BNA_INTR_T_MSIX; + intr_info->idl[0].vector = bnad->msix_num - 1; + } else { + irq_handler = (irq_handler_t)bnad_isr; + irq = bnad->pcidev->irq; + flags = IRQF_SHARED; + intr_info->intr_type = BNA_INTR_T_INTX; + /* intr_info->idl.vector = 0 ? */ + } + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + sprintf(bnad->mbox_irq_name, "%s", BNAD_NAME); + + err = request_irq(irq, irq_handler, flags, + bnad->mbox_irq_name, bnad->netdev); + if (err) { + kfree(intr_info->idl); + intr_info->idl = NULL; + return err; + } + + spin_lock_irqsave(&bnad->bna_lock, flags); + bnad_disable_mbox_irq(bnad); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + return 0; +} + +static void +bnad_txrx_irq_free(struct bnad *bnad, struct bna_intr_info *intr_info) +{ + kfree(intr_info->idl); + intr_info->idl = NULL; +} + +/* Allocates Interrupt Descriptor List for MSIX/INT-X vectors */ +static int +bnad_txrx_irq_alloc(struct bnad *bnad, enum bnad_intr_source src, + uint txrx_id, struct bna_intr_info *intr_info) +{ + int i, vector_start = 0; + u32 cfg_flags; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + cfg_flags = bnad->cfg_flags; + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + if (cfg_flags & BNAD_CF_MSIX) { + intr_info->intr_type = BNA_INTR_T_MSIX; + intr_info->idl = kcalloc(intr_info->num, + sizeof(struct bna_intr_descr), + GFP_KERNEL); + if (!intr_info->idl) + return -ENOMEM; + + switch (src) { + case BNAD_INTR_TX: + vector_start = txrx_id; + break; + + case BNAD_INTR_RX: + vector_start = bnad->num_tx * bnad->num_txq_per_tx + + txrx_id; + break; + + default: + BUG(); + } + + for (i = 0; i < intr_info->num; i++) + intr_info->idl[i].vector = vector_start + i; + } else { + intr_info->intr_type = BNA_INTR_T_INTX; + intr_info->num = 1; + intr_info->idl = kcalloc(intr_info->num, + sizeof(struct bna_intr_descr), + GFP_KERNEL); + if (!intr_info->idl) + return -ENOMEM; + + switch (src) { + case BNAD_INTR_TX: + intr_info->idl[0].vector = 0x1; /* Bit mask : Tx IB */ + break; + + case BNAD_INTR_RX: + intr_info->idl[0].vector = 0x2; /* Bit mask : Rx IB */ + break; + } + } + return 0; +} + +/** + * NOTE: Should be called for MSIX only + * Unregisters Tx MSIX vector(s) from the kernel + */ +static void +bnad_tx_msix_unregister(struct bnad *bnad, struct bnad_tx_info *tx_info, + int num_txqs) +{ + int i; + int vector_num; + + for (i = 0; i < num_txqs; i++) { + if (tx_info->tcb[i] == NULL) + continue; + + vector_num = tx_info->tcb[i]->intr_vector; + free_irq(bnad->msix_table[vector_num].vector, tx_info->tcb[i]); + } +} + +/** + * NOTE: Should be called for MSIX only + * Registers Tx MSIX vector(s) and ISR(s), cookie with the kernel + */ +static int +bnad_tx_msix_register(struct bnad *bnad, struct bnad_tx_info *tx_info, + uint tx_id, int num_txqs) +{ + int i; + int err; + int vector_num; + + for (i = 0; i < num_txqs; i++) { + vector_num = tx_info->tcb[i]->intr_vector; + sprintf(tx_info->tcb[i]->name, "%s TXQ %d", bnad->netdev->name, + tx_id + tx_info->tcb[i]->id); + err = request_irq(bnad->msix_table[vector_num].vector, + (irq_handler_t)bnad_msix_tx, 0, + tx_info->tcb[i]->name, + tx_info->tcb[i]); + if (err) + goto err_return; + } + + return 0; + +err_return: + if (i > 0) + bnad_tx_msix_unregister(bnad, tx_info, (i - 1)); + return -1; +} + +/** + * NOTE: Should be called for MSIX only + * Unregisters Rx MSIX vector(s) from the kernel + */ +static void +bnad_rx_msix_unregister(struct bnad *bnad, struct bnad_rx_info *rx_info, + int num_rxps) +{ + int i; + int vector_num; + + for (i = 0; i < num_rxps; i++) { + if (rx_info->rx_ctrl[i].ccb == NULL) + continue; + + vector_num = rx_info->rx_ctrl[i].ccb->intr_vector; + free_irq(bnad->msix_table[vector_num].vector, + rx_info->rx_ctrl[i].ccb); + } +} + +/** + * NOTE: Should be called for MSIX only + * Registers Tx MSIX vector(s) and ISR(s), cookie with the kernel + */ +static int +bnad_rx_msix_register(struct bnad *bnad, struct bnad_rx_info *rx_info, + uint rx_id, int num_rxps) +{ + int i; + int err; + int vector_num; + + for (i = 0; i < num_rxps; i++) { + vector_num = rx_info->rx_ctrl[i].ccb->intr_vector; + sprintf(rx_info->rx_ctrl[i].ccb->name, "%s CQ %d", + bnad->netdev->name, + rx_id + rx_info->rx_ctrl[i].ccb->id); + err = request_irq(bnad->msix_table[vector_num].vector, + (irq_handler_t)bnad_msix_rx, 0, + rx_info->rx_ctrl[i].ccb->name, + rx_info->rx_ctrl[i].ccb); + if (err) + goto err_return; + } + + return 0; + +err_return: + if (i > 0) + bnad_rx_msix_unregister(bnad, rx_info, (i - 1)); + return -1; +} + +/* Free Tx object Resources */ +static void +bnad_tx_res_free(struct bnad *bnad, struct bna_res_info *res_info) +{ + int i; + + for (i = 0; i < BNA_TX_RES_T_MAX; i++) { + if (res_info[i].res_type == BNA_RES_T_MEM) + bnad_mem_free(bnad, &res_info[i].res_u.mem_info); + else if (res_info[i].res_type == BNA_RES_T_INTR) + bnad_txrx_irq_free(bnad, &res_info[i].res_u.intr_info); + } +} + +/* Allocates memory and interrupt resources for Tx object */ +static int +bnad_tx_res_alloc(struct bnad *bnad, struct bna_res_info *res_info, + uint tx_id) +{ + int i, err = 0; + + for (i = 0; i < BNA_TX_RES_T_MAX; i++) { + if (res_info[i].res_type == BNA_RES_T_MEM) + err = bnad_mem_alloc(bnad, + &res_info[i].res_u.mem_info); + else if (res_info[i].res_type == BNA_RES_T_INTR) + err = bnad_txrx_irq_alloc(bnad, BNAD_INTR_TX, tx_id, + &res_info[i].res_u.intr_info); + if (err) + goto err_return; + } + return 0; + +err_return: + bnad_tx_res_free(bnad, res_info); + return err; +} + +/* Free Rx object Resources */ +static void +bnad_rx_res_free(struct bnad *bnad, struct bna_res_info *res_info) +{ + int i; + + for (i = 0; i < BNA_RX_RES_T_MAX; i++) { + if (res_info[i].res_type == BNA_RES_T_MEM) + bnad_mem_free(bnad, &res_info[i].res_u.mem_info); + else if (res_info[i].res_type == BNA_RES_T_INTR) + bnad_txrx_irq_free(bnad, &res_info[i].res_u.intr_info); + } +} + +/* Allocates memory and interrupt resources for Rx object */ +static int +bnad_rx_res_alloc(struct bnad *bnad, struct bna_res_info *res_info, + uint rx_id) +{ + int i, err = 0; + + /* All memory needs to be allocated before setup_ccbs */ + for (i = 0; i < BNA_RX_RES_T_MAX; i++) { + if (res_info[i].res_type == BNA_RES_T_MEM) + err = bnad_mem_alloc(bnad, + &res_info[i].res_u.mem_info); + else if (res_info[i].res_type == BNA_RES_T_INTR) + err = bnad_txrx_irq_alloc(bnad, BNAD_INTR_RX, rx_id, + &res_info[i].res_u.intr_info); + if (err) + goto err_return; + } + return 0; + +err_return: + bnad_rx_res_free(bnad, res_info); + return err; +} + +/* Timer callbacks */ +/* a) IOC timer */ +static void +bnad_ioc_timeout(unsigned long data) +{ + struct bnad *bnad = (struct bnad *)data; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + bfa_ioc_timeout((void *) &bnad->bna.device.ioc); + spin_unlock_irqrestore(&bnad->bna_lock, flags); +} + +static void +bnad_ioc_hb_check(unsigned long data) +{ + struct bnad *bnad = (struct bnad *)data; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + bfa_ioc_hb_check((void *) &bnad->bna.device.ioc); + spin_unlock_irqrestore(&bnad->bna_lock, flags); +} + +static void +bnad_ioc_sem_timeout(unsigned long data) +{ + struct bnad *bnad = (struct bnad *)data; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + bfa_ioc_sem_timeout((void *) &bnad->bna.device.ioc); + spin_unlock_irqrestore(&bnad->bna_lock, flags); +} + +/* + * All timer routines use bnad->bna_lock to protect against + * the following race, which may occur in case of no locking: + * Time CPU m CPU n + * 0 1 = test_bit + * 1 clear_bit + * 2 del_timer_sync + * 3 mod_timer + */ + +/* b) Dynamic Interrupt Moderation Timer */ +static void +bnad_dim_timeout(unsigned long data) +{ + struct bnad *bnad = (struct bnad *)data; + struct bnad_rx_info *rx_info; + struct bnad_rx_ctrl *rx_ctrl; + int i, j; + unsigned long flags; + + if (!netif_carrier_ok(bnad->netdev)) + return; + + spin_lock_irqsave(&bnad->bna_lock, flags); + for (i = 0; i < bnad->num_rx; i++) { + rx_info = &bnad->rx_info[i]; + if (!rx_info->rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + rx_ctrl = &rx_info->rx_ctrl[j]; + if (!rx_ctrl->ccb) + continue; + bna_rx_dim_update(rx_ctrl->ccb); + } + } + + /* Check for BNAD_CF_DIM_ENABLED, does not eleminate a race */ + if (test_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags)) + mod_timer(&bnad->dim_timer, + jiffies + msecs_to_jiffies(BNAD_DIM_TIMER_FREQ)); + spin_unlock_irqrestore(&bnad->bna_lock, flags); +} + +/* c) Statistics Timer */ +static void +bnad_stats_timeout(unsigned long data) +{ + struct bnad *bnad = (struct bnad *)data; + unsigned long flags; + + if (!netif_running(bnad->netdev) || + !test_bit(BNAD_RF_STATS_TIMER_RUNNING, &bnad->run_flags)) + return; + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_stats_get(&bnad->bna); + spin_unlock_irqrestore(&bnad->bna_lock, flags); +} + +/* + * Set up timer for DIM + * Called with bnad->bna_lock held + */ +void +bnad_dim_timer_start(struct bnad *bnad) +{ + if (bnad->cfg_flags & BNAD_CF_DIM_ENABLED && + !test_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags)) { + setup_timer(&bnad->dim_timer, bnad_dim_timeout, + (unsigned long)bnad); + set_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags); + mod_timer(&bnad->dim_timer, + jiffies + msecs_to_jiffies(BNAD_DIM_TIMER_FREQ)); + } +} + +/* + * Set up timer for statistics + * Called with mutex_lock(&bnad->conf_mutex) held + */ +static void +bnad_stats_timer_start(struct bnad *bnad) +{ + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (!test_and_set_bit(BNAD_RF_STATS_TIMER_RUNNING, &bnad->run_flags)) { + setup_timer(&bnad->stats_timer, bnad_stats_timeout, + (unsigned long)bnad); + mod_timer(&bnad->stats_timer, + jiffies + msecs_to_jiffies(BNAD_STATS_TIMER_FREQ)); + } + spin_unlock_irqrestore(&bnad->bna_lock, flags); + +} + +/* + * Stops the stats timer + * Called with mutex_lock(&bnad->conf_mutex) held + */ +static void +bnad_stats_timer_stop(struct bnad *bnad) +{ + int to_del = 0; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (test_and_clear_bit(BNAD_RF_STATS_TIMER_RUNNING, &bnad->run_flags)) + to_del = 1; + spin_unlock_irqrestore(&bnad->bna_lock, flags); + if (to_del) + del_timer_sync(&bnad->stats_timer); +} + +/* Utilities */ + +static void +bnad_netdev_mc_list_get(struct net_device *netdev, u8 *mc_list) +{ + int i = 1; /* Index 0 has broadcast address */ + struct netdev_hw_addr *mc_addr; + + netdev_for_each_mc_addr(mc_addr, netdev) { + memcpy(&mc_list[i * ETH_ALEN], &mc_addr->addr[0], + ETH_ALEN); + i++; + } +} + +static int +bnad_napi_poll_rx(struct napi_struct *napi, int budget) +{ + struct bnad_rx_ctrl *rx_ctrl = + container_of(napi, struct bnad_rx_ctrl, napi); + struct bna_ccb *ccb; + struct bnad *bnad; + int rcvd = 0; + + ccb = rx_ctrl->ccb; + + bnad = ccb->bnad; + + if (!netif_carrier_ok(bnad->netdev)) + goto poll_exit; + + rcvd = bnad_poll_cq(bnad, ccb, budget); + if (rcvd == budget) + return rcvd; + +poll_exit: + napi_complete((napi)); + + BNAD_UPDATE_CTR(bnad, netif_rx_complete); + + bnad_enable_rx_irq(bnad, ccb); + return rcvd; +} + +static int +bnad_napi_poll_txrx(struct napi_struct *napi, int budget) +{ + struct bnad_rx_ctrl *rx_ctrl = + container_of(napi, struct bnad_rx_ctrl, napi); + struct bna_ccb *ccb; + struct bnad *bnad; + int rcvd = 0; + int i, j; + + ccb = rx_ctrl->ccb; + + bnad = ccb->bnad; + + if (!netif_carrier_ok(bnad->netdev)) + goto poll_exit; + + /* Handle Tx Completions, if any */ + for (i = 0; i < bnad->num_tx; i++) { + for (j = 0; j < bnad->num_txq_per_tx; j++) + bnad_tx(bnad, bnad->tx_info[i].tcb[j]); + } + + /* Handle Rx Completions */ + rcvd = bnad_poll_cq(bnad, ccb, budget); + if (rcvd == budget) + return rcvd; +poll_exit: + napi_complete((napi)); + + BNAD_UPDATE_CTR(bnad, netif_rx_complete); + + bnad_enable_txrx_irqs(bnad); + return rcvd; +} + +static void +bnad_napi_enable(struct bnad *bnad, u32 rx_id) +{ + int (*napi_poll) (struct napi_struct *, int); + struct bnad_rx_ctrl *rx_ctrl; + int i; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (bnad->cfg_flags & BNAD_CF_MSIX) + napi_poll = bnad_napi_poll_rx; + else + napi_poll = bnad_napi_poll_txrx; + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Initialize & enable NAPI */ + for (i = 0; i < bnad->num_rxp_per_rx; i++) { + rx_ctrl = &bnad->rx_info[rx_id].rx_ctrl[i]; + netif_napi_add(bnad->netdev, &rx_ctrl->napi, + napi_poll, 64); + napi_enable(&rx_ctrl->napi); + } +} + +static void +bnad_napi_disable(struct bnad *bnad, u32 rx_id) +{ + int i; + + /* First disable and then clean up */ + for (i = 0; i < bnad->num_rxp_per_rx; i++) { + napi_disable(&bnad->rx_info[rx_id].rx_ctrl[i].napi); + netif_napi_del(&bnad->rx_info[rx_id].rx_ctrl[i].napi); + } +} + +/* Should be held with conf_lock held */ +void +bnad_cleanup_tx(struct bnad *bnad, uint tx_id) +{ + struct bnad_tx_info *tx_info = &bnad->tx_info[tx_id]; + struct bna_res_info *res_info = &bnad->tx_res_info[tx_id].res_info[0]; + unsigned long flags; + + if (!tx_info->tx) + return; + + init_completion(&bnad->bnad_completions.tx_comp); + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_tx_disable(tx_info->tx, BNA_HARD_CLEANUP, bnad_cb_tx_disabled); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + wait_for_completion(&bnad->bnad_completions.tx_comp); + + if (tx_info->tcb[0]->intr_type == BNA_INTR_T_MSIX) + bnad_tx_msix_unregister(bnad, tx_info, + bnad->num_txq_per_tx); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_tx_destroy(tx_info->tx); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + tx_info->tx = NULL; + + if (0 == tx_id) + tasklet_kill(&bnad->tx_free_tasklet); + + bnad_tx_res_free(bnad, res_info); +} + +/* Should be held with conf_lock held */ +int +bnad_setup_tx(struct bnad *bnad, uint tx_id) +{ + int err; + struct bnad_tx_info *tx_info = &bnad->tx_info[tx_id]; + struct bna_res_info *res_info = &bnad->tx_res_info[tx_id].res_info[0]; + struct bna_intr_info *intr_info = + &res_info[BNA_TX_RES_INTR_T_TXCMPL].res_u.intr_info; + struct bna_tx_config *tx_config = &bnad->tx_config[tx_id]; + struct bna_tx_event_cbfn tx_cbfn; + struct bna_tx *tx; + unsigned long flags; + + /* Initialize the Tx object configuration */ + tx_config->num_txq = bnad->num_txq_per_tx; + tx_config->txq_depth = bnad->txq_depth; + tx_config->tx_type = BNA_TX_T_REGULAR; + + /* Initialize the tx event handlers */ + tx_cbfn.tcb_setup_cbfn = bnad_cb_tcb_setup; + tx_cbfn.tcb_destroy_cbfn = bnad_cb_tcb_destroy; + tx_cbfn.tx_stall_cbfn = bnad_cb_tx_stall; + tx_cbfn.tx_resume_cbfn = bnad_cb_tx_resume; + tx_cbfn.tx_cleanup_cbfn = bnad_cb_tx_cleanup; + + /* Get BNA's resource requirement for one tx object */ + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_tx_res_req(bnad->num_txq_per_tx, + bnad->txq_depth, res_info); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Fill Unmap Q memory requirements */ + BNAD_FILL_UNMAPQ_MEM_REQ( + &res_info[BNA_TX_RES_MEM_T_UNMAPQ], + bnad->num_txq_per_tx, + BNAD_TX_UNMAPQ_DEPTH); + + /* Allocate resources */ + err = bnad_tx_res_alloc(bnad, res_info, tx_id); + if (err) + return err; + + /* Ask BNA to create one Tx object, supplying required resources */ + spin_lock_irqsave(&bnad->bna_lock, flags); + tx = bna_tx_create(&bnad->bna, bnad, tx_config, &tx_cbfn, res_info, + tx_info); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + if (!tx) + goto err_return; + tx_info->tx = tx; + + /* Register ISR for the Tx object */ + if (intr_info->intr_type == BNA_INTR_T_MSIX) { + err = bnad_tx_msix_register(bnad, tx_info, + tx_id, bnad->num_txq_per_tx); + if (err) + goto err_return; + } + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_tx_enable(tx); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + return 0; + +err_return: + bnad_tx_res_free(bnad, res_info); + return err; +} + +/* Setup the rx config for bna_rx_create */ +/* bnad decides the configuration */ +static void +bnad_init_rx_config(struct bnad *bnad, struct bna_rx_config *rx_config) +{ + rx_config->rx_type = BNA_RX_T_REGULAR; + rx_config->num_paths = bnad->num_rxp_per_rx; + + if (bnad->num_rxp_per_rx > 1) { + rx_config->rss_status = BNA_STATUS_T_ENABLED; + rx_config->rss_config.hash_type = + (BFI_RSS_T_V4_TCP | + BFI_RSS_T_V6_TCP | + BFI_RSS_T_V4_IP | + BFI_RSS_T_V6_IP); + rx_config->rss_config.hash_mask = + bnad->num_rxp_per_rx - 1; + get_random_bytes(rx_config->rss_config.toeplitz_hash_key, + sizeof(rx_config->rss_config.toeplitz_hash_key)); + } else { + rx_config->rss_status = BNA_STATUS_T_DISABLED; + memset(&rx_config->rss_config, 0, + sizeof(rx_config->rss_config)); + } + rx_config->rxp_type = BNA_RXP_SLR; + rx_config->q_depth = bnad->rxq_depth; + + rx_config->small_buff_size = BFI_SMALL_RXBUF_SIZE; + + rx_config->vlan_strip_status = BNA_STATUS_T_ENABLED; +} + +/* Called with mutex_lock(&bnad->conf_mutex) held */ +void +bnad_cleanup_rx(struct bnad *bnad, uint rx_id) +{ + struct bnad_rx_info *rx_info = &bnad->rx_info[rx_id]; + struct bna_rx_config *rx_config = &bnad->rx_config[rx_id]; + struct bna_res_info *res_info = &bnad->rx_res_info[rx_id].res_info[0]; + unsigned long flags; + int dim_timer_del = 0; + + if (!rx_info->rx) + return; + + if (0 == rx_id) { + spin_lock_irqsave(&bnad->bna_lock, flags); + dim_timer_del = bnad_dim_timer_running(bnad); + if (dim_timer_del) + clear_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + if (dim_timer_del) + del_timer_sync(&bnad->dim_timer); + } + + bnad_napi_disable(bnad, rx_id); + + init_completion(&bnad->bnad_completions.rx_comp); + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_rx_disable(rx_info->rx, BNA_HARD_CLEANUP, bnad_cb_rx_disabled); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + wait_for_completion(&bnad->bnad_completions.rx_comp); + + if (rx_info->rx_ctrl[0].ccb->intr_type == BNA_INTR_T_MSIX) + bnad_rx_msix_unregister(bnad, rx_info, rx_config->num_paths); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_rx_destroy(rx_info->rx); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + rx_info->rx = NULL; + + bnad_rx_res_free(bnad, res_info); +} + +/* Called with mutex_lock(&bnad->conf_mutex) held */ +int +bnad_setup_rx(struct bnad *bnad, uint rx_id) +{ + int err; + struct bnad_rx_info *rx_info = &bnad->rx_info[rx_id]; + struct bna_res_info *res_info = &bnad->rx_res_info[rx_id].res_info[0]; + struct bna_intr_info *intr_info = + &res_info[BNA_RX_RES_T_INTR].res_u.intr_info; + struct bna_rx_config *rx_config = &bnad->rx_config[rx_id]; + struct bna_rx_event_cbfn rx_cbfn; + struct bna_rx *rx; + unsigned long flags; + + /* Initialize the Rx object configuration */ + bnad_init_rx_config(bnad, rx_config); + + /* Initialize the Rx event handlers */ + rx_cbfn.rcb_setup_cbfn = bnad_cb_rcb_setup; + rx_cbfn.rcb_destroy_cbfn = NULL; + rx_cbfn.ccb_setup_cbfn = bnad_cb_ccb_setup; + rx_cbfn.ccb_destroy_cbfn = bnad_cb_ccb_destroy; + rx_cbfn.rx_cleanup_cbfn = bnad_cb_rx_cleanup; + rx_cbfn.rx_post_cbfn = bnad_cb_rx_post; + + /* Get BNA's resource requirement for one Rx object */ + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_rx_res_req(rx_config, res_info); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Fill Unmap Q memory requirements */ + BNAD_FILL_UNMAPQ_MEM_REQ( + &res_info[BNA_RX_RES_MEM_T_UNMAPQ], + rx_config->num_paths + + ((rx_config->rxp_type == BNA_RXP_SINGLE) ? 0 : + rx_config->num_paths), BNAD_RX_UNMAPQ_DEPTH); + + /* Allocate resource */ + err = bnad_rx_res_alloc(bnad, res_info, rx_id); + if (err) + return err; + + /* Ask BNA to create one Rx object, supplying required resources */ + spin_lock_irqsave(&bnad->bna_lock, flags); + rx = bna_rx_create(&bnad->bna, bnad, rx_config, &rx_cbfn, res_info, + rx_info); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + if (!rx) + goto err_return; + rx_info->rx = rx; + + /* Register ISR for the Rx object */ + if (intr_info->intr_type == BNA_INTR_T_MSIX) { + err = bnad_rx_msix_register(bnad, rx_info, rx_id, + rx_config->num_paths); + if (err) + goto err_return; + } + + /* Enable NAPI */ + bnad_napi_enable(bnad, rx_id); + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (0 == rx_id) { + /* Set up Dynamic Interrupt Moderation Vector */ + if (bnad->cfg_flags & BNAD_CF_DIM_ENABLED) + bna_rx_dim_reconfig(&bnad->bna, bna_napi_dim_vector); + + /* Enable VLAN filtering only on the default Rx */ + bna_rx_vlanfilter_enable(rx); + + /* Start the DIM timer */ + bnad_dim_timer_start(bnad); + } + + bna_rx_enable(rx); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + return 0; + +err_return: + bnad_cleanup_rx(bnad, rx_id); + return err; +} + +/* Called with conf_lock & bnad->bna_lock held */ +void +bnad_tx_coalescing_timeo_set(struct bnad *bnad) +{ + struct bnad_tx_info *tx_info; + + tx_info = &bnad->tx_info[0]; + if (!tx_info->tx) + return; + + bna_tx_coalescing_timeo_set(tx_info->tx, bnad->tx_coalescing_timeo); +} + +/* Called with conf_lock & bnad->bna_lock held */ +void +bnad_rx_coalescing_timeo_set(struct bnad *bnad) +{ + struct bnad_rx_info *rx_info; + int i; + + for (i = 0; i < bnad->num_rx; i++) { + rx_info = &bnad->rx_info[i]; + if (!rx_info->rx) + continue; + bna_rx_coalescing_timeo_set(rx_info->rx, + bnad->rx_coalescing_timeo); + } +} + +/* + * Called with bnad->bna_lock held + */ +static int +bnad_mac_addr_set_locked(struct bnad *bnad, u8 *mac_addr) +{ + int ret; + + if (!is_valid_ether_addr(mac_addr)) + return -EADDRNOTAVAIL; + + /* If datapath is down, pretend everything went through */ + if (!bnad->rx_info[0].rx) + return 0; + + ret = bna_rx_ucast_set(bnad->rx_info[0].rx, mac_addr, NULL); + if (ret != BNA_CB_SUCCESS) + return -EADDRNOTAVAIL; + + return 0; +} + +/* Should be called with conf_lock held */ +static int +bnad_enable_default_bcast(struct bnad *bnad) +{ + struct bnad_rx_info *rx_info = &bnad->rx_info[0]; + int ret; + unsigned long flags; + + init_completion(&bnad->bnad_completions.mcast_comp); + + spin_lock_irqsave(&bnad->bna_lock, flags); + ret = bna_rx_mcast_add(rx_info->rx, (u8 *)bnad_bcast_addr, + bnad_cb_rx_mcast_add); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + if (ret == BNA_CB_SUCCESS) + wait_for_completion(&bnad->bnad_completions.mcast_comp); + else + return -ENODEV; + + if (bnad->bnad_completions.mcast_comp_status != BNA_CB_SUCCESS) + return -ENODEV; + + return 0; +} + +/* Statistics utilities */ +void +bnad_netdev_qstats_fill(struct bnad *bnad) +{ + struct net_device_stats *net_stats = &bnad->net_stats; + int i, j; + + for (i = 0; i < bnad->num_rx; i++) { + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + if (bnad->rx_info[i].rx_ctrl[j].ccb) { + net_stats->rx_packets += bnad->rx_info[i]. + rx_ctrl[j].ccb->rcb[0]->rxq->rx_packets; + net_stats->rx_bytes += bnad->rx_info[i]. + rx_ctrl[j].ccb->rcb[0]->rxq->rx_bytes; + if (bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1] && + bnad->rx_info[i].rx_ctrl[j].ccb-> + rcb[1]->rxq) { + net_stats->rx_packets += + bnad->rx_info[i].rx_ctrl[j]. + ccb->rcb[1]->rxq->rx_packets; + net_stats->rx_bytes += + bnad->rx_info[i].rx_ctrl[j]. + ccb->rcb[1]->rxq->rx_bytes; + } + } + } + } + for (i = 0; i < bnad->num_tx; i++) { + for (j = 0; j < bnad->num_txq_per_tx; j++) { + if (bnad->tx_info[i].tcb[j]) { + net_stats->tx_packets += + bnad->tx_info[i].tcb[j]->txq->tx_packets; + net_stats->tx_bytes += + bnad->tx_info[i].tcb[j]->txq->tx_bytes; + } + } + } +} + +/* + * Must be called with the bna_lock held. + */ +void +bnad_netdev_hwstats_fill(struct bnad *bnad) +{ + struct bfi_ll_stats_mac *mac_stats; + struct net_device_stats *net_stats = &bnad->net_stats; + u64 bmap; + int i; + + mac_stats = &bnad->stats.bna_stats->hw_stats->mac_stats; + net_stats->rx_errors = + mac_stats->rx_fcs_error + mac_stats->rx_alignment_error + + mac_stats->rx_frame_length_error + mac_stats->rx_code_error + + mac_stats->rx_undersize; + net_stats->tx_errors = mac_stats->tx_fcs_error + + mac_stats->tx_undersize; + net_stats->rx_dropped = mac_stats->rx_drop; + net_stats->tx_dropped = mac_stats->tx_drop; + net_stats->multicast = mac_stats->rx_multicast; + net_stats->collisions = mac_stats->tx_total_collision; + + net_stats->rx_length_errors = mac_stats->rx_frame_length_error; + + /* receive ring buffer overflow ?? */ + + net_stats->rx_crc_errors = mac_stats->rx_fcs_error; + net_stats->rx_frame_errors = mac_stats->rx_alignment_error; + /* recv'r fifo overrun */ + bmap = (u64)bnad->stats.bna_stats->rxf_bmap[0] | + ((u64)bnad->stats.bna_stats->rxf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_RXF_ID_MAX); i++) { + if (bmap & 1) { + net_stats->rx_fifo_errors = + bnad->stats.bna_stats-> + hw_stats->rxf_stats[i].frame_drops; + break; + } + bmap >>= 1; + } +} + +static void +bnad_mbox_irq_sync(struct bnad *bnad) +{ + u32 irq; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (bnad->cfg_flags & BNAD_CF_MSIX) + irq = bnad->msix_table[bnad->msix_num - 1].vector; + else + irq = bnad->pcidev->irq; + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + synchronize_irq(irq); +} + +/* Utility used by bnad_start_xmit, for doing TSO */ +static int +bnad_tso_prepare(struct bnad *bnad, struct sk_buff *skb) +{ + int err; + + /* SKB_GSO_TCPV4 and SKB_GSO_TCPV6 is defined since 2.6.18. */ + BUG_ON(!(skb_shinfo(skb)->gso_type == SKB_GSO_TCPV4 || + skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6)); + if (skb_header_cloned(skb)) { + err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (err) { + BNAD_UPDATE_CTR(bnad, tso_err); + return err; + } + } + + /* + * For TSO, the TCP checksum field is seeded with pseudo-header sum + * excluding the length field. + */ + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *iph = ip_hdr(skb); + + /* Do we really need these? */ + iph->tot_len = 0; + iph->check = 0; + + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0, + IPPROTO_TCP, 0); + BNAD_UPDATE_CTR(bnad, tso4); + } else { + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + + BUG_ON(!(skb->protocol == htons(ETH_P_IPV6))); + ipv6h->payload_len = 0; + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, 0, + IPPROTO_TCP, 0); + BNAD_UPDATE_CTR(bnad, tso6); + } + + return 0; +} + +/* + * Initialize Q numbers depending on Rx Paths + * Called with bnad->bna_lock held, because of cfg_flags + * access. + */ +static void +bnad_q_num_init(struct bnad *bnad) +{ + int rxps; + + rxps = min((uint)num_online_cpus(), + (uint)(BNAD_MAX_RXS * BNAD_MAX_RXPS_PER_RX)); + + if (!(bnad->cfg_flags & BNAD_CF_MSIX)) + rxps = 1; /* INTx */ + + bnad->num_rx = 1; + bnad->num_tx = 1; + bnad->num_rxp_per_rx = rxps; + bnad->num_txq_per_tx = BNAD_TXQ_NUM; +} + +/* + * Adjusts the Q numbers, given a number of msix vectors + * Give preference to RSS as opposed to Tx priority Queues, + * in such a case, just use 1 Tx Q + * Called with bnad->bna_lock held b'cos of cfg_flags access + */ +static void +bnad_q_num_adjust(struct bnad *bnad, int msix_vectors) +{ + bnad->num_txq_per_tx = 1; + if ((msix_vectors >= (bnad->num_tx * bnad->num_txq_per_tx) + + bnad_rxqs_per_cq + BNAD_MAILBOX_MSIX_VECTORS) && + (bnad->cfg_flags & BNAD_CF_MSIX)) { + bnad->num_rxp_per_rx = msix_vectors - + (bnad->num_tx * bnad->num_txq_per_tx) - + BNAD_MAILBOX_MSIX_VECTORS; + } else + bnad->num_rxp_per_rx = 1; +} + +static void +bnad_set_netdev_perm_addr(struct bnad *bnad) +{ + struct net_device *netdev = bnad->netdev; + + memcpy(netdev->perm_addr, &bnad->perm_addr, netdev->addr_len); + if (is_zero_ether_addr(netdev->dev_addr)) + memcpy(netdev->dev_addr, &bnad->perm_addr, netdev->addr_len); +} + +/* Enable / disable device */ +static void +bnad_device_disable(struct bnad *bnad) +{ + unsigned long flags; + + init_completion(&bnad->bnad_completions.ioc_comp); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_device_disable(&bnad->bna.device, BNA_HARD_CLEANUP); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + wait_for_completion(&bnad->bnad_completions.ioc_comp); + +} + +static int +bnad_device_enable(struct bnad *bnad) +{ + int err = 0; + unsigned long flags; + + init_completion(&bnad->bnad_completions.ioc_comp); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_device_enable(&bnad->bna.device); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + wait_for_completion(&bnad->bnad_completions.ioc_comp); + + if (bnad->bnad_completions.ioc_comp_status) + err = bnad->bnad_completions.ioc_comp_status; + + return err; +} + +/* Free BNA resources */ +static void +bnad_res_free(struct bnad *bnad) +{ + int i; + struct bna_res_info *res_info = &bnad->res_info[0]; + + for (i = 0; i < BNA_RES_T_MAX; i++) { + if (res_info[i].res_type == BNA_RES_T_MEM) + bnad_mem_free(bnad, &res_info[i].res_u.mem_info); + else + bnad_mbox_irq_free(bnad, &res_info[i].res_u.intr_info); + } +} + +/* Allocates memory and interrupt resources for BNA */ +static int +bnad_res_alloc(struct bnad *bnad) +{ + int i, err; + struct bna_res_info *res_info = &bnad->res_info[0]; + + for (i = 0; i < BNA_RES_T_MAX; i++) { + if (res_info[i].res_type == BNA_RES_T_MEM) + err = bnad_mem_alloc(bnad, &res_info[i].res_u.mem_info); + else + err = bnad_mbox_irq_alloc(bnad, + &res_info[i].res_u.intr_info); + if (err) + goto err_return; + } + return 0; + +err_return: + bnad_res_free(bnad); + return err; +} + +/* Interrupt enable / disable */ +static void +bnad_enable_msix(struct bnad *bnad) +{ + int i, ret; + u32 tot_msix_num; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (!(bnad->cfg_flags & BNAD_CF_MSIX)) { + spin_unlock_irqrestore(&bnad->bna_lock, flags); + return; + } + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + if (bnad->msix_table) + return; + + tot_msix_num = bnad->msix_num + bnad->msix_diag_num; + + bnad->msix_table = + kcalloc(tot_msix_num, sizeof(struct msix_entry), GFP_KERNEL); + + if (!bnad->msix_table) + goto intx_mode; + + for (i = 0; i < tot_msix_num; i++) + bnad->msix_table[i].entry = i; + + ret = pci_enable_msix(bnad->pcidev, bnad->msix_table, tot_msix_num); + if (ret > 0) { + /* Not enough MSI-X vectors. */ + + spin_lock_irqsave(&bnad->bna_lock, flags); + /* ret = #of vectors that we got */ + bnad_q_num_adjust(bnad, ret); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + bnad->msix_num = (bnad->num_tx * bnad->num_txq_per_tx) + + (bnad->num_rx + * bnad->num_rxp_per_rx) + + BNAD_MAILBOX_MSIX_VECTORS; + tot_msix_num = bnad->msix_num + bnad->msix_diag_num; + + /* Try once more with adjusted numbers */ + /* If this fails, fall back to INTx */ + ret = pci_enable_msix(bnad->pcidev, bnad->msix_table, + tot_msix_num); + if (ret) + goto intx_mode; + + } else if (ret < 0) + goto intx_mode; + return; + +intx_mode: + + kfree(bnad->msix_table); + bnad->msix_table = NULL; + bnad->msix_num = 0; + bnad->msix_diag_num = 0; + spin_lock_irqsave(&bnad->bna_lock, flags); + bnad->cfg_flags &= ~BNAD_CF_MSIX; + bnad_q_num_init(bnad); + spin_unlock_irqrestore(&bnad->bna_lock, flags); +} + +static void +bnad_disable_msix(struct bnad *bnad) +{ + u32 cfg_flags; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + cfg_flags = bnad->cfg_flags; + if (bnad->cfg_flags & BNAD_CF_MSIX) + bnad->cfg_flags &= ~BNAD_CF_MSIX; + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + if (cfg_flags & BNAD_CF_MSIX) { + pci_disable_msix(bnad->pcidev); + kfree(bnad->msix_table); + bnad->msix_table = NULL; + } +} + +/* Netdev entry points */ +static int +bnad_open(struct net_device *netdev) +{ + int err; + struct bnad *bnad = netdev_priv(netdev); + struct bna_pause_config pause_config; + int mtu; + unsigned long flags; + + mutex_lock(&bnad->conf_mutex); + + /* Tx */ + err = bnad_setup_tx(bnad, 0); + if (err) + goto err_return; + + /* Rx */ + err = bnad_setup_rx(bnad, 0); + if (err) + goto cleanup_tx; + + /* Port */ + pause_config.tx_pause = 0; + pause_config.rx_pause = 0; + + mtu = ETH_HLEN + bnad->netdev->mtu + ETH_FCS_LEN; + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_port_mtu_set(&bnad->bna.port, mtu, NULL); + bna_port_pause_config(&bnad->bna.port, &pause_config, NULL); + bna_port_enable(&bnad->bna.port); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Enable broadcast */ + bnad_enable_default_bcast(bnad); + + /* Set the UCAST address */ + spin_lock_irqsave(&bnad->bna_lock, flags); + bnad_mac_addr_set_locked(bnad, netdev->dev_addr); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Start the stats timer */ + bnad_stats_timer_start(bnad); + + mutex_unlock(&bnad->conf_mutex); + + return 0; + +cleanup_tx: + bnad_cleanup_tx(bnad, 0); + +err_return: + mutex_unlock(&bnad->conf_mutex); + return err; +} + +static int +bnad_stop(struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + + mutex_lock(&bnad->conf_mutex); + + /* Stop the stats timer */ + bnad_stats_timer_stop(bnad); + + init_completion(&bnad->bnad_completions.port_comp); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_port_disable(&bnad->bna.port, BNA_HARD_CLEANUP, + bnad_cb_port_disabled); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + wait_for_completion(&bnad->bnad_completions.port_comp); + + bnad_cleanup_tx(bnad, 0); + bnad_cleanup_rx(bnad, 0); + + /* Synchronize mailbox IRQ */ + bnad_mbox_irq_sync(bnad); + + mutex_unlock(&bnad->conf_mutex); + + return 0; +} + +/* TX */ +/* + * bnad_start_xmit : Netdev entry point for Transmit + * Called under lock held by net_device + */ +static netdev_tx_t +bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + + u16 txq_prod, vlan_tag = 0; + u32 unmap_prod, wis, wis_used, wi_range; + u32 vectors, vect_id, i, acked; + u32 tx_id; + int err; + + struct bnad_tx_info *tx_info; + struct bna_tcb *tcb; + struct bnad_unmap_q *unmap_q; + dma_addr_t dma_addr; + struct bna_txq_entry *txqent; + bna_txq_wi_ctrl_flag_t flags; + + if (unlikely + (skb->len <= ETH_HLEN || skb->len > BFI_TX_MAX_DATA_PER_PKT)) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + /* + * Takes care of the Tx that is scheduled between clearing the flag + * and the netif_stop_queue() call. + */ + if (unlikely(!test_bit(BNAD_RF_TX_STARTED, &bnad->run_flags))) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + tx_id = 0; + + tx_info = &bnad->tx_info[tx_id]; + tcb = tx_info->tcb[tx_id]; + unmap_q = tcb->unmap_q; + + vectors = 1 + skb_shinfo(skb)->nr_frags; + if (vectors > BFI_TX_MAX_VECTORS_PER_PKT) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + wis = BNA_TXQ_WI_NEEDED(vectors); /* 4 vectors per work item */ + acked = 0; + if (unlikely + (wis > BNA_QE_FREE_CNT(tcb, tcb->q_depth) || + vectors > BNA_QE_FREE_CNT(unmap_q, unmap_q->q_depth))) { + if ((u16) (*tcb->hw_consumer_index) != + tcb->consumer_index && + !test_and_set_bit(BNAD_TXQ_FREE_SENT, &tcb->flags)) { + acked = bnad_free_txbufs(bnad, tcb); + bna_ib_ack(tcb->i_dbell, acked); + smp_mb__before_clear_bit(); + clear_bit(BNAD_TXQ_FREE_SENT, &tcb->flags); + } else { + netif_stop_queue(netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_stop); + } + + smp_mb(); + /* + * Check again to deal with race condition between + * netif_stop_queue here, and netif_wake_queue in + * interrupt handler which is not inside netif tx lock. + */ + if (likely + (wis > BNA_QE_FREE_CNT(tcb, tcb->q_depth) || + vectors > BNA_QE_FREE_CNT(unmap_q, unmap_q->q_depth))) { + BNAD_UPDATE_CTR(bnad, netif_queue_stop); + return NETDEV_TX_BUSY; + } else { + netif_wake_queue(netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_wakeup); + } + } + + unmap_prod = unmap_q->producer_index; + wis_used = 1; + vect_id = 0; + flags = 0; + + txq_prod = tcb->producer_index; + BNA_TXQ_QPGE_PTR_GET(txq_prod, tcb->sw_qpt, txqent, wi_range); + BUG_ON(!(wi_range <= tcb->q_depth)); + txqent->hdr.wi.reserved = 0; + txqent->hdr.wi.num_vectors = vectors; + txqent->hdr.wi.opcode = + htons((skb_is_gso(skb) ? BNA_TXQ_WI_SEND_LSO : + BNA_TXQ_WI_SEND)); + + if (bnad->vlan_grp && vlan_tx_tag_present(skb)) { + vlan_tag = (u16) vlan_tx_tag_get(skb); + flags |= (BNA_TXQ_WI_CF_INS_PRIO | BNA_TXQ_WI_CF_INS_VLAN); + } + if (test_bit(BNAD_RF_CEE_RUNNING, &bnad->run_flags)) { + vlan_tag = + (tcb->priority & 0x7) << 13 | (vlan_tag & 0x1fff); + flags |= (BNA_TXQ_WI_CF_INS_PRIO | BNA_TXQ_WI_CF_INS_VLAN); + } + + txqent->hdr.wi.vlan_tag = htons(vlan_tag); + + if (skb_is_gso(skb)) { + err = bnad_tso_prepare(bnad, skb); + if (err) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + txqent->hdr.wi.lso_mss = htons(skb_is_gso(skb)); + flags |= (BNA_TXQ_WI_CF_IP_CKSUM | BNA_TXQ_WI_CF_TCP_CKSUM); + txqent->hdr.wi.l4_hdr_size_n_offset = + htons(BNA_TXQ_WI_L4_HDR_N_OFFSET + (tcp_hdrlen(skb) >> 2, + skb_transport_offset(skb))); + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + u8 proto = 0; + + txqent->hdr.wi.lso_mss = 0; + + if (skb->protocol == htons(ETH_P_IP)) + proto = ip_hdr(skb)->protocol; + else if (skb->protocol == htons(ETH_P_IPV6)) { + /* nexthdr may not be TCP immediately. */ + proto = ipv6_hdr(skb)->nexthdr; + } + if (proto == IPPROTO_TCP) { + flags |= BNA_TXQ_WI_CF_TCP_CKSUM; + txqent->hdr.wi.l4_hdr_size_n_offset = + htons(BNA_TXQ_WI_L4_HDR_N_OFFSET + (0, skb_transport_offset(skb))); + + BNAD_UPDATE_CTR(bnad, tcpcsum_offload); + + BUG_ON(!(skb_headlen(skb) >= + skb_transport_offset(skb) + tcp_hdrlen(skb))); + + } else if (proto == IPPROTO_UDP) { + flags |= BNA_TXQ_WI_CF_UDP_CKSUM; + txqent->hdr.wi.l4_hdr_size_n_offset = + htons(BNA_TXQ_WI_L4_HDR_N_OFFSET + (0, skb_transport_offset(skb))); + + BNAD_UPDATE_CTR(bnad, udpcsum_offload); + + BUG_ON(!(skb_headlen(skb) >= + skb_transport_offset(skb) + + sizeof(struct udphdr))); + } else { + err = skb_checksum_help(skb); + BNAD_UPDATE_CTR(bnad, csum_help); + if (err) { + dev_kfree_skb(skb); + BNAD_UPDATE_CTR(bnad, csum_help_err); + return NETDEV_TX_OK; + } + } + } else { + txqent->hdr.wi.lso_mss = 0; + txqent->hdr.wi.l4_hdr_size_n_offset = 0; + } + + txqent->hdr.wi.flags = htons(flags); + + txqent->hdr.wi.frame_length = htonl(skb->len); + + unmap_q->unmap_array[unmap_prod].skb = skb; + BUG_ON(!(skb_headlen(skb) <= BFI_TX_MAX_DATA_PER_VECTOR)); + txqent->vector[vect_id].length = htons(skb_headlen(skb)); + dma_addr = pci_map_single(bnad->pcidev, skb->data, skb_headlen(skb), + PCI_DMA_TODEVICE); + pci_unmap_addr_set(&unmap_q->unmap_array[unmap_prod], dma_addr, + dma_addr); + + BNA_SET_DMA_ADDR(dma_addr, &txqent->vector[vect_id].host_addr); + BNA_QE_INDX_ADD(unmap_prod, 1, unmap_q->q_depth); + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + u32 size = frag->size; + + if (++vect_id == BFI_TX_MAX_VECTORS_PER_WI) { + vect_id = 0; + if (--wi_range) + txqent++; + else { + BNA_QE_INDX_ADD(txq_prod, wis_used, + tcb->q_depth); + wis_used = 0; + BNA_TXQ_QPGE_PTR_GET(txq_prod, tcb->sw_qpt, + txqent, wi_range); + BUG_ON(!(wi_range <= tcb->q_depth)); + } + wis_used++; + txqent->hdr.wi_ext.opcode = htons(BNA_TXQ_WI_EXTENSION); + } + + BUG_ON(!(size <= BFI_TX_MAX_DATA_PER_VECTOR)); + txqent->vector[vect_id].length = htons(size); + dma_addr = + pci_map_page(bnad->pcidev, frag->page, + frag->page_offset, size, + PCI_DMA_TODEVICE); + pci_unmap_addr_set(&unmap_q->unmap_array[unmap_prod], dma_addr, + dma_addr); + BNA_SET_DMA_ADDR(dma_addr, &txqent->vector[vect_id].host_addr); + BNA_QE_INDX_ADD(unmap_prod, 1, unmap_q->q_depth); + } + + unmap_q->producer_index = unmap_prod; + BNA_QE_INDX_ADD(txq_prod, wis_used, tcb->q_depth); + tcb->producer_index = txq_prod; + + smp_mb(); + bna_txq_prod_indx_doorbell(tcb); + + if ((u16) (*tcb->hw_consumer_index) != tcb->consumer_index) + tasklet_schedule(&bnad->tx_free_tasklet); + + return NETDEV_TX_OK; +} + +/* + * Used spin_lock to synchronize reading of stats structures, which + * is written by BNA under the same lock. + */ +static struct net_device_stats * +bnad_get_netdev_stats(struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + + memset(&bnad->net_stats, 0, sizeof(struct net_device_stats)); + + bnad_netdev_qstats_fill(bnad); + bnad_netdev_hwstats_fill(bnad); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + return &bnad->net_stats; +} + +static void +bnad_set_rx_mode(struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + u32 new_mask, valid_mask; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + + new_mask = valid_mask = 0; + + if (netdev->flags & IFF_PROMISC) { + if (!(bnad->cfg_flags & BNAD_CF_PROMISC)) { + new_mask = BNAD_RXMODE_PROMISC_DEFAULT; + valid_mask = BNAD_RXMODE_PROMISC_DEFAULT; + bnad->cfg_flags |= BNAD_CF_PROMISC; + } + } else { + if (bnad->cfg_flags & BNAD_CF_PROMISC) { + new_mask = ~BNAD_RXMODE_PROMISC_DEFAULT; + valid_mask = BNAD_RXMODE_PROMISC_DEFAULT; + bnad->cfg_flags &= ~BNAD_CF_PROMISC; + } + } + + if (netdev->flags & IFF_ALLMULTI) { + if (!(bnad->cfg_flags & BNAD_CF_ALLMULTI)) { + new_mask |= BNA_RXMODE_ALLMULTI; + valid_mask |= BNA_RXMODE_ALLMULTI; + bnad->cfg_flags |= BNAD_CF_ALLMULTI; + } + } else { + if (bnad->cfg_flags & BNAD_CF_ALLMULTI) { + new_mask &= ~BNA_RXMODE_ALLMULTI; + valid_mask |= BNA_RXMODE_ALLMULTI; + bnad->cfg_flags &= ~BNAD_CF_ALLMULTI; + } + } + + bna_rx_mode_set(bnad->rx_info[0].rx, new_mask, valid_mask, NULL); + + if (!netdev_mc_empty(netdev)) { + u8 *mcaddr_list; + int mc_count = netdev_mc_count(netdev); + + /* Index 0 holds the broadcast address */ + mcaddr_list = + kzalloc((mc_count + 1) * ETH_ALEN, + GFP_ATOMIC); + if (!mcaddr_list) + return; + + memcpy(&mcaddr_list[0], &bnad_bcast_addr[0], ETH_ALEN); + + /* Copy rest of the MC addresses */ + bnad_netdev_mc_list_get(netdev, mcaddr_list); + + bna_rx_mcast_listset(bnad->rx_info[0].rx, mc_count + 1, + mcaddr_list, NULL); + + /* Should we enable BNAD_CF_ALLMULTI for err != 0 ? */ + kfree(mcaddr_list); + } + spin_unlock_irqrestore(&bnad->bna_lock, flags); +} + +/* + * bna_lock is used to sync writes to netdev->addr + * conf_lock cannot be used since this call may be made + * in a non-blocking context. + */ +static int +bnad_set_mac_address(struct net_device *netdev, void *mac_addr) +{ + int err; + struct bnad *bnad = netdev_priv(netdev); + struct sockaddr *sa = (struct sockaddr *)mac_addr; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + + err = bnad_mac_addr_set_locked(bnad, sa->sa_data); + + if (!err) + memcpy(netdev->dev_addr, sa->sa_data, netdev->addr_len); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + return err; +} + +static int +bnad_change_mtu(struct net_device *netdev, int new_mtu) +{ + int mtu, err = 0; + unsigned long flags; + + struct bnad *bnad = netdev_priv(netdev); + + if (new_mtu + ETH_HLEN < ETH_ZLEN || new_mtu > BNAD_JUMBO_MTU) + return -EINVAL; + + mutex_lock(&bnad->conf_mutex); + + netdev->mtu = new_mtu; + + mtu = ETH_HLEN + new_mtu + ETH_FCS_LEN; + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_port_mtu_set(&bnad->bna.port, mtu, NULL); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); + return err; +} + +static void +bnad_vlan_rx_register(struct net_device *netdev, + struct vlan_group *vlan_grp) +{ + struct bnad *bnad = netdev_priv(netdev); + + mutex_lock(&bnad->conf_mutex); + bnad->vlan_grp = vlan_grp; + mutex_unlock(&bnad->conf_mutex); +} + +static void +bnad_vlan_rx_add_vid(struct net_device *netdev, + unsigned short vid) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + + if (!bnad->rx_info[0].rx) + return; + + mutex_lock(&bnad->conf_mutex); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_rx_vlan_add(bnad->rx_info[0].rx, vid); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); +} + +static void +bnad_vlan_rx_kill_vid(struct net_device *netdev, + unsigned short vid) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + + if (!bnad->rx_info[0].rx) + return; + + mutex_lock(&bnad->conf_mutex); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_rx_vlan_del(bnad->rx_info[0].rx, vid); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void +bnad_netpoll(struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + struct bnad_rx_info *rx_info; + struct bnad_rx_ctrl *rx_ctrl; + u32 curr_mask; + int i, j; + + if (!(bnad->cfg_flags & BNAD_CF_MSIX)) { + bna_intx_disable(&bnad->bna, curr_mask); + bnad_isr(bnad->pcidev->irq, netdev); + bna_intx_enable(&bnad->bna, curr_mask); + } else { + for (i = 0; i < bnad->num_rx; i++) { + rx_info = &bnad->rx_info[i]; + if (!rx_info->rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + rx_ctrl = &rx_info->rx_ctrl[j]; + if (rx_ctrl->ccb) { + bnad_disable_rx_irq(bnad, + rx_ctrl->ccb); + bnad_netif_rx_schedule_poll(bnad, + rx_ctrl->ccb); + } + } + } + } +} +#endif + +static const struct net_device_ops bnad_netdev_ops = { + .ndo_open = bnad_open, + .ndo_stop = bnad_stop, + .ndo_start_xmit = bnad_start_xmit, + .ndo_get_stats = bnad_get_netdev_stats, + .ndo_set_rx_mode = bnad_set_rx_mode, + .ndo_set_multicast_list = bnad_set_rx_mode, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = bnad_set_mac_address, + .ndo_change_mtu = bnad_change_mtu, + .ndo_vlan_rx_register = bnad_vlan_rx_register, + .ndo_vlan_rx_add_vid = bnad_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = bnad_vlan_rx_kill_vid, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = bnad_netpoll +#endif +}; + +static void +bnad_netdev_init(struct bnad *bnad, bool using_dac) +{ + struct net_device *netdev = bnad->netdev; + + netdev->features |= NETIF_F_IPV6_CSUM; + netdev->features |= NETIF_F_TSO; + netdev->features |= NETIF_F_TSO6; + + netdev->features |= NETIF_F_GRO; + pr_warn("bna: GRO enabled, using kernel stack GRO\n"); + + netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; + + if (using_dac) + netdev->features |= NETIF_F_HIGHDMA; + + netdev->features |= + NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | + NETIF_F_HW_VLAN_FILTER; + + netdev->vlan_features = netdev->features; + netdev->mem_start = bnad->mmio_start; + netdev->mem_end = bnad->mmio_start + bnad->mmio_len - 1; + + netdev->netdev_ops = &bnad_netdev_ops; + bnad_set_ethtool_ops(netdev); +} + +/* + * 1. Initialize the bnad structure + * 2. Setup netdev pointer in pci_dev + * 3. Initialze Tx free tasklet + * 4. Initialize no. of TxQ & CQs & MSIX vectors + */ +static int +bnad_init(struct bnad *bnad, + struct pci_dev *pdev, struct net_device *netdev) +{ + unsigned long flags; + + SET_NETDEV_DEV(netdev, &pdev->dev); + pci_set_drvdata(pdev, netdev); + + bnad->netdev = netdev; + bnad->pcidev = pdev; + bnad->mmio_start = pci_resource_start(pdev, 0); + bnad->mmio_len = pci_resource_len(pdev, 0); + bnad->bar0 = ioremap_nocache(bnad->mmio_start, bnad->mmio_len); + if (!bnad->bar0) { + dev_err(&pdev->dev, "ioremap for bar0 failed\n"); + pci_set_drvdata(pdev, NULL); + return -ENOMEM; + } + pr_info("bar0 mapped to %p, len %llu\n", bnad->bar0, + (unsigned long long) bnad->mmio_len); + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (!bnad_msix_disable) + bnad->cfg_flags = BNAD_CF_MSIX; + + bnad->cfg_flags |= BNAD_CF_DIM_ENABLED; + + bnad_q_num_init(bnad); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + bnad->msix_num = (bnad->num_tx * bnad->num_txq_per_tx) + + (bnad->num_rx * bnad->num_rxp_per_rx) + + BNAD_MAILBOX_MSIX_VECTORS; + bnad->msix_diag_num = 2; /* 1 for Tx, 1 for Rx */ + + bnad->txq_depth = BNAD_TXQ_DEPTH; + bnad->rxq_depth = BNAD_RXQ_DEPTH; + bnad->rx_csum = true; + + bnad->tx_coalescing_timeo = BFI_TX_COALESCING_TIMEO; + bnad->rx_coalescing_timeo = BFI_RX_COALESCING_TIMEO; + + tasklet_init(&bnad->tx_free_tasklet, bnad_tx_free_tasklet, + (unsigned long)bnad); + + return 0; +} + +/* + * Must be called after bnad_pci_uninit() + * so that iounmap() and pci_set_drvdata(NULL) + * happens only after PCI uninitialization. + */ +static void +bnad_uninit(struct bnad *bnad) +{ + if (bnad->bar0) + iounmap(bnad->bar0); + pci_set_drvdata(bnad->pcidev, NULL); +} + +/* + * Initialize locks + a) Per device mutes used for serializing configuration + changes from OS interface + b) spin lock used to protect bna state machine + */ +static void +bnad_lock_init(struct bnad *bnad) +{ + spin_lock_init(&bnad->bna_lock); + mutex_init(&bnad->conf_mutex); +} + +static void +bnad_lock_uninit(struct bnad *bnad) +{ + mutex_destroy(&bnad->conf_mutex); +} + +/* PCI Initialization */ +static int +bnad_pci_init(struct bnad *bnad, + struct pci_dev *pdev, bool *using_dac) +{ + int err; + + err = pci_enable_device(pdev); + if (err) + return err; + err = pci_request_regions(pdev, BNAD_NAME); + if (err) + goto disable_device; + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) && + !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) { + *using_dac = 1; + } else { + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + err = pci_set_consistent_dma_mask(pdev, + DMA_BIT_MASK(32)); + if (err) + goto release_regions; + } + *using_dac = 0; + } + pci_set_master(pdev); + return 0; + +release_regions: + pci_release_regions(pdev); +disable_device: + pci_disable_device(pdev); + + return err; +} + +static void +bnad_pci_uninit(struct pci_dev *pdev) +{ + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static int __devinit +bnad_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *pcidev_id) +{ + bool using_dac; + int err; + struct bnad *bnad; + struct bna *bna; + struct net_device *netdev; + struct bfa_pcidev pcidev_info; + unsigned long flags; + + pr_info("bnad_pci_probe : (0x%p, 0x%p) PCI Func : (%d)\n", + pdev, pcidev_id, PCI_FUNC(pdev->devfn)); + + mutex_lock(&bnad_fwimg_mutex); + if (!cna_get_firmware_buf(pdev)) { + mutex_unlock(&bnad_fwimg_mutex); + pr_warn("Failed to load Firmware Image!\n"); + return -ENODEV; + } + mutex_unlock(&bnad_fwimg_mutex); + + /* + * Allocates sizeof(struct net_device + struct bnad) + * bnad = netdev->priv + */ + netdev = alloc_etherdev(sizeof(struct bnad)); + if (!netdev) { + dev_err(&pdev->dev, "alloc_etherdev failed\n"); + err = -ENOMEM; + return err; + } + bnad = netdev_priv(netdev); + + + /* + * PCI initialization + * Output : using_dac = 1 for 64 bit DMA + * = 0 for 32 bit DMA + */ + err = bnad_pci_init(bnad, pdev, &using_dac); + if (err) + goto free_netdev; + + bnad_lock_init(bnad); + /* + * Initialize bnad structure + * Setup relation between pci_dev & netdev + * Init Tx free tasklet + */ + err = bnad_init(bnad, pdev, netdev); + if (err) + goto pci_uninit; + /* Initialize netdev structure, set up ethtool ops */ + bnad_netdev_init(bnad, using_dac); + + bnad_enable_msix(bnad); + + /* Get resource requirement form bna */ + bna_res_req(&bnad->res_info[0]); + + /* Allocate resources from bna */ + err = bnad_res_alloc(bnad); + if (err) + goto free_netdev; + + bna = &bnad->bna; + + /* Setup pcidev_info for bna_init() */ + pcidev_info.pci_slot = PCI_SLOT(bnad->pcidev->devfn); + pcidev_info.pci_func = PCI_FUNC(bnad->pcidev->devfn); + pcidev_info.device_id = bnad->pcidev->device; + pcidev_info.pci_bar_kva = bnad->bar0; + + mutex_lock(&bnad->conf_mutex); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_init(bna, bnad, &pcidev_info, &bnad->res_info[0]); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + bnad->stats.bna_stats = &bna->stats; + + /* Set up timers */ + setup_timer(&bnad->bna.device.ioc.ioc_timer, bnad_ioc_timeout, + ((unsigned long)bnad)); + setup_timer(&bnad->bna.device.ioc.hb_timer, bnad_ioc_hb_check, + ((unsigned long)bnad)); + setup_timer(&bnad->bna.device.ioc.sem_timer, bnad_ioc_sem_timeout, + ((unsigned long)bnad)); + + /* Now start the timer before calling IOC */ + mod_timer(&bnad->bna.device.ioc.ioc_timer, + jiffies + msecs_to_jiffies(BNA_IOC_TIMER_FREQ)); + + /* + * Start the chip + * Don't care even if err != 0, bna state machine will + * deal with it + */ + err = bnad_device_enable(bnad); + + /* Get the burnt-in mac */ + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_port_mac_get(&bna->port, &bnad->perm_addr); + bnad_set_netdev_perm_addr(bnad); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); + + /* + * Make sure the link appears down to the stack + */ + netif_carrier_off(netdev); + + /* Finally, reguister with net_device layer */ + err = register_netdev(netdev); + if (err) { + pr_err("BNA : Registering with netdev failed\n"); + goto disable_device; + } + + return 0; + +disable_device: + mutex_lock(&bnad->conf_mutex); + bnad_device_disable(bnad); + del_timer_sync(&bnad->bna.device.ioc.ioc_timer); + del_timer_sync(&bnad->bna.device.ioc.sem_timer); + del_timer_sync(&bnad->bna.device.ioc.hb_timer); + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_uninit(bna); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + mutex_unlock(&bnad->conf_mutex); + + bnad_res_free(bnad); + bnad_disable_msix(bnad); +pci_uninit: + bnad_pci_uninit(pdev); + bnad_lock_uninit(bnad); + bnad_uninit(bnad); +free_netdev: + free_netdev(netdev); + return err; +} + +static void __devexit +bnad_pci_remove(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct bnad *bnad; + struct bna *bna; + unsigned long flags; + + if (!netdev) + return; + + pr_info("%s bnad_pci_remove\n", netdev->name); + bnad = netdev_priv(netdev); + bna = &bnad->bna; + + unregister_netdev(netdev); + + mutex_lock(&bnad->conf_mutex); + bnad_device_disable(bnad); + del_timer_sync(&bnad->bna.device.ioc.ioc_timer); + del_timer_sync(&bnad->bna.device.ioc.sem_timer); + del_timer_sync(&bnad->bna.device.ioc.hb_timer); + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_uninit(bna); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + mutex_unlock(&bnad->conf_mutex); + + bnad_res_free(bnad); + bnad_disable_msix(bnad); + bnad_pci_uninit(pdev); + bnad_lock_uninit(bnad); + bnad_uninit(bnad); + free_netdev(netdev); +} + +const struct pci_device_id bnad_pci_id_table[] = { + { + PCI_DEVICE(PCI_VENDOR_ID_BROCADE, + PCI_DEVICE_ID_BROCADE_CT), + .class = PCI_CLASS_NETWORK_ETHERNET << 8, + .class_mask = 0xffff00 + }, {0, } +}; + +MODULE_DEVICE_TABLE(pci, bnad_pci_id_table); + +static struct pci_driver bnad_pci_driver = { + .name = BNAD_NAME, + .id_table = bnad_pci_id_table, + .probe = bnad_pci_probe, + .remove = __devexit_p(bnad_pci_remove), +}; + +static int __init +bnad_module_init(void) +{ + int err; + + pr_info("Brocade 10G Ethernet driver\n"); + + bfa_ioc_auto_recover(bnad_ioc_auto_recover); + + err = pci_register_driver(&bnad_pci_driver); + if (err < 0) { + pr_err("bna : PCI registration failed in module init " + "(%d)\n", err); + return err; + } + + return 0; +} + +static void __exit +bnad_module_exit(void) +{ + pci_unregister_driver(&bnad_pci_driver); + + if (bfi_fw) + release_firmware(bfi_fw); +} + +module_init(bnad_module_init); +module_exit(bnad_module_exit); + +MODULE_AUTHOR("Brocade"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Brocade 10G PCIe Ethernet driver"); +MODULE_VERSION(BNAD_VERSION); +MODULE_FIRMWARE(CNA_FW_FILE_CT); diff --git a/drivers/net/bna/bnad.h b/drivers/net/bna/bnad.h new file mode 100644 index 000000000000..3261401e35cb --- /dev/null +++ b/drivers/net/bna/bnad.h @@ -0,0 +1,334 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#ifndef __BNAD_H__ +#define __BNAD_H__ + +#include +#include +#include +#include +#include +#include + +/* Fix for IA64 */ +#include +#include + +#include +#include + +#include "bna.h" + +#define BNAD_TXQ_DEPTH 2048 +#define BNAD_RXQ_DEPTH 2048 + +#define BNAD_MAX_TXS 1 +#define BNAD_MAX_TXQ_PER_TX 8 /* 8 priority queues */ +#define BNAD_TXQ_NUM 1 + +#define BNAD_MAX_RXS 1 +#define BNAD_MAX_RXPS_PER_RX 16 + +/* + * Control structure pointed to ccb->ctrl, which + * determines the NAPI / LRO behavior CCB + * There is 1:1 corres. between ccb & ctrl + */ +struct bnad_rx_ctrl { + struct bna_ccb *ccb; + struct napi_struct napi; +}; + +#define BNAD_RXMODE_PROMISC_DEFAULT BNA_RXMODE_PROMISC + +#define BNAD_GET_TX_ID(_skb) (0) + +/* + * GLOBAL #defines (CONSTANTS) + */ +#define BNAD_NAME "bna" +#define BNAD_NAME_LEN 64 + +#define BNAD_VERSION "2.3.2.0" + +#define BNAD_MAILBOX_MSIX_VECTORS 1 + +#define BNAD_STATS_TIMER_FREQ 1000 /* in msecs */ +#define BNAD_DIM_TIMER_FREQ 1000 /* in msecs */ + +#define BNAD_MAX_Q_DEPTH 0x10000 +#define BNAD_MIN_Q_DEPTH 0x200 + +#define BNAD_JUMBO_MTU 9000 + +#define BNAD_NETIF_WAKE_THRESHOLD 8 + +#define BNAD_RXQ_REFILL_THRESHOLD_SHIFT 3 + +/* Bit positions for tcb->flags */ +#define BNAD_TXQ_FREE_SENT 0 + +/* Bit positions for rcb->flags */ +#define BNAD_RXQ_REFILL 0 +#define BNAD_RXQ_STARTED 1 + +/* + * DATA STRUCTURES + */ + +/* enums */ +enum bnad_intr_source { + BNAD_INTR_TX = 1, + BNAD_INTR_RX = 2 +}; + +enum bnad_link_state { + BNAD_LS_DOWN = 0, + BNAD_LS_UP = 1 +}; + +struct bnad_completion { + struct completion ioc_comp; + struct completion ucast_comp; + struct completion mcast_comp; + struct completion tx_comp; + struct completion rx_comp; + struct completion stats_comp; + struct completion port_comp; + + u8 ioc_comp_status; + u8 ucast_comp_status; + u8 mcast_comp_status; + u8 tx_comp_status; + u8 rx_comp_status; + u8 stats_comp_status; + u8 port_comp_status; +}; + +/* Tx Rx Control Stats */ +struct bnad_drv_stats { + u64 netif_queue_stop; + u64 netif_queue_wakeup; + u64 tso4; + u64 tso6; + u64 tso_err; + u64 tcpcsum_offload; + u64 udpcsum_offload; + u64 csum_help; + u64 csum_help_err; + + u64 hw_stats_updates; + u64 netif_rx_schedule; + u64 netif_rx_complete; + u64 netif_rx_dropped; + + u64 link_toggle; + u64 cee_up; + + u64 rxp_info_alloc_failed; + u64 mbox_intr_disabled; + u64 mbox_intr_enabled; + u64 tx_unmap_q_alloc_failed; + u64 rx_unmap_q_alloc_failed; + + u64 rxbuf_alloc_failed; +}; + +/* Complete driver stats */ +struct bnad_stats { + struct bnad_drv_stats drv_stats; + struct bna_stats *bna_stats; +}; + +/* Tx / Rx Resources */ +struct bnad_tx_res_info { + struct bna_res_info res_info[BNA_TX_RES_T_MAX]; +}; + +struct bnad_rx_res_info { + struct bna_res_info res_info[BNA_RX_RES_T_MAX]; +}; + +struct bnad_tx_info { + struct bna_tx *tx; /* 1:1 between tx_info & tx */ + struct bna_tcb *tcb[BNAD_MAX_TXQ_PER_TX]; +} ____cacheline_aligned; + +struct bnad_rx_info { + struct bna_rx *rx; /* 1:1 between rx_info & rx */ + + struct bnad_rx_ctrl rx_ctrl[BNAD_MAX_RXPS_PER_RX]; +} ____cacheline_aligned; + +/* Unmap queues for Tx / Rx cleanup */ +struct bnad_skb_unmap { + struct sk_buff *skb; + DECLARE_PCI_UNMAP_ADDR(dma_addr) +}; + +struct bnad_unmap_q { + u32 producer_index; + u32 consumer_index; + u32 q_depth; + /* This should be the last one */ + struct bnad_skb_unmap unmap_array[1]; +}; + +/* Bit mask values for bnad->cfg_flags */ +#define BNAD_CF_DIM_ENABLED 0x01 /* DIM */ +#define BNAD_CF_PROMISC 0x02 +#define BNAD_CF_ALLMULTI 0x04 +#define BNAD_CF_MSIX 0x08 /* If in MSIx mode */ + +/* Defines for run_flags bit-mask */ +/* Set, tested & cleared using xxx_bit() functions */ +/* Values indicated bit positions */ +#define BNAD_RF_CEE_RUNNING 1 +#define BNAD_RF_HW_ERROR 2 +#define BNAD_RF_MBOX_IRQ_DISABLED 3 +#define BNAD_RF_TX_STARTED 4 +#define BNAD_RF_RX_STARTED 5 +#define BNAD_RF_DIM_TIMER_RUNNING 6 +#define BNAD_RF_STATS_TIMER_RUNNING 7 + +struct bnad { + struct net_device *netdev; + + /* Data path */ + struct bnad_tx_info tx_info[BNAD_MAX_TXS]; + struct bnad_rx_info rx_info[BNAD_MAX_RXS]; + + struct vlan_group *vlan_grp; + /* + * These q numbers are global only because + * they are used to calculate MSIx vectors. + * Actually the exact # of queues are per Tx/Rx + * object. + */ + u32 num_tx; + u32 num_rx; + u32 num_txq_per_tx; + u32 num_rxp_per_rx; + + u32 txq_depth; + u32 rxq_depth; + + u8 tx_coalescing_timeo; + u8 rx_coalescing_timeo; + + struct bna_rx_config rx_config[BNAD_MAX_RXS]; + struct bna_tx_config tx_config[BNAD_MAX_TXS]; + + u32 rx_csum; + + void __iomem *bar0; /* BAR0 address */ + + struct bna bna; + + u32 cfg_flags; + unsigned long run_flags; + + struct pci_dev *pcidev; + u64 mmio_start; + u64 mmio_len; + + u32 msix_num; + u32 msix_diag_num; + struct msix_entry *msix_table; + + struct mutex conf_mutex; + spinlock_t bna_lock ____cacheline_aligned; + + /* Timers */ + struct timer_list ioc_timer; + struct timer_list dim_timer; + struct timer_list stats_timer; + + /* Control path resources, memory & irq */ + struct bna_res_info res_info[BNA_RES_T_MAX]; + struct bnad_tx_res_info tx_res_info[BNAD_MAX_TXS]; + struct bnad_rx_res_info rx_res_info[BNAD_MAX_RXS]; + + struct bnad_completion bnad_completions; + + /* Burnt in MAC address */ + mac_t perm_addr; + + struct tasklet_struct tx_free_tasklet; + + /* Statistics */ + struct bnad_stats stats; + struct net_device_stats net_stats; + + struct bnad_diag *diag; + + char adapter_name[BNAD_NAME_LEN]; + char port_name[BNAD_NAME_LEN]; + char mbox_irq_name[BNAD_NAME_LEN]; +}; + +/* + * EXTERN VARIABLES + */ +extern struct firmware *bfi_fw; +extern u32 bnad_rxqs_per_cq; + +/* + * EXTERN PROTOTYPES + */ +extern u32 *cna_get_firmware_buf(struct pci_dev *pdev); +/* Netdev entry point prototypes */ +extern void bnad_set_ethtool_ops(struct net_device *netdev); + +/* Configuration & setup */ +extern void bnad_tx_coalescing_timeo_set(struct bnad *bnad); +extern void bnad_rx_coalescing_timeo_set(struct bnad *bnad); + +extern int bnad_setup_rx(struct bnad *bnad, uint rx_id); +extern int bnad_setup_tx(struct bnad *bnad, uint tx_id); +extern void bnad_cleanup_tx(struct bnad *bnad, uint tx_id); +extern void bnad_cleanup_rx(struct bnad *bnad, uint rx_id); + +/* Timer start/stop protos */ +extern void bnad_dim_timer_start(struct bnad *bnad); + +/* Statistics */ +extern void bnad_netdev_qstats_fill(struct bnad *bnad); +extern void bnad_netdev_hwstats_fill(struct bnad *bnad); + +/** + * MACROS + */ +/* To set & get the stats counters */ +#define BNAD_UPDATE_CTR(_bnad, _ctr) \ + (((_bnad)->stats.drv_stats._ctr)++) + +#define BNAD_GET_CTR(_bnad, _ctr) ((_bnad)->stats.drv_stats._ctr) + +#define bnad_enable_rx_irq_unsafe(_ccb) \ +{ \ + bna_ib_coalescing_timer_set((_ccb)->i_dbell, \ + (_ccb)->rx_coalescing_timeo); \ + bna_ib_ack((_ccb)->i_dbell, 0); \ +} + +#define bnad_dim_timer_running(_bnad) \ + (((_bnad)->cfg_flags & BNAD_CF_DIM_ENABLED) && \ + (test_bit(BNAD_RF_DIM_TIMER_RUNNING, &((_bnad)->run_flags)))) + +#endif /* __BNAD_H__ */ diff --git a/drivers/net/bna/bnad_ethtool.c b/drivers/net/bna/bnad_ethtool.c new file mode 100644 index 000000000000..e982785b6b25 --- /dev/null +++ b/drivers/net/bna/bnad_ethtool.c @@ -0,0 +1,1282 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#include "cna.h" + +#include +#include +#include +#include + +#include "bna.h" + +#include "bnad.h" + +#define BNAD_NUM_TXF_COUNTERS 12 +#define BNAD_NUM_RXF_COUNTERS 10 +#define BNAD_NUM_CQ_COUNTERS 3 +#define BNAD_NUM_RXQ_COUNTERS 6 +#define BNAD_NUM_TXQ_COUNTERS 5 + +#define BNAD_ETHTOOL_STATS_NUM \ + (sizeof(struct net_device_stats) / sizeof(unsigned long) + \ + sizeof(struct bnad_drv_stats) / sizeof(u64) + \ + offsetof(struct bfi_ll_stats, rxf_stats[0]) / sizeof(u64)) + +static char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { + "rx_packets", + "tx_packets", + "rx_bytes", + "tx_bytes", + "rx_errors", + "tx_errors", + "rx_dropped", + "tx_dropped", + "multicast", + "collisions", + + "rx_length_errors", + "rx_over_errors", + "rx_crc_errors", + "rx_frame_errors", + "rx_fifo_errors", + "rx_missed_errors", + + "tx_aborted_errors", + "tx_carrier_errors", + "tx_fifo_errors", + "tx_heartbeat_errors", + "tx_window_errors", + + "rx_compressed", + "tx_compressed", + + "netif_queue_stop", + "netif_queue_wakeup", + "tso4", + "tso6", + "tso_err", + "tcpcsum_offload", + "udpcsum_offload", + "csum_help", + "csum_help_err", + "hw_stats_updates", + "netif_rx_schedule", + "netif_rx_complete", + "netif_rx_dropped", + + "link_toggle", + "cee_up", + + "rxp_info_alloc_failed", + "mbox_intr_disabled", + "mbox_intr_enabled", + "tx_unmap_q_alloc_failed", + "rx_unmap_q_alloc_failed", + "rxbuf_alloc_failed", + + "mac_frame_64", + "mac_frame_65_127", + "mac_frame_128_255", + "mac_frame_256_511", + "mac_frame_512_1023", + "mac_frame_1024_1518", + "mac_frame_1518_1522", + "mac_rx_bytes", + "mac_rx_packets", + "mac_rx_fcs_error", + "mac_rx_multicast", + "mac_rx_broadcast", + "mac_rx_control_frames", + "mac_rx_pause", + "mac_rx_unknown_opcode", + "mac_rx_alignment_error", + "mac_rx_frame_length_error", + "mac_rx_code_error", + "mac_rx_carrier_sense_error", + "mac_rx_undersize", + "mac_rx_oversize", + "mac_rx_fragments", + "mac_rx_jabber", + "mac_rx_drop", + + "mac_tx_bytes", + "mac_tx_packets", + "mac_tx_multicast", + "mac_tx_broadcast", + "mac_tx_pause", + "mac_tx_deferral", + "mac_tx_excessive_deferral", + "mac_tx_single_collision", + "mac_tx_muliple_collision", + "mac_tx_late_collision", + "mac_tx_excessive_collision", + "mac_tx_total_collision", + "mac_tx_pause_honored", + "mac_tx_drop", + "mac_tx_jabber", + "mac_tx_fcs_error", + "mac_tx_control_frame", + "mac_tx_oversize", + "mac_tx_undersize", + "mac_tx_fragments", + + "bpc_tx_pause_0", + "bpc_tx_pause_1", + "bpc_tx_pause_2", + "bpc_tx_pause_3", + "bpc_tx_pause_4", + "bpc_tx_pause_5", + "bpc_tx_pause_6", + "bpc_tx_pause_7", + "bpc_tx_zero_pause_0", + "bpc_tx_zero_pause_1", + "bpc_tx_zero_pause_2", + "bpc_tx_zero_pause_3", + "bpc_tx_zero_pause_4", + "bpc_tx_zero_pause_5", + "bpc_tx_zero_pause_6", + "bpc_tx_zero_pause_7", + "bpc_tx_first_pause_0", + "bpc_tx_first_pause_1", + "bpc_tx_first_pause_2", + "bpc_tx_first_pause_3", + "bpc_tx_first_pause_4", + "bpc_tx_first_pause_5", + "bpc_tx_first_pause_6", + "bpc_tx_first_pause_7", + + "bpc_rx_pause_0", + "bpc_rx_pause_1", + "bpc_rx_pause_2", + "bpc_rx_pause_3", + "bpc_rx_pause_4", + "bpc_rx_pause_5", + "bpc_rx_pause_6", + "bpc_rx_pause_7", + "bpc_rx_zero_pause_0", + "bpc_rx_zero_pause_1", + "bpc_rx_zero_pause_2", + "bpc_rx_zero_pause_3", + "bpc_rx_zero_pause_4", + "bpc_rx_zero_pause_5", + "bpc_rx_zero_pause_6", + "bpc_rx_zero_pause_7", + "bpc_rx_first_pause_0", + "bpc_rx_first_pause_1", + "bpc_rx_first_pause_2", + "bpc_rx_first_pause_3", + "bpc_rx_first_pause_4", + "bpc_rx_first_pause_5", + "bpc_rx_first_pause_6", + "bpc_rx_first_pause_7", + + "rad_rx_frames", + "rad_rx_octets", + "rad_rx_vlan_frames", + "rad_rx_ucast", + "rad_rx_ucast_octets", + "rad_rx_ucast_vlan", + "rad_rx_mcast", + "rad_rx_mcast_octets", + "rad_rx_mcast_vlan", + "rad_rx_bcast", + "rad_rx_bcast_octets", + "rad_rx_bcast_vlan", + "rad_rx_drops", + + "fc_rx_ucast_octets", + "fc_rx_ucast", + "fc_rx_ucast_vlan", + "fc_rx_mcast_octets", + "fc_rx_mcast", + "fc_rx_mcast_vlan", + "fc_rx_bcast_octets", + "fc_rx_bcast", + "fc_rx_bcast_vlan", + + "fc_tx_ucast_octets", + "fc_tx_ucast", + "fc_tx_ucast_vlan", + "fc_tx_mcast_octets", + "fc_tx_mcast", + "fc_tx_mcast_vlan", + "fc_tx_bcast_octets", + "fc_tx_bcast", + "fc_tx_bcast_vlan", + "fc_tx_parity_errors", + "fc_tx_timeout", + "fc_tx_fid_parity_errors", +}; + +static int +bnad_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd) +{ + cmd->supported = SUPPORTED_10000baseT_Full; + cmd->advertising = ADVERTISED_10000baseT_Full; + cmd->autoneg = AUTONEG_DISABLE; + cmd->supported |= SUPPORTED_FIBRE; + cmd->advertising |= ADVERTISED_FIBRE; + cmd->port = PORT_FIBRE; + cmd->phy_address = 0; + + if (netif_carrier_ok(netdev)) { + cmd->speed = SPEED_10000; + cmd->duplex = DUPLEX_FULL; + } else { + cmd->speed = -1; + cmd->duplex = -1; + } + cmd->transceiver = XCVR_EXTERNAL; + cmd->maxtxpkt = 0; + cmd->maxrxpkt = 0; + + return 0; +} + +static int +bnad_set_settings(struct net_device *netdev, struct ethtool_cmd *cmd) +{ + /* 10G full duplex setting supported only */ + if (cmd->autoneg == AUTONEG_ENABLE) + return -EOPNOTSUPP; else { + if ((cmd->speed == SPEED_10000) && (cmd->duplex == DUPLEX_FULL)) + return 0; + } + + return -EOPNOTSUPP; +} + +static void +bnad_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) +{ + struct bnad *bnad = netdev_priv(netdev); + struct bfa_ioc_attr *ioc_attr; + unsigned long flags; + + strcpy(drvinfo->driver, BNAD_NAME); + strcpy(drvinfo->version, BNAD_VERSION); + + ioc_attr = kzalloc(sizeof(*ioc_attr), GFP_KERNEL); + if (ioc_attr) { + memset(ioc_attr, 0, sizeof(*ioc_attr)); + spin_lock_irqsave(&bnad->bna_lock, flags); + bfa_ioc_get_attr(&bnad->bna.device.ioc, ioc_attr); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + strncpy(drvinfo->fw_version, ioc_attr->adapter_attr.fw_ver, + sizeof(drvinfo->fw_version) - 1); + kfree(ioc_attr); + } + + strncpy(drvinfo->bus_info, pci_name(bnad->pcidev), ETHTOOL_BUSINFO_LEN); +} + +static int +get_regs(struct bnad *bnad, u32 * regs) +{ + int num = 0, i; + u32 reg_addr; + unsigned long flags; + +#define BNAD_GET_REG(addr) \ +do { \ + if (regs) \ + regs[num++] = readl(bnad->bar0 + (addr)); \ + else \ + num++; \ +} while (0) + + spin_lock_irqsave(&bnad->bna_lock, flags); + + /* DMA Block Internal Registers */ + BNAD_GET_REG(DMA_CTRL_REG0); + BNAD_GET_REG(DMA_CTRL_REG1); + BNAD_GET_REG(DMA_ERR_INT_STATUS); + BNAD_GET_REG(DMA_ERR_INT_ENABLE); + BNAD_GET_REG(DMA_ERR_INT_STATUS_SET); + + /* APP Block Register Address Offset from BAR0 */ + BNAD_GET_REG(HOSTFN0_INT_STATUS); + BNAD_GET_REG(HOSTFN0_INT_MASK); + BNAD_GET_REG(HOST_PAGE_NUM_FN0); + BNAD_GET_REG(HOST_MSIX_ERR_INDEX_FN0); + BNAD_GET_REG(FN0_PCIE_ERR_REG); + BNAD_GET_REG(FN0_ERR_TYPE_STATUS_REG); + BNAD_GET_REG(FN0_ERR_TYPE_MSK_STATUS_REG); + + BNAD_GET_REG(HOSTFN1_INT_STATUS); + BNAD_GET_REG(HOSTFN1_INT_MASK); + BNAD_GET_REG(HOST_PAGE_NUM_FN1); + BNAD_GET_REG(HOST_MSIX_ERR_INDEX_FN1); + BNAD_GET_REG(FN1_PCIE_ERR_REG); + BNAD_GET_REG(FN1_ERR_TYPE_STATUS_REG); + BNAD_GET_REG(FN1_ERR_TYPE_MSK_STATUS_REG); + + BNAD_GET_REG(PCIE_MISC_REG); + + BNAD_GET_REG(HOST_SEM0_REG); + BNAD_GET_REG(HOST_SEM1_REG); + BNAD_GET_REG(HOST_SEM2_REG); + BNAD_GET_REG(HOST_SEM3_REG); + BNAD_GET_REG(HOST_SEM0_INFO_REG); + BNAD_GET_REG(HOST_SEM1_INFO_REG); + BNAD_GET_REG(HOST_SEM2_INFO_REG); + BNAD_GET_REG(HOST_SEM3_INFO_REG); + + BNAD_GET_REG(TEMPSENSE_CNTL_REG); + BNAD_GET_REG(TEMPSENSE_STAT_REG); + + BNAD_GET_REG(APP_LOCAL_ERR_STAT); + BNAD_GET_REG(APP_LOCAL_ERR_MSK); + + BNAD_GET_REG(PCIE_LNK_ERR_STAT); + BNAD_GET_REG(PCIE_LNK_ERR_MSK); + + BNAD_GET_REG(FCOE_FIP_ETH_TYPE); + BNAD_GET_REG(RESV_ETH_TYPE); + + BNAD_GET_REG(HOSTFN2_INT_STATUS); + BNAD_GET_REG(HOSTFN2_INT_MASK); + BNAD_GET_REG(HOST_PAGE_NUM_FN2); + BNAD_GET_REG(HOST_MSIX_ERR_INDEX_FN2); + BNAD_GET_REG(FN2_PCIE_ERR_REG); + BNAD_GET_REG(FN2_ERR_TYPE_STATUS_REG); + BNAD_GET_REG(FN2_ERR_TYPE_MSK_STATUS_REG); + + BNAD_GET_REG(HOSTFN3_INT_STATUS); + BNAD_GET_REG(HOSTFN3_INT_MASK); + BNAD_GET_REG(HOST_PAGE_NUM_FN3); + BNAD_GET_REG(HOST_MSIX_ERR_INDEX_FN3); + BNAD_GET_REG(FN3_PCIE_ERR_REG); + BNAD_GET_REG(FN3_ERR_TYPE_STATUS_REG); + BNAD_GET_REG(FN3_ERR_TYPE_MSK_STATUS_REG); + + /* Host Command Status Registers */ + reg_addr = HOST_CMDSTS0_CLR_REG; + for (i = 0; i < 16; i++) { + BNAD_GET_REG(reg_addr); + BNAD_GET_REG(reg_addr + 4); + BNAD_GET_REG(reg_addr + 8); + reg_addr += 0x10; + } + + /* Function ID register */ + BNAD_GET_REG(FNC_ID_REG); + + /* Function personality register */ + BNAD_GET_REG(FNC_PERS_REG); + + /* Operation mode register */ + BNAD_GET_REG(OP_MODE); + + /* LPU0 Registers */ + BNAD_GET_REG(LPU0_MBOX_CTL_REG); + BNAD_GET_REG(LPU0_MBOX_CMD_REG); + BNAD_GET_REG(LPU0_MBOX_LINK_0REG); + BNAD_GET_REG(LPU1_MBOX_LINK_0REG); + BNAD_GET_REG(LPU0_MBOX_STATUS_0REG); + BNAD_GET_REG(LPU1_MBOX_STATUS_0REG); + BNAD_GET_REG(LPU0_ERR_STATUS_REG); + BNAD_GET_REG(LPU0_ERR_SET_REG); + + /* LPU1 Registers */ + BNAD_GET_REG(LPU1_MBOX_CTL_REG); + BNAD_GET_REG(LPU1_MBOX_CMD_REG); + BNAD_GET_REG(LPU0_MBOX_LINK_1REG); + BNAD_GET_REG(LPU1_MBOX_LINK_1REG); + BNAD_GET_REG(LPU0_MBOX_STATUS_1REG); + BNAD_GET_REG(LPU1_MBOX_STATUS_1REG); + BNAD_GET_REG(LPU1_ERR_STATUS_REG); + BNAD_GET_REG(LPU1_ERR_SET_REG); + + /* PSS Registers */ + BNAD_GET_REG(PSS_CTL_REG); + BNAD_GET_REG(PSS_ERR_STATUS_REG); + BNAD_GET_REG(ERR_STATUS_SET); + BNAD_GET_REG(PSS_RAM_ERR_STATUS_REG); + + /* Catapult CPQ Registers */ + BNAD_GET_REG(HOSTFN0_LPU0_MBOX0_CMD_STAT); + BNAD_GET_REG(HOSTFN0_LPU1_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN0_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN0_MBOX0_CMD_STAT); + + BNAD_GET_REG(HOSTFN0_LPU0_MBOX1_CMD_STAT); + BNAD_GET_REG(HOSTFN0_LPU1_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN0_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN0_MBOX1_CMD_STAT); + + BNAD_GET_REG(HOSTFN1_LPU0_MBOX0_CMD_STAT); + BNAD_GET_REG(HOSTFN1_LPU1_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN1_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN1_MBOX0_CMD_STAT); + + BNAD_GET_REG(HOSTFN1_LPU0_MBOX1_CMD_STAT); + BNAD_GET_REG(HOSTFN1_LPU1_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN1_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN1_MBOX1_CMD_STAT); + + BNAD_GET_REG(HOSTFN2_LPU0_MBOX0_CMD_STAT); + BNAD_GET_REG(HOSTFN2_LPU1_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN2_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN2_MBOX0_CMD_STAT); + + BNAD_GET_REG(HOSTFN2_LPU0_MBOX1_CMD_STAT); + BNAD_GET_REG(HOSTFN2_LPU1_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN2_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN2_MBOX1_CMD_STAT); + + BNAD_GET_REG(HOSTFN3_LPU0_MBOX0_CMD_STAT); + BNAD_GET_REG(HOSTFN3_LPU1_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN3_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN3_MBOX0_CMD_STAT); + + BNAD_GET_REG(HOSTFN3_LPU0_MBOX1_CMD_STAT); + BNAD_GET_REG(HOSTFN3_LPU1_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN3_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN3_MBOX1_CMD_STAT); + + /* Host Function Force Parity Error Registers */ + BNAD_GET_REG(HOSTFN0_LPU_FORCE_PERR); + BNAD_GET_REG(HOSTFN1_LPU_FORCE_PERR); + BNAD_GET_REG(HOSTFN2_LPU_FORCE_PERR); + BNAD_GET_REG(HOSTFN3_LPU_FORCE_PERR); + + /* LL Port[0|1] Halt Mask Registers */ + BNAD_GET_REG(LL_HALT_MSK_P0); + BNAD_GET_REG(LL_HALT_MSK_P1); + + /* LL Port[0|1] Error Mask Registers */ + BNAD_GET_REG(LL_ERR_MSK_P0); + BNAD_GET_REG(LL_ERR_MSK_P1); + + /* EMC FLI Registers */ + BNAD_GET_REG(FLI_CMD_REG); + BNAD_GET_REG(FLI_ADDR_REG); + BNAD_GET_REG(FLI_CTL_REG); + BNAD_GET_REG(FLI_WRDATA_REG); + BNAD_GET_REG(FLI_RDDATA_REG); + BNAD_GET_REG(FLI_DEV_STATUS_REG); + BNAD_GET_REG(FLI_SIG_WD_REG); + + BNAD_GET_REG(FLI_DEV_VENDOR_REG); + BNAD_GET_REG(FLI_ERR_STATUS_REG); + + /* RxAdm 0 Registers */ + BNAD_GET_REG(RAD0_CTL_REG); + BNAD_GET_REG(RAD0_PE_PARM_REG); + BNAD_GET_REG(RAD0_BCN_REG); + BNAD_GET_REG(RAD0_DEFAULT_REG); + BNAD_GET_REG(RAD0_PROMISC_REG); + BNAD_GET_REG(RAD0_BCNQ_REG); + BNAD_GET_REG(RAD0_DEFAULTQ_REG); + + BNAD_GET_REG(RAD0_ERR_STS); + BNAD_GET_REG(RAD0_SET_ERR_STS); + BNAD_GET_REG(RAD0_ERR_INT_EN); + BNAD_GET_REG(RAD0_FIRST_ERR); + BNAD_GET_REG(RAD0_FORCE_ERR); + + BNAD_GET_REG(RAD0_MAC_MAN_1H); + BNAD_GET_REG(RAD0_MAC_MAN_1L); + BNAD_GET_REG(RAD0_MAC_MAN_2H); + BNAD_GET_REG(RAD0_MAC_MAN_2L); + BNAD_GET_REG(RAD0_MAC_MAN_3H); + BNAD_GET_REG(RAD0_MAC_MAN_3L); + BNAD_GET_REG(RAD0_MAC_MAN_4H); + BNAD_GET_REG(RAD0_MAC_MAN_4L); + + BNAD_GET_REG(RAD0_LAST4_IP); + + /* RxAdm 1 Registers */ + BNAD_GET_REG(RAD1_CTL_REG); + BNAD_GET_REG(RAD1_PE_PARM_REG); + BNAD_GET_REG(RAD1_BCN_REG); + BNAD_GET_REG(RAD1_DEFAULT_REG); + BNAD_GET_REG(RAD1_PROMISC_REG); + BNAD_GET_REG(RAD1_BCNQ_REG); + BNAD_GET_REG(RAD1_DEFAULTQ_REG); + + BNAD_GET_REG(RAD1_ERR_STS); + BNAD_GET_REG(RAD1_SET_ERR_STS); + BNAD_GET_REG(RAD1_ERR_INT_EN); + + /* TxA0 Registers */ + BNAD_GET_REG(TXA0_CTRL_REG); + /* TxA0 TSO Sequence # Registers (RO) */ + for (i = 0; i < 8; i++) { + BNAD_GET_REG(TXA0_TSO_TCP_SEQ_REG(i)); + BNAD_GET_REG(TXA0_TSO_IP_INFO_REG(i)); + } + + /* TxA1 Registers */ + BNAD_GET_REG(TXA1_CTRL_REG); + /* TxA1 TSO Sequence # Registers (RO) */ + for (i = 0; i < 8; i++) { + BNAD_GET_REG(TXA1_TSO_TCP_SEQ_REG(i)); + BNAD_GET_REG(TXA1_TSO_IP_INFO_REG(i)); + } + + /* RxA Registers */ + BNAD_GET_REG(RXA0_CTL_REG); + BNAD_GET_REG(RXA1_CTL_REG); + + /* PLB0 Registers */ + BNAD_GET_REG(PLB0_ECM_TIMER_REG); + BNAD_GET_REG(PLB0_RL_CTL); + for (i = 0; i < 8; i++) + BNAD_GET_REG(PLB0_RL_MAX_BC(i)); + BNAD_GET_REG(PLB0_RL_TU_PRIO); + for (i = 0; i < 8; i++) + BNAD_GET_REG(PLB0_RL_BYTE_CNT(i)); + BNAD_GET_REG(PLB0_RL_MIN_REG); + BNAD_GET_REG(PLB0_RL_MAX_REG); + BNAD_GET_REG(PLB0_EMS_ADD_REG); + + /* PLB1 Registers */ + BNAD_GET_REG(PLB1_ECM_TIMER_REG); + BNAD_GET_REG(PLB1_RL_CTL); + for (i = 0; i < 8; i++) + BNAD_GET_REG(PLB1_RL_MAX_BC(i)); + BNAD_GET_REG(PLB1_RL_TU_PRIO); + for (i = 0; i < 8; i++) + BNAD_GET_REG(PLB1_RL_BYTE_CNT(i)); + BNAD_GET_REG(PLB1_RL_MIN_REG); + BNAD_GET_REG(PLB1_RL_MAX_REG); + BNAD_GET_REG(PLB1_EMS_ADD_REG); + + /* HQM Control Register */ + BNAD_GET_REG(HQM0_CTL_REG); + BNAD_GET_REG(HQM0_RXQ_STOP_SEM); + BNAD_GET_REG(HQM0_TXQ_STOP_SEM); + BNAD_GET_REG(HQM1_CTL_REG); + BNAD_GET_REG(HQM1_RXQ_STOP_SEM); + BNAD_GET_REG(HQM1_TXQ_STOP_SEM); + + /* LUT Registers */ + BNAD_GET_REG(LUT0_ERR_STS); + BNAD_GET_REG(LUT0_SET_ERR_STS); + BNAD_GET_REG(LUT1_ERR_STS); + BNAD_GET_REG(LUT1_SET_ERR_STS); + + /* TRC Registers */ + BNAD_GET_REG(TRC_CTL_REG); + BNAD_GET_REG(TRC_MODS_REG); + BNAD_GET_REG(TRC_TRGC_REG); + BNAD_GET_REG(TRC_CNT1_REG); + BNAD_GET_REG(TRC_CNT2_REG); + BNAD_GET_REG(TRC_NXTS_REG); + BNAD_GET_REG(TRC_DIRR_REG); + for (i = 0; i < 10; i++) + BNAD_GET_REG(TRC_TRGM_REG(i)); + for (i = 0; i < 10; i++) + BNAD_GET_REG(TRC_NXTM_REG(i)); + for (i = 0; i < 10; i++) + BNAD_GET_REG(TRC_STRM_REG(i)); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); +#undef BNAD_GET_REG + return num; +} +static int +bnad_get_regs_len(struct net_device *netdev) +{ + int ret = get_regs(netdev_priv(netdev), NULL) * sizeof(u32); + return ret; +} + +static void +bnad_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *buf) +{ + memset(buf, 0, bnad_get_regs_len(netdev)); + get_regs(netdev_priv(netdev), buf); +} + +static void +bnad_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wolinfo) +{ + wolinfo->supported = 0; + wolinfo->wolopts = 0; +} + +static int +bnad_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coalesce) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + + /* Lock rqd. to access bnad->bna_lock */ + spin_lock_irqsave(&bnad->bna_lock, flags); + coalesce->use_adaptive_rx_coalesce = + (bnad->cfg_flags & BNAD_CF_DIM_ENABLED) ? true : false; + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + coalesce->rx_coalesce_usecs = bnad->rx_coalescing_timeo * + BFI_COALESCING_TIMER_UNIT; + coalesce->tx_coalesce_usecs = bnad->tx_coalescing_timeo * + BFI_COALESCING_TIMER_UNIT; + coalesce->tx_max_coalesced_frames = BFI_TX_INTERPKT_COUNT; + + return 0; +} + +static int +bnad_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *coalesce) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + int dim_timer_del = 0; + + if (coalesce->rx_coalesce_usecs == 0 || + coalesce->rx_coalesce_usecs > + BFI_MAX_COALESCING_TIMEO * BFI_COALESCING_TIMER_UNIT) + return -EINVAL; + + if (coalesce->tx_coalesce_usecs == 0 || + coalesce->tx_coalesce_usecs > + BFI_MAX_COALESCING_TIMEO * BFI_COALESCING_TIMER_UNIT) + return -EINVAL; + + mutex_lock(&bnad->conf_mutex); + /* + * Do not need to store rx_coalesce_usecs here + * Every time DIM is disabled, we can get it from the + * stack. + */ + spin_lock_irqsave(&bnad->bna_lock, flags); + if (coalesce->use_adaptive_rx_coalesce) { + if (!(bnad->cfg_flags & BNAD_CF_DIM_ENABLED)) { + bnad->cfg_flags |= BNAD_CF_DIM_ENABLED; + bnad_dim_timer_start(bnad); + } + } else { + if (bnad->cfg_flags & BNAD_CF_DIM_ENABLED) { + bnad->cfg_flags &= ~BNAD_CF_DIM_ENABLED; + dim_timer_del = bnad_dim_timer_running(bnad); + if (dim_timer_del) { + clear_bit(BNAD_RF_DIM_TIMER_RUNNING, + &bnad->run_flags); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + del_timer_sync(&bnad->dim_timer); + spin_lock_irqsave(&bnad->bna_lock, flags); + } + bnad_rx_coalescing_timeo_set(bnad); + } + } + if (bnad->tx_coalescing_timeo != coalesce->tx_coalesce_usecs / + BFI_COALESCING_TIMER_UNIT) { + bnad->tx_coalescing_timeo = coalesce->tx_coalesce_usecs / + BFI_COALESCING_TIMER_UNIT; + bnad_tx_coalescing_timeo_set(bnad); + } + + if (bnad->rx_coalescing_timeo != coalesce->rx_coalesce_usecs / + BFI_COALESCING_TIMER_UNIT) { + bnad->rx_coalescing_timeo = coalesce->rx_coalesce_usecs / + BFI_COALESCING_TIMER_UNIT; + + if (!(bnad->cfg_flags & BNAD_CF_DIM_ENABLED)) + bnad_rx_coalescing_timeo_set(bnad); + + } + + /* Add Tx Inter-pkt DMA count? */ + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); + return 0; +} + +static void +bnad_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ringparam) +{ + struct bnad *bnad = netdev_priv(netdev); + + ringparam->rx_max_pending = BNAD_MAX_Q_DEPTH / bnad_rxqs_per_cq; + ringparam->rx_mini_max_pending = 0; + ringparam->rx_jumbo_max_pending = 0; + ringparam->tx_max_pending = BNAD_MAX_Q_DEPTH; + + ringparam->rx_pending = bnad->rxq_depth; + ringparam->rx_mini_max_pending = 0; + ringparam->rx_jumbo_max_pending = 0; + ringparam->tx_pending = bnad->txq_depth; +} + +static int +bnad_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ringparam) +{ + int i, current_err, err = 0; + struct bnad *bnad = netdev_priv(netdev); + + mutex_lock(&bnad->conf_mutex); + if (ringparam->rx_pending == bnad->rxq_depth && + ringparam->tx_pending == bnad->txq_depth) { + mutex_unlock(&bnad->conf_mutex); + return 0; + } + + if (ringparam->rx_pending < BNAD_MIN_Q_DEPTH || + ringparam->rx_pending > BNAD_MAX_Q_DEPTH / bnad_rxqs_per_cq || + !BNA_POWER_OF_2(ringparam->rx_pending)) { + mutex_unlock(&bnad->conf_mutex); + return -EINVAL; + } + if (ringparam->tx_pending < BNAD_MIN_Q_DEPTH || + ringparam->tx_pending > BNAD_MAX_Q_DEPTH || + !BNA_POWER_OF_2(ringparam->tx_pending)) { + mutex_unlock(&bnad->conf_mutex); + return -EINVAL; + } + + if (ringparam->rx_pending != bnad->rxq_depth) { + bnad->rxq_depth = ringparam->rx_pending; + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + bnad_cleanup_rx(bnad, i); + current_err = bnad_setup_rx(bnad, i); + if (current_err && !err) + err = current_err; + } + } + if (ringparam->tx_pending != bnad->txq_depth) { + bnad->txq_depth = ringparam->tx_pending; + for (i = 0; i < bnad->num_tx; i++) { + if (!bnad->tx_info[i].tx) + continue; + bnad_cleanup_tx(bnad, i); + current_err = bnad_setup_tx(bnad, i); + if (current_err && !err) + err = current_err; + } + } + + mutex_unlock(&bnad->conf_mutex); + return err; +} + +static void +bnad_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct bnad *bnad = netdev_priv(netdev); + + pauseparam->autoneg = 0; + pauseparam->rx_pause = bnad->bna.port.pause_config.rx_pause; + pauseparam->tx_pause = bnad->bna.port.pause_config.tx_pause; +} + +static int +bnad_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct bnad *bnad = netdev_priv(netdev); + struct bna_pause_config pause_config; + unsigned long flags; + + if (pauseparam->autoneg == AUTONEG_ENABLE) + return -EINVAL; + + mutex_lock(&bnad->conf_mutex); + if (pauseparam->rx_pause != bnad->bna.port.pause_config.rx_pause || + pauseparam->tx_pause != bnad->bna.port.pause_config.tx_pause) { + pause_config.rx_pause = pauseparam->rx_pause; + pause_config.tx_pause = pauseparam->tx_pause; + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_port_pause_config(&bnad->bna.port, &pause_config, NULL); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + } + mutex_unlock(&bnad->conf_mutex); + return 0; +} + +static u32 +bnad_get_rx_csum(struct net_device *netdev) +{ + u32 rx_csum; + struct bnad *bnad = netdev_priv(netdev); + + rx_csum = bnad->rx_csum; + return rx_csum; +} + +static int +bnad_set_rx_csum(struct net_device *netdev, u32 rx_csum) +{ + struct bnad *bnad = netdev_priv(netdev); + + mutex_lock(&bnad->conf_mutex); + bnad->rx_csum = rx_csum; + mutex_unlock(&bnad->conf_mutex); + return 0; +} + +static int +bnad_set_tx_csum(struct net_device *netdev, u32 tx_csum) +{ + struct bnad *bnad = netdev_priv(netdev); + + mutex_lock(&bnad->conf_mutex); + if (tx_csum) { + netdev->features |= NETIF_F_IP_CSUM; + netdev->features |= NETIF_F_IPV6_CSUM; + } else { + netdev->features &= ~NETIF_F_IP_CSUM; + netdev->features &= ~NETIF_F_IPV6_CSUM; + } + mutex_unlock(&bnad->conf_mutex); + return 0; +} + +static int +bnad_set_tso(struct net_device *netdev, u32 tso) +{ + struct bnad *bnad = netdev_priv(netdev); + + mutex_lock(&bnad->conf_mutex); + if (tso) { + netdev->features |= NETIF_F_TSO; + netdev->features |= NETIF_F_TSO6; + } else { + netdev->features &= ~NETIF_F_TSO; + netdev->features &= ~NETIF_F_TSO6; + } + mutex_unlock(&bnad->conf_mutex); + return 0; +} + +static void +bnad_get_strings(struct net_device *netdev, u32 stringset, u8 * string) +{ + struct bnad *bnad = netdev_priv(netdev); + int i, j, q_num; + u64 bmap; + + mutex_lock(&bnad->conf_mutex); + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < BNAD_ETHTOOL_STATS_NUM; i++) { + BUG_ON(!(strlen(bnad_net_stats_strings[i]) < + ETH_GSTRING_LEN)); + memcpy(string, bnad_net_stats_strings[i], + ETH_GSTRING_LEN); + string += ETH_GSTRING_LEN; + } + bmap = (u64)bnad->bna.tx_mod.txf_bmap[0] | + ((u64)bnad->bna.tx_mod.txf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_TXF_ID_MAX); i++) { + if (bmap & 1) { + sprintf(string, "txf%d_ucast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_ucast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_ucast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_mcast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_mcast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_mcast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_bcast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_bcast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_bcast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_errors", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_filter_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_filter_mac_sa", i); + string += ETH_GSTRING_LEN; + } + bmap >>= 1; + } + + bmap = (u64)bnad->bna.rx_mod.rxf_bmap[0] | + ((u64)bnad->bna.rx_mod.rxf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_RXF_ID_MAX); i++) { + if (bmap & 1) { + sprintf(string, "rxf%d_ucast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_ucast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_ucast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_mcast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_mcast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_mcast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_bcast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_bcast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_bcast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_frame_drops", i); + string += ETH_GSTRING_LEN; + } + bmap >>= 1; + } + + q_num = 0; + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + sprintf(string, "cq%d_producer_index", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "cq%d_consumer_index", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "cq%d_hw_producer_index", + q_num); + string += ETH_GSTRING_LEN; + q_num++; + } + } + + q_num = 0; + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + sprintf(string, "rxq%d_packets", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_bytes", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_packets_with_error", + q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_allocbuf_failed", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_producer_index", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_consumer_index", q_num); + string += ETH_GSTRING_LEN; + q_num++; + if (bnad->rx_info[i].rx_ctrl[j].ccb && + bnad->rx_info[i].rx_ctrl[j].ccb-> + rcb[1] && + bnad->rx_info[i].rx_ctrl[j].ccb-> + rcb[1]->rxq) { + sprintf(string, "rxq%d_packets", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_bytes", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, + "rxq%d_packets_with_error", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_allocbuf_failed", + q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_producer_index", + q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_consumer_index", + q_num); + string += ETH_GSTRING_LEN; + q_num++; + } + } + } + + q_num = 0; + for (i = 0; i < bnad->num_tx; i++) { + if (!bnad->tx_info[i].tx) + continue; + for (j = 0; j < bnad->num_txq_per_tx; j++) { + sprintf(string, "txq%d_packets", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "txq%d_bytes", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "txq%d_producer_index", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "txq%d_consumer_index", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "txq%d_hw_consumer_index", + q_num); + string += ETH_GSTRING_LEN; + q_num++; + } + } + + break; + + default: + break; + } + + mutex_unlock(&bnad->conf_mutex); +} + +static int +bnad_get_stats_count_locked(struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + int i, j, count, rxf_active_num = 0, txf_active_num = 0; + u64 bmap; + + bmap = (u64)bnad->bna.tx_mod.txf_bmap[0] | + ((u64)bnad->bna.tx_mod.txf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_TXF_ID_MAX); i++) { + if (bmap & 1) + txf_active_num++; + bmap >>= 1; + } + bmap = (u64)bnad->bna.rx_mod.rxf_bmap[0] | + ((u64)bnad->bna.rx_mod.rxf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_RXF_ID_MAX); i++) { + if (bmap & 1) + rxf_active_num++; + bmap >>= 1; + } + count = BNAD_ETHTOOL_STATS_NUM + + txf_active_num * BNAD_NUM_TXF_COUNTERS + + rxf_active_num * BNAD_NUM_RXF_COUNTERS; + + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + count += bnad->num_rxp_per_rx * BNAD_NUM_CQ_COUNTERS; + count += bnad->num_rxp_per_rx * BNAD_NUM_RXQ_COUNTERS; + for (j = 0; j < bnad->num_rxp_per_rx; j++) + if (bnad->rx_info[i].rx_ctrl[j].ccb && + bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1] && + bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1]->rxq) + count += BNAD_NUM_RXQ_COUNTERS; + } + + for (i = 0; i < bnad->num_tx; i++) { + if (!bnad->tx_info[i].tx) + continue; + count += bnad->num_txq_per_tx * BNAD_NUM_TXQ_COUNTERS; + } + return count; +} + +static int +bnad_per_q_stats_fill(struct bnad *bnad, u64 *buf, int bi) +{ + int i, j; + struct bna_rcb *rcb = NULL; + struct bna_tcb *tcb = NULL; + + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) + if (bnad->rx_info[i].rx_ctrl[j].ccb && + bnad->rx_info[i].rx_ctrl[j].ccb->rcb[0] && + bnad->rx_info[i].rx_ctrl[j].ccb->rcb[0]->rxq) { + buf[bi++] = bnad->rx_info[i].rx_ctrl[j]. + ccb->producer_index; + buf[bi++] = 0; /* ccb->consumer_index */ + buf[bi++] = *(bnad->rx_info[i].rx_ctrl[j]. + ccb->hw_producer_index); + } + } + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) + if (bnad->rx_info[i].rx_ctrl[j].ccb) { + if (bnad->rx_info[i].rx_ctrl[j].ccb->rcb[0] && + bnad->rx_info[i].rx_ctrl[j].ccb-> + rcb[0]->rxq) { + rcb = bnad->rx_info[i].rx_ctrl[j]. + ccb->rcb[0]; + buf[bi++] = rcb->rxq->rx_packets; + buf[bi++] = rcb->rxq->rx_bytes; + buf[bi++] = rcb->rxq-> + rx_packets_with_error; + buf[bi++] = rcb->rxq-> + rxbuf_alloc_failed; + buf[bi++] = rcb->producer_index; + buf[bi++] = rcb->consumer_index; + } + if (bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1] && + bnad->rx_info[i].rx_ctrl[j].ccb-> + rcb[1]->rxq) { + rcb = bnad->rx_info[i].rx_ctrl[j]. + ccb->rcb[1]; + buf[bi++] = rcb->rxq->rx_packets; + buf[bi++] = rcb->rxq->rx_bytes; + buf[bi++] = rcb->rxq-> + rx_packets_with_error; + buf[bi++] = rcb->rxq-> + rxbuf_alloc_failed; + buf[bi++] = rcb->producer_index; + buf[bi++] = rcb->consumer_index; + } + } + } + + for (i = 0; i < bnad->num_tx; i++) { + if (!bnad->tx_info[i].tx) + continue; + for (j = 0; j < bnad->num_txq_per_tx; j++) + if (bnad->tx_info[i].tcb[j] && + bnad->tx_info[i].tcb[j]->txq) { + tcb = bnad->tx_info[i].tcb[j]; + buf[bi++] = tcb->txq->tx_packets; + buf[bi++] = tcb->txq->tx_bytes; + buf[bi++] = tcb->producer_index; + buf[bi++] = tcb->consumer_index; + buf[bi++] = *(tcb->hw_consumer_index); + } + } + + return bi; +} + +static void +bnad_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, + u64 *buf) +{ + struct bnad *bnad = netdev_priv(netdev); + int i, j, bi; + unsigned long *net_stats, flags; + u64 *stats64; + u64 bmap; + + mutex_lock(&bnad->conf_mutex); + if (bnad_get_stats_count_locked(netdev) != stats->n_stats) { + mutex_unlock(&bnad->conf_mutex); + return; + } + + /* + * Used bna_lock to sync reads from bna_stats, which is written + * under the same lock + */ + spin_lock_irqsave(&bnad->bna_lock, flags); + bi = 0; + memset(buf, 0, stats->n_stats * sizeof(u64)); + memset(&bnad->net_stats, 0, sizeof(struct net_device_stats)); + + bnad_netdev_qstats_fill(bnad); + bnad_netdev_hwstats_fill(bnad); + + /* Fill net_stats into ethtool buffers */ + net_stats = (unsigned long *)&bnad->net_stats; + for (i = 0; i < sizeof(struct net_device_stats) / sizeof(unsigned long); + i++) + buf[bi++] = net_stats[i]; + + /* Fill driver stats into ethtool buffers */ + stats64 = (u64 *)&bnad->stats.drv_stats; + for (i = 0; i < sizeof(struct bnad_drv_stats) / sizeof(u64); i++) + buf[bi++] = stats64[i]; + + /* Fill hardware stats excluding the rxf/txf into ethtool bufs */ + stats64 = (u64 *) bnad->stats.bna_stats->hw_stats; + for (i = 0; + i < offsetof(struct bfi_ll_stats, rxf_stats[0]) / sizeof(u64); + i++) + buf[bi++] = stats64[i]; + + /* Fill txf stats into ethtool buffers */ + bmap = (u64)bnad->bna.tx_mod.txf_bmap[0] | + ((u64)bnad->bna.tx_mod.txf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_TXF_ID_MAX); i++) { + if (bmap & 1) { + stats64 = (u64 *)&bnad->stats.bna_stats-> + hw_stats->txf_stats[i]; + for (j = 0; j < sizeof(struct bfi_ll_stats_txf) / + sizeof(u64); j++) + buf[bi++] = stats64[j]; + } + bmap >>= 1; + } + + /* Fill rxf stats into ethtool buffers */ + bmap = (u64)bnad->bna.rx_mod.rxf_bmap[0] | + ((u64)bnad->bna.rx_mod.rxf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_RXF_ID_MAX); i++) { + if (bmap & 1) { + stats64 = (u64 *)&bnad->stats.bna_stats-> + hw_stats->rxf_stats[i]; + for (j = 0; j < sizeof(struct bfi_ll_stats_rxf) / + sizeof(u64); j++) + buf[bi++] = stats64[j]; + } + bmap >>= 1; + } + + /* Fill per Q stats into ethtool buffers */ + bi = bnad_per_q_stats_fill(bnad, buf, bi); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); +} + +static int +bnad_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return bnad_get_stats_count_locked(netdev); + default: + return -EOPNOTSUPP; + } +} + +static struct ethtool_ops bnad_ethtool_ops = { + .get_settings = bnad_get_settings, + .set_settings = bnad_set_settings, + .get_drvinfo = bnad_get_drvinfo, + .get_regs_len = bnad_get_regs_len, + .get_regs = bnad_get_regs, + .get_wol = bnad_get_wol, + .get_link = ethtool_op_get_link, + .get_coalesce = bnad_get_coalesce, + .set_coalesce = bnad_set_coalesce, + .get_ringparam = bnad_get_ringparam, + .set_ringparam = bnad_set_ringparam, + .get_pauseparam = bnad_get_pauseparam, + .set_pauseparam = bnad_set_pauseparam, + .get_rx_csum = bnad_get_rx_csum, + .set_rx_csum = bnad_set_rx_csum, + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = bnad_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, + .get_tso = ethtool_op_get_tso, + .set_tso = bnad_set_tso, + .get_flags = ethtool_op_get_flags, + .set_flags = ethtool_op_set_flags, + .get_strings = bnad_get_strings, + .get_ethtool_stats = bnad_get_ethtool_stats, + .get_sset_count = bnad_get_sset_count +}; + +void +bnad_set_ethtool_ops(struct net_device *netdev) +{ + SET_ETHTOOL_OPS(netdev, &bnad_ethtool_ops); +} diff --git a/drivers/net/bna/cna.h b/drivers/net/bna/cna.h new file mode 100644 index 000000000000..bbd39dc65972 --- /dev/null +++ b/drivers/net/bna/cna.h @@ -0,0 +1,81 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2006-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#ifndef __CNA_H__ +#define __CNA_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define bfa_sm_fault(__mod, __event) do { \ + pr_err("SM Assertion failure: %s: %d: event = %d", __FILE__, __LINE__, \ + __event); \ +} while (0) + +extern char bfa_version[]; + +#define CNA_FW_FILE_CT "ctfw_cna.bin" +#define FC_SYMNAME_MAX 256 /*!< max name server symbolic name size */ + +#pragma pack(1) + +#define MAC_ADDRLEN (6) +typedef struct mac { u8 mac[MAC_ADDRLEN]; } mac_t; + +#pragma pack() + +#define bfa_q_first(_q) ((void *)(((struct list_head *) (_q))->next)) +#define bfa_q_next(_qe) (((struct list_head *) (_qe))->next) +#define bfa_q_prev(_qe) (((struct list_head *) (_qe))->prev) + +/* + * bfa_q_qe_init - to initialize a queue element + */ +#define bfa_q_qe_init(_qe) { \ + bfa_q_next(_qe) = (struct list_head *) NULL; \ + bfa_q_prev(_qe) = (struct list_head *) NULL; \ +} + +/* + * bfa_q_deq - dequeue an element from head of the queue + */ +#define bfa_q_deq(_q, _qe) { \ + if (!list_empty(_q)) { \ + (*((struct list_head **) (_qe))) = bfa_q_next(_q); \ + bfa_q_prev(bfa_q_next(*((struct list_head **) _qe))) = \ + (struct list_head *) (_q); \ + bfa_q_next(_q) = bfa_q_next(*((struct list_head **) _qe)); \ + bfa_q_qe_init(*((struct list_head **) _qe)); \ + } else { \ + *((struct list_head **) (_qe)) = (struct list_head *) NULL; \ + } \ +} + +#endif /* __CNA_H__ */ diff --git a/drivers/net/bna/cna_fwimg.c b/drivers/net/bna/cna_fwimg.c new file mode 100644 index 000000000000..0bd1d3790a27 --- /dev/null +++ b/drivers/net/bna/cna_fwimg.c @@ -0,0 +1,64 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#include +#include "cna.h" + +const struct firmware *bfi_fw; +static u32 *bfi_image_ct_cna; +static u32 bfi_image_ct_cna_size; + +u32 * +cna_read_firmware(struct pci_dev *pdev, u32 **bfi_image, + u32 *bfi_image_size, char *fw_name) +{ + const struct firmware *fw; + + if (request_firmware(&fw, fw_name, &pdev->dev)) { + pr_alert("Can't locate firmware %s\n", fw_name); + goto error; + } + + *bfi_image = (u32 *)fw->data; + *bfi_image_size = fw->size/sizeof(u32); + bfi_fw = fw; + + return *bfi_image; +error: + return NULL; +} + +u32 * +cna_get_firmware_buf(struct pci_dev *pdev) +{ + if (bfi_image_ct_cna_size == 0) + cna_read_firmware(pdev, &bfi_image_ct_cna, + &bfi_image_ct_cna_size, CNA_FW_FILE_CT); + return bfi_image_ct_cna; +} + +u32 * +bfa_cb_image_get_chunk(int type, u32 off) +{ + return (u32 *)(bfi_image_ct_cna + off); +} + +u32 +bfa_cb_image_get_size(int type) +{ + return bfi_image_ct_cna_size; +} diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d36cad..1f730de0df06 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2189,6 +2189,9 @@ #define PCI_VENDOR_ID_ARIMA 0x161f #define PCI_VENDOR_ID_BROCADE 0x1657 +#define PCI_DEVICE_ID_BROCADE_CT 0x0014 +#define PCI_DEVICE_ID_BROCADE_FC_8G1P 0x0017 +#define PCI_DEVICE_ID_BROCADE_CT_FC 0x0021 #define PCI_VENDOR_ID_SIBYTE 0x166d #define PCI_DEVICE_ID_BCM1250_PCI 0x0001 -- cgit v1.2.3 From 1726442e115a9e58f40747d009a5b4f303e0840a Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 23 Aug 2010 16:26:41 +0000 Subject: net: increase the size of priv_flags and add IFF_OVS_DATAPATH IFF_OVS_DATAPATH is a place-holder for the Open vSwitch datapath which I am preparing to submit for merging. As all 16 bits of priv_flags are already assigned flags, also increase the size of priv_flags to 32 bits. Unfortunately, by my calculations this increases the size of struct net_device by 4 bytes on 32bit architectures and 8 bytes on 64 bit architectures. I couldn't see an obvious way to avoid that. Cc: Jesse Gross Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/if.h | 2 ++ include/linux/netdevice.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if.h b/include/linux/if.h index 53558ec59e1b..6ed43c1f07ab 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -75,6 +75,8 @@ #define IFF_DISABLE_NETPOLL 0x2000 /* disable netpoll at run-time */ #define IFF_MACVLAN_PORT 0x4000 /* device used as macvlan port */ #define IFF_BRIDGE_PORT 0x8000 /* device used as bridge port */ +#define IFF_OVS_DATAPATH 0x10000 /* device used as Open vSwitch + * dapath port */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ce2de8b64083..59962dbc2758 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -901,7 +901,7 @@ struct net_device { unsigned int flags; /* interface flags (a la BSD) */ unsigned short gflags; - unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */ + unsigned int priv_flags; /* Like 'flags' but invisible to userspace. */ unsigned short padded; /* How much padding added by alloc_netdev() */ unsigned char operstate; /* RFC2863 operstate */ -- cgit v1.2.3 From 633adf1ad1c92c02bd3f10bbd73737a969179378 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Aug 2010 14:49:58 +0200 Subject: cfg80211: mark ieee80211_hdrlen const This function analyses only its single, value-passed argument, and has no side effects. Thus it can be const, which makes mac80211 smaller, for example: text data bss dec hex filename 362518 16720 884 380122 5ccda mac80211.ko (before) 362358 16720 884 379962 5cc3a mac80211.ko (after) a 160 byte saving in text size, and an optimisation because the function won't be called as often. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 +- net/wireless/util.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2fd06c60ffbb..2b403c7ee32e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1715,7 +1715,7 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); * ieee80211_hdrlen - get header length in bytes from frame control * @fc: frame control field in little-endian format */ -unsigned int ieee80211_hdrlen(__le16 fc); +unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 diff --git a/net/wireless/util.c b/net/wireless/util.c index 0c8a1e8b7690..1eb24162be61 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -221,7 +221,7 @@ const unsigned char bridge_tunnel_header[] __aligned(2) = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; EXPORT_SYMBOL(bridge_tunnel_header); -unsigned int ieee80211_hdrlen(__le16 fc) +unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc) { unsigned int hdrlen = 24; -- cgit v1.2.3 From 2e161f78e5f63a7f9fd25a766bb7f816a01eb14a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Aug 2010 15:38:38 +0200 Subject: cfg80211/mac80211: extensible frame processing Allow userspace to register for more than just action frames by giving the frame subtype, and make it possible to use this in various modes as well. With some tweaks and some added functionality this will, in the future, also be usable in AP mode and be able to replace the cooked monitor interface currently used in that case. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 93 +++++++++++++++++++++++------ include/net/cfg80211.h | 56 +++++++++++------- net/mac80211/cfg.c | 12 ++-- net/mac80211/ieee80211_i.h | 1 + net/mac80211/iface.c | 6 +- net/mac80211/main.c | 37 ++++++++++++ net/mac80211/rx.c | 137 +++++++++++++++++++++++++++++------------- net/mac80211/status.c | 2 +- net/mac80211/util.c | 6 +- net/wireless/core.c | 8 +-- net/wireless/core.h | 21 +++---- net/wireless/mlme.c | 144 +++++++++++++++++++++++++++++---------------- net/wireless/nl80211.c | 108 +++++++++++++++++++++++++--------- net/wireless/nl80211.h | 14 ++--- net/wireless/util.c | 2 +- 15 files changed, 452 insertions(+), 195 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 2c8701687336..8af1e66c3cf9 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -39,6 +39,43 @@ * TODO: need more info? */ +/** + * DOC: Frame transmission/registration support + * + * Frame transmission and registration support exists to allow userspace + * management entities such as wpa_supplicant react to management frames + * that are not being handled by the kernel. This includes, for example, + * certain classes of action frames that cannot be handled in the kernel + * for various reasons. + * + * Frame registration is done on a per-interface basis and registrations + * cannot be removed other than by closing the socket. It is possible to + * specify a registration filter to register, for example, only for a + * certain type of action frame. In particular with action frames, those + * that userspace registers for will not be returned as unhandled by the + * driver, so that the registered application has to take responsibility + * for doing that. + * + * The type of frame that can be registered for is also dependent on the + * driver and interface type. The frame types are advertised in wiphy + * attributes so applications know what to expect. + * + * NOTE: When an interface changes type while registrations are active, + * these registrations are ignored until the interface type is + * changed again. This means that changing the interface type can + * lead to a situation that couldn't otherwise be produced, but + * any such registrations will be dormant in the sense that they + * will not be serviced, i.e. they will not receive any frames. + * + * Frame transmission allows userspace to send for example the required + * responses to action frames. It is subject to some sanity checking, + * but many frames can be transmitted. When a frame was transmitted, its + * status is indicated to the sending socket. + * + * For more technical details, see the corresponding command descriptions + * below. + */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -301,16 +338,18 @@ * rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface * and @NL80211_ATTR_TX_RATES the set of allowed rates. * - * @NL80211_CMD_REGISTER_ACTION: Register for receiving certain action frames - * (via @NL80211_CMD_ACTION) for processing in userspace. This command - * requires an interface index and a match attribute containing the first - * few bytes of the frame that should match, e.g. a single byte for only - * a category match or four bytes for vendor frames including the OUI. - * The registration cannot be dropped, but is removed automatically - * when the netlink socket is closed. Multiple registrations can be made. - * @NL80211_CMD_ACTION: Action frame TX request and RX notification. This - * command is used both as a request to transmit an Action frame and as an - * event indicating reception of an Action frame that was not processed in + * @NL80211_CMD_REGISTER_FRAME: Register for receiving certain mgmt frames + * (via @NL80211_CMD_FRAME) for processing in userspace. This command + * requires an interface index, a frame type attribute (optional for + * backward compatibility reasons, if not given assumes action frames) + * and a match attribute containing the first few bytes of the frame + * that should match, e.g. a single byte for only a category match or + * four bytes for vendor frames including the OUI. The registration + * cannot be dropped, but is removed automatically when the netlink + * socket is closed. Multiple registrations can be made. + * @NL80211_CMD_FRAME: Management frame TX request and RX notification. This + * command is used both as a request to transmit a management frame and + * as an event indicating reception of a frame that was not processed in * kernel code, but is for us (i.e., which may need to be processed in a * user space application). %NL80211_ATTR_FRAME is used to specify the * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ (and @@ -320,8 +359,8 @@ * operational channel). When called, this operation returns a cookie * (%NL80211_ATTR_COOKIE) that will be included with the TX status event * pertaining to the TX request. - * @NL80211_CMD_ACTION_TX_STATUS: Report TX status of an Action frame - * transmitted with %NL80211_CMD_ACTION. %NL80211_ATTR_COOKIE identifies + * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame + * transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies * the TX command and %NL80211_ATTR_FRAME includes the contents of the * frame. %NL80211_ATTR_ACK flag is included if the recipient acknowledged * the frame. @@ -429,9 +468,12 @@ enum nl80211_commands { NL80211_CMD_SET_TX_BITRATE_MASK, - NL80211_CMD_REGISTER_ACTION, - NL80211_CMD_ACTION, - NL80211_CMD_ACTION_TX_STATUS, + NL80211_CMD_REGISTER_FRAME, + NL80211_CMD_REGISTER_ACTION = NL80211_CMD_REGISTER_FRAME, + NL80211_CMD_FRAME, + NL80211_CMD_ACTION = NL80211_CMD_FRAME, + NL80211_CMD_FRAME_TX_STATUS, + NL80211_CMD_ACTION_TX_STATUS = NL80211_CMD_FRAME_TX_STATUS, NL80211_CMD_SET_POWER_SAVE, NL80211_CMD_GET_POWER_SAVE, @@ -708,7 +750,16 @@ enum nl80211_commands { * is used with %NL80211_CMD_SET_TX_BITRATE_MASK. * * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain - * at least one byte, currently used with @NL80211_CMD_REGISTER_ACTION. + * at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME. + * @NL80211_ATTR_FRAME_TYPE: A u16 indicating the frame type/subtype for the + * @NL80211_CMD_REGISTER_FRAME command. + * @NL80211_ATTR_TX_FRAME_TYPES: wiphy capability attribute, which is a + * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing + * information about which frame types can be transmitted with + * %NL80211_CMD_FRAME. + * @NL80211_ATTR_RX_FRAME_TYPES: wiphy capability attribute, which is a + * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing + * information about which frame types can be registered for RX. * * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was * acknowledged by the recipient. @@ -891,6 +942,10 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_TX_POWER_SETTING, NL80211_ATTR_WIPHY_TX_POWER_LEVEL, + NL80211_ATTR_TX_FRAME_TYPES, + NL80211_ATTR_RX_FRAME_TYPES, + NL80211_ATTR_FRAME_TYPE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -947,7 +1002,7 @@ enum nl80211_attrs { * @NL80211_IFTYPE_MONITOR: monitor interface receiving all frames * @NL80211_IFTYPE_MESH_POINT: mesh point * @NL80211_IFTYPE_MAX: highest interface type number currently defined - * @__NL80211_IFTYPE_AFTER_LAST: internal use + * @NUM_NL80211_IFTYPES: number of defined interface types * * These values are used with the %NL80211_ATTR_IFTYPE * to set the type of an interface. @@ -964,8 +1019,8 @@ enum nl80211_iftype { NL80211_IFTYPE_MESH_POINT, /* keep last */ - __NL80211_IFTYPE_AFTER_LAST, - NL80211_IFTYPE_MAX = __NL80211_IFTYPE_AFTER_LAST - 1 + NUM_NL80211_IFTYPES, + NL80211_IFTYPE_MAX = NUM_NL80211_IFTYPES - 1 }; /** diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2b403c7ee32e..6a98b1b3bfde 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1020,7 +1020,7 @@ struct cfg80211_pmksa { * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation. * This allows the operation to be terminated prior to timeout based on * the duration value. - * @action: Transmit an action frame + * @mgmt_tx: Transmit a management frame * * @testmode_cmd: run a test mode command * @@ -1172,7 +1172,7 @@ struct cfg80211_ops { struct net_device *dev, u64 cookie); - int (*action)(struct wiphy *wiphy, struct net_device *dev, + int (*mgmt_tx)(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, bool channel_type_valid, @@ -1236,6 +1236,10 @@ struct mac_address { u8 addr[ETH_ALEN]; }; +struct ieee80211_txrx_stypes { + u16 tx, rx; +}; + /** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback @@ -1286,6 +1290,10 @@ struct mac_address { * @privid: a pointer that drivers can use to identify if an arbitrary * wiphy is theirs, e.g. in global notifiers * @bands: information about bands/channels supported by this device + * + * @mgmt_stypes: bitmasks of frame subtypes that can be subscribed to or + * transmitted through nl80211, points to an array indexed by interface + * type */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -1294,9 +1302,12 @@ struct wiphy { u8 perm_addr[ETH_ALEN]; u8 addr_mask[ETH_ALEN]; - u16 n_addresses; struct mac_address *addresses; + const struct ieee80211_txrx_stypes *mgmt_stypes; + + u16 n_addresses; + /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ u16 interface_modes; @@ -1492,8 +1503,8 @@ struct cfg80211_cached_keys; * set by driver (if supported) on add_interface BEFORE registering the * netdev and may otherwise be used by driver read-only, will be update * by cfg80211 on change_interface - * @action_registrations: list of registrations for action frames - * @action_registrations_lock: lock for the list + * @mgmt_registrations: list of registrations for management frames + * @mgmt_registrations_lock: lock for the list * @mtx: mutex used to lock data in this struct * @cleanup_work: work struct used for cleanup that can't be done directly */ @@ -1505,8 +1516,8 @@ struct wireless_dev { struct list_head list; struct net_device *netdev; - struct list_head action_registrations; - spinlock_t action_registrations_lock; + struct list_head mgmt_registrations; + spinlock_t mgmt_registrations_lock; struct mutex mtx; @@ -2373,38 +2384,39 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); /** - * cfg80211_rx_action - notification of received, unprocessed Action frame + * cfg80211_rx_mgmt - notification of received, unprocessed management frame * @dev: network device * @freq: Frequency on which the frame was received in MHz - * @buf: Action frame (header + body) + * @buf: Management frame (header + body) * @len: length of the frame data * @gfp: context flags - * Returns %true if a user space application is responsible for rejecting the - * unrecognized Action frame; %false if no such application is registered - * (i.e., the driver is responsible for rejecting the unrecognized Action - * frame) + * + * Returns %true if a user space application has registered for this frame. + * For action frames, that makes it responsible for rejecting unrecognized + * action frames; %false otherwise, in which case for action frames the + * driver is responsible for rejecting the frame. * * This function is called whenever an Action frame is received for a station * mode interface, but is not processed in kernel. */ -bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, - size_t len, gfp_t gfp); +bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf, + size_t len, gfp_t gfp); /** - * cfg80211_action_tx_status - notification of TX status for Action frame + * cfg80211_mgmt_tx_status - notification of TX status for management frame * @dev: network device - * @cookie: Cookie returned by cfg80211_ops::action() - * @buf: Action frame (header + body) + * @cookie: Cookie returned by cfg80211_ops::mgmt_tx() + * @buf: Management frame (header + body) * @len: length of the frame data * @ack: Whether frame was acknowledged * @gfp: context flags * - * This function is called whenever an Action frame was requested to be - * transmitted with cfg80211_ops::action() to report the TX status of the + * This function is called whenever a management frame was requested to be + * transmitted with cfg80211_ops::mgmt_tx() to report the TX status of the * transmission attempt. */ -void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp); +void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp); /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f9a317766136..94787d21282c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1521,11 +1521,11 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, return ieee80211_wk_cancel_remain_on_channel(sdata, cookie); } -static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, - const u8 *buf, size_t len, u64 *cookie) +static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + bool channel_type_valid, + const u8 *buf, size_t len, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -1625,6 +1625,6 @@ struct cfg80211_ops mac80211_config_ops = { .set_bitrate_mask = ieee80211_set_bitrate_mask, .remain_on_channel = ieee80211_remain_on_channel, .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, - .action = ieee80211_action, + .mgmt_tx = ieee80211_mgmt_tx, .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1bf05bfd149d..e73ae51dc036 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -170,6 +170,7 @@ typedef unsigned __bitwise__ ieee80211_rx_result; #define IEEE80211_RX_RA_MATCH BIT(1) #define IEEE80211_RX_AMSDU BIT(2) #define IEEE80211_RX_FRAGMENTED BIT(3) +#define IEEE80211_MALFORMED_ACTION_FRM BIT(4) /* only add flags here that do not change with subframes of an aMPDU */ struct ieee80211_rx_data { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 9459aeee0ddc..86f434f234ae 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -177,7 +177,7 @@ static int ieee80211_open(struct net_device *dev) /* no special treatment */ break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: /* cannot happen */ WARN_ON(1); break; @@ -634,7 +634,7 @@ static void ieee80211_teardown_sdata(struct net_device *dev) case NL80211_IFTYPE_MONITOR: break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: BUG(); break; } @@ -886,7 +886,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP_VLAN: break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: BUG(); break; } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 0afccda42a24..a53feac4618c 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -417,6 +417,41 @@ void ieee80211_napi_complete(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_napi_complete); +/* There isn't a lot of sense in it, but you can transmit anything you like */ +static const struct ieee80211_txrx_stypes +ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = { + [NL80211_IFTYPE_ADHOC] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ACTION >> 4), + }, + [NL80211_IFTYPE_STATION] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | + BIT(IEEE80211_STYPE_PROBE_REQ >> 4), + }, + [NL80211_IFTYPE_AP] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_PROBE_REQ >> 4) | + BIT(IEEE80211_STYPE_DISASSOC >> 4) | + BIT(IEEE80211_STYPE_AUTH >> 4) | + BIT(IEEE80211_STYPE_DEAUTH >> 4) | + BIT(IEEE80211_STYPE_ACTION >> 4), + }, + [NL80211_IFTYPE_AP_VLAN] = { + /* copy AP */ + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_PROBE_REQ >> 4) | + BIT(IEEE80211_STYPE_DISASSOC >> 4) | + BIT(IEEE80211_STYPE_AUTH >> 4) | + BIT(IEEE80211_STYPE_DEAUTH >> 4) | + BIT(IEEE80211_STYPE_ACTION >> 4), + }, +}; + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -446,6 +481,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, if (!wiphy) return NULL; + wiphy->mgmt_stypes = ieee80211_default_mgmt_stypes; + wiphy->flags |= WIPHY_FLAG_NETNS_OK | WIPHY_FLAG_4ADDR_AP | WIPHY_FLAG_4ADDR_STATION; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 4fdbed58ca2f..aa41e382bbb3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1939,14 +1939,37 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, skb); } +static ieee80211_rx_result debug_noinline +ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) +{ + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; + + /* + * From here on, look only at management frames. + * Data and control frames are already handled, + * and unknown (reserved) frames are useless. + */ + if (rx->skb->len < 24) + return RX_DROP_MONITOR; + + if (!ieee80211_is_mgmt(mgmt->frame_control)) + return RX_DROP_MONITOR; + + if (!(rx->flags & IEEE80211_RX_RA_MATCH)) + return RX_DROP_MONITOR; + + if (ieee80211_drop_unencrypted_mgmt(rx)) + return RX_DROP_UNUSABLE; + + return RX_CONTINUE; +} + static ieee80211_rx_result debug_noinline ieee80211_rx_h_action(struct ieee80211_rx_data *rx) { struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; - struct sk_buff *nskb; - struct ieee80211_rx_status *status; int len = rx->skb->len; if (!ieee80211_is_action(mgmt->frame_control)) @@ -1962,9 +1985,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_UNUSABLE; - if (ieee80211_drop_unencrypted_mgmt(rx)) - return RX_DROP_UNUSABLE; - switch (mgmt->u.action.category) { case WLAN_CATEGORY_BACK: /* @@ -2055,17 +2075,36 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto queue; } + return RX_CONTINUE; + invalid: - /* - * For AP mode, hostapd is responsible for handling any action - * frames that we didn't handle, including returning unknown - * ones. For all other modes we will return them to the sender, - * setting the 0x80 bit in the action category, as required by - * 802.11-2007 7.3.1.11. - */ - if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - return RX_DROP_MONITOR; + rx->flags |= IEEE80211_MALFORMED_ACTION_FRM; + /* will return in the next handlers */ + return RX_CONTINUE; + + handled: + if (rx->sta) + rx->sta->rx_packets++; + dev_kfree_skb(rx->skb); + return RX_QUEUED; + + queue: + rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; + skb_queue_tail(&sdata->skb_queue, rx->skb); + ieee80211_queue_work(&local->hw, &sdata->work); + if (rx->sta) + rx->sta->rx_packets++; + return RX_QUEUED; +} + +static ieee80211_rx_result debug_noinline +ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) +{ + struct ieee80211_rx_status *status; + + /* skip known-bad action frames and return them in the next handler */ + if (rx->flags & IEEE80211_MALFORMED_ACTION_FRM) + return RX_CONTINUE; /* * Getting here means the kernel doesn't know how to handle @@ -2075,10 +2114,44 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) */ status = IEEE80211_SKB_RXCB(rx->skb); - if (cfg80211_rx_action(rx->sdata->dev, status->freq, - rx->skb->data, rx->skb->len, - GFP_ATOMIC)) - goto handled; + if (cfg80211_rx_mgmt(rx->sdata->dev, status->freq, + rx->skb->data, rx->skb->len, + GFP_ATOMIC)) { + if (rx->sta) + rx->sta->rx_packets++; + dev_kfree_skb(rx->skb); + return RX_QUEUED; + } + + + return RX_CONTINUE; +} + +static ieee80211_rx_result debug_noinline +ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) +{ + struct ieee80211_local *local = rx->local; + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; + struct sk_buff *nskb; + struct ieee80211_sub_if_data *sdata = rx->sdata; + + if (!ieee80211_is_action(mgmt->frame_control)) + return RX_CONTINUE; + + /* + * For AP mode, hostapd is responsible for handling any action + * frames that we didn't handle, including returning unknown + * ones. For all other modes we will return them to the sender, + * setting the 0x80 bit in the action category, as required by + * 802.11-2007 7.3.1.11. + * Newer versions of hostapd shall also use the management frame + * registration mechanisms, but older ones still use cooked + * monitor interfaces so push all frames there. + */ + if (!(rx->flags & IEEE80211_MALFORMED_ACTION_FRM) && + (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) + return RX_DROP_MONITOR; /* do not return rejected action frames */ if (mgmt->u.action.category & 0x80) @@ -2097,20 +2170,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) ieee80211_tx_skb(rx->sdata, nskb); } - - handled: - if (rx->sta) - rx->sta->rx_packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; - - queue: - rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; - skb_queue_tail(&sdata->skb_queue, rx->skb); - ieee80211_queue_work(&local->hw, &sdata->work); - if (rx->sta) - rx->sta->rx_packets++; - return RX_QUEUED; } static ieee80211_rx_result debug_noinline @@ -2121,15 +2182,6 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; __le16 stype; - if (!(rx->flags & IEEE80211_RX_RA_MATCH)) - return RX_DROP_MONITOR; - - if (rx->skb->len < 24) - return RX_DROP_MONITOR; - - if (ieee80211_drop_unencrypted_mgmt(rx)) - return RX_DROP_UNUSABLE; - rxs = ieee80211_work_rx_mgmt(rx->sdata, rx->skb); if (rxs != RX_CONTINUE) return rxs; @@ -2374,7 +2426,10 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, if (res != RX_CONTINUE) goto rxh_next; + CALL_RXH(ieee80211_rx_h_mgmt_check) CALL_RXH(ieee80211_rx_h_action) + CALL_RXH(ieee80211_rx_h_userspace_mgmt) + CALL_RXH(ieee80211_rx_h_action_return) CALL_RXH(ieee80211_rx_h_mgmt) rxh_next: @@ -2527,7 +2582,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, break; case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: /* should never get here */ WARN_ON(1); break; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 10caec5ea8fa..67a35841bef0 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -296,7 +296,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) } if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) - cfg80211_action_tx_status( + cfg80211_mgmt_tx_status( skb->dev, (unsigned long) skb, skb->data, skb->len, !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 748387d45bc0..cd2b485fed4f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -471,7 +471,7 @@ void ieee80211_iterate_active_interfaces( list_for_each_entry(sdata, &local->interfaces, list) { switch (sdata->vif.type) { - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: @@ -505,7 +505,7 @@ void ieee80211_iterate_active_interfaces_atomic( list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: @@ -1189,7 +1189,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* ignore virtual */ break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: WARN_ON(1); break; } diff --git a/net/wireless/core.c b/net/wireless/core.c index c70909c3eae4..d52630bbab04 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -433,7 +433,7 @@ int wiphy_register(struct wiphy *wiphy) /* sanity check ifmodes */ WARN_ON(!ifmodes); - ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; + ifmodes &= ((1 << NUM_NL80211_IFTYPES) - 1) & ~1; if (WARN_ON(ifmodes != wiphy->interface_modes)) wiphy->interface_modes = ifmodes; @@ -685,8 +685,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work); INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); - INIT_LIST_HEAD(&wdev->action_registrations); - spin_lock_init(&wdev->action_registrations_lock); + INIT_LIST_HEAD(&wdev->mgmt_registrations); + spin_lock_init(&wdev->mgmt_registrations_lock); mutex_lock(&rdev->devlist_mtx); list_add_rcu(&wdev->list, &rdev->netdev_list); @@ -806,7 +806,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, sysfs_remove_link(&dev->dev.kobj, "phy80211"); list_del_rcu(&wdev->list); rdev->devlist_generation++; - cfg80211_mlme_purge_actions(wdev); + cfg80211_mlme_purge_registrations(wdev); #ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.keys); #endif diff --git a/net/wireless/core.h b/net/wireless/core.h index 63d57ae399c3..58ab2c791d28 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -331,16 +331,17 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *resp_ie, size_t resp_ie_len, u16 status, bool wextev, struct cfg80211_bss *bss); -int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, - const u8 *match_data, int match_len); -void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid); -void cfg80211_mlme_purge_actions(struct wireless_dev *wdev); -int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, - const u8 *buf, size_t len, u64 *cookie); +int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, + u16 frame_type, const u8 *match_data, + int match_len); +void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); +void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); +int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + bool channel_type_valid, + const u8 *buf, size_t len, u64 *cookie); /* SME */ int __cfg80211_connect(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index ee0af32ed59e..8515b1e5c578 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -748,31 +748,51 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, } EXPORT_SYMBOL(cfg80211_new_sta); -struct cfg80211_action_registration { +struct cfg80211_mgmt_registration { struct list_head list; u32 nlpid; int match_len; + __le16 frame_type; + u8 match[]; }; -int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, - const u8 *match_data, int match_len) +int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, + u16 frame_type, const u8 *match_data, + int match_len) { - struct cfg80211_action_registration *reg, *nreg; + struct cfg80211_mgmt_registration *reg, *nreg; int err = 0; + u16 mgmt_type; + + if (!wdev->wiphy->mgmt_stypes) + return -EOPNOTSUPP; + + if ((frame_type & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT) + return -EINVAL; + + if (frame_type & ~(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) + return -EINVAL; + + mgmt_type = (frame_type & IEEE80211_FCTL_STYPE) >> 4; + if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].rx & BIT(mgmt_type))) + return -EINVAL; nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL); if (!nreg) return -ENOMEM; - spin_lock_bh(&wdev->action_registrations_lock); + spin_lock_bh(&wdev->mgmt_registrations_lock); - list_for_each_entry(reg, &wdev->action_registrations, list) { + list_for_each_entry(reg, &wdev->mgmt_registrations, list) { int mlen = min(match_len, reg->match_len); + if (frame_type != le16_to_cpu(reg->frame_type)) + continue; + if (memcmp(reg->match, match_data, mlen) == 0) { err = -EALREADY; break; @@ -787,62 +807,75 @@ int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, memcpy(nreg->match, match_data, match_len); nreg->match_len = match_len; nreg->nlpid = snd_pid; - list_add(&nreg->list, &wdev->action_registrations); + nreg->frame_type = cpu_to_le16(frame_type); + list_add(&nreg->list, &wdev->mgmt_registrations); out: - spin_unlock_bh(&wdev->action_registrations_lock); + spin_unlock_bh(&wdev->mgmt_registrations_lock); return err; } -void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid) +void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) { - struct cfg80211_action_registration *reg, *tmp; + struct cfg80211_mgmt_registration *reg, *tmp; - spin_lock_bh(&wdev->action_registrations_lock); + spin_lock_bh(&wdev->mgmt_registrations_lock); - list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { + list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { if (reg->nlpid == nlpid) { list_del(®->list); kfree(reg); } } - spin_unlock_bh(&wdev->action_registrations_lock); + spin_unlock_bh(&wdev->mgmt_registrations_lock); } -void cfg80211_mlme_purge_actions(struct wireless_dev *wdev) +void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) { - struct cfg80211_action_registration *reg, *tmp; + struct cfg80211_mgmt_registration *reg, *tmp; - spin_lock_bh(&wdev->action_registrations_lock); + spin_lock_bh(&wdev->mgmt_registrations_lock); - list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { + list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { list_del(®->list); kfree(reg); } - spin_unlock_bh(&wdev->action_registrations_lock); + spin_unlock_bh(&wdev->mgmt_registrations_lock); } -int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, - const u8 *buf, size_t len, u64 *cookie) +int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + bool channel_type_valid, + const u8 *buf, size_t len, u64 *cookie) { struct wireless_dev *wdev = dev->ieee80211_ptr; const struct ieee80211_mgmt *mgmt; + u16 stype; + + if (!wdev->wiphy->mgmt_stypes) + return -EOPNOTSUPP; - if (rdev->ops->action == NULL) + if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; + if (len < 24 + 1) return -EINVAL; mgmt = (const struct ieee80211_mgmt *) buf; - if (!ieee80211_is_action(mgmt->frame_control)) + + if (!ieee80211_is_mgmt(mgmt->frame_control)) return -EINVAL; - if (mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) { + + stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE; + if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].tx & BIT(stype >> 4))) + return -EINVAL; + + if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) { /* Verify that we are associated with the destination AP */ wdev_lock(wdev); @@ -863,64 +896,75 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, return -EINVAL; /* Transmit the Action frame as requested by user space */ - return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type, - channel_type_valid, buf, len, cookie); + return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, channel_type, + channel_type_valid, buf, len, cookie); } -bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, - size_t len, gfp_t gfp) +bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf, + size_t len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - struct cfg80211_action_registration *reg; - const u8 *action_data; - int action_data_len; + struct cfg80211_mgmt_registration *reg; + const struct ieee80211_txrx_stypes *stypes = + &wiphy->mgmt_stypes[wdev->iftype]; + struct ieee80211_mgmt *mgmt = (void *)buf; + const u8 *data; + int data_len; bool result = false; + __le16 ftype = mgmt->frame_control & + cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE); + u16 stype; - /* frame length - min size excluding category */ - action_data_len = len - (IEEE80211_MIN_ACTION_SIZE - 1); + stype = (le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE) >> 4; - /* action data starts with category */ - action_data = buf + IEEE80211_MIN_ACTION_SIZE - 1; + if (!(stypes->rx & BIT(stype))) + return false; - spin_lock_bh(&wdev->action_registrations_lock); + data = buf + ieee80211_hdrlen(mgmt->frame_control); + data_len = len - ieee80211_hdrlen(mgmt->frame_control); + + spin_lock_bh(&wdev->mgmt_registrations_lock); + + list_for_each_entry(reg, &wdev->mgmt_registrations, list) { + if (reg->frame_type != ftype) + continue; - list_for_each_entry(reg, &wdev->action_registrations, list) { - if (reg->match_len > action_data_len) + if (reg->match_len > data_len) continue; - if (memcmp(reg->match, action_data, reg->match_len)) + if (memcmp(reg->match, data, reg->match_len)) continue; /* found match! */ /* Indicate the received Action frame to user space */ - if (nl80211_send_action(rdev, dev, reg->nlpid, freq, - buf, len, gfp)) + if (nl80211_send_mgmt(rdev, dev, reg->nlpid, freq, + buf, len, gfp)) continue; result = true; break; } - spin_unlock_bh(&wdev->action_registrations_lock); + spin_unlock_bh(&wdev->mgmt_registrations_lock); return result; } -EXPORT_SYMBOL(cfg80211_rx_action); +EXPORT_SYMBOL(cfg80211_rx_mgmt); -void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp) +void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); /* Indicate TX status of the Action frame to user space */ - nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp); + nl80211_send_mgmt_tx_status(rdev, dev, cookie, buf, len, ack, gfp); } -EXPORT_SYMBOL(cfg80211_action_tx_status); +EXPORT_SYMBOL(cfg80211_mgmt_tx_status); void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index bb5b78eebeb2..927ffbd2aebc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -156,6 +156,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, + [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 }, }; /* policy for the attributes */ @@ -437,6 +438,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct ieee80211_rate *rate; int i; u16 ifmodes = dev->wiphy.interface_modes; + const struct ieee80211_txrx_stypes *mgmt_stypes = + dev->wiphy.mgmt_stypes; hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) @@ -587,7 +590,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(flush_pmksa, FLUSH_PMKSA); CMD(remain_on_channel, REMAIN_ON_CHANNEL); CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); - CMD(action, ACTION); + CMD(mgmt_tx, FRAME); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -608,6 +611,53 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_cmds); + if (mgmt_stypes) { + u16 stypes; + struct nlattr *nl_ftypes, *nl_ifs; + enum nl80211_iftype ift; + + nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES); + if (!nl_ifs) + goto nla_put_failure; + + for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { + nl_ftypes = nla_nest_start(msg, ift); + if (!nl_ftypes) + goto nla_put_failure; + i = 0; + stypes = mgmt_stypes[ift].tx; + while (stypes) { + if (stypes & 1) + NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE, + (i << 4) | IEEE80211_FTYPE_MGMT); + stypes >>= 1; + i++; + } + nla_nest_end(msg, nl_ftypes); + } + + nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES); + if (!nl_ifs) + goto nla_put_failure; + + for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { + nl_ftypes = nla_nest_start(msg, ift); + if (!nl_ftypes) + goto nla_put_failure; + i = 0; + stypes = mgmt_stypes[ift].rx; + while (stypes) { + if (stypes & 1) + NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE, + (i << 4) | IEEE80211_FTYPE_MGMT); + stypes >>= 1; + i++; + } + nla_nest_end(msg, nl_ftypes); + } + nla_nest_end(msg, nl_ifs); + } + return genlmsg_end(msg, hdr); nla_put_failure: @@ -4732,17 +4782,18 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, return err; } -static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) +static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; struct net_device *dev; + u16 frame_type = IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION; int err; if (!info->attrs[NL80211_ATTR_FRAME_MATCH]) return -EINVAL; - if (nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]) < 1) - return -EINVAL; + if (info->attrs[NL80211_ATTR_FRAME_TYPE]) + frame_type = nla_get_u16(info->attrs[NL80211_ATTR_FRAME_TYPE]); rtnl_lock(); @@ -4757,12 +4808,13 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) } /* not much point in registering if we can't reply */ - if (!rdev->ops->action) { + if (!rdev->ops->mgmt_tx) { err = -EOPNOTSUPP; goto out; } - err = cfg80211_mlme_register_action(dev->ieee80211_ptr, info->snd_pid, + err = cfg80211_mlme_register_mgmt(dev->ieee80211_ptr, info->snd_pid, + frame_type, nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); out: @@ -4773,7 +4825,7 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) return err; } -static int nl80211_action(struct sk_buff *skb, struct genl_info *info) +static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; struct net_device *dev; @@ -4796,7 +4848,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) if (err) goto unlock_rtnl; - if (!rdev->ops->action) { + if (!rdev->ops->mgmt_tx) { err = -EOPNOTSUPP; goto out; } @@ -4839,17 +4891,17 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) } hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, - NL80211_CMD_ACTION); + NL80211_CMD_FRAME); if (IS_ERR(hdr)) { err = PTR_ERR(hdr); goto free_msg; } - err = cfg80211_mlme_action(rdev, dev, chan, channel_type, - channel_type_valid, - nla_data(info->attrs[NL80211_ATTR_FRAME]), - nla_len(info->attrs[NL80211_ATTR_FRAME]), - &cookie); + err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, channel_type, + channel_type_valid, + nla_data(info->attrs[NL80211_ATTR_FRAME]), + nla_len(info->attrs[NL80211_ATTR_FRAME]), + &cookie); if (err) goto free_msg; @@ -5348,14 +5400,14 @@ static struct genl_ops nl80211_ops[] = { .flags = GENL_ADMIN_PERM, }, { - .cmd = NL80211_CMD_REGISTER_ACTION, - .doit = nl80211_register_action, + .cmd = NL80211_CMD_REGISTER_FRAME, + .doit = nl80211_register_mgmt, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, { - .cmd = NL80211_CMD_ACTION, - .doit = nl80211_action, + .cmd = NL80211_CMD_FRAME, + .doit = nl80211_tx_mgmt, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, @@ -6055,9 +6107,9 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, nl80211_mlme_mcgrp.id, gfp); } -int nl80211_send_action(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u32 nlpid, - int freq, const u8 *buf, size_t len, gfp_t gfp) +int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u32 nlpid, + int freq, const u8 *buf, size_t len, gfp_t gfp) { struct sk_buff *msg; void *hdr; @@ -6067,7 +6119,7 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev, if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION); + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME); if (!hdr) { nlmsg_free(msg); return -ENOMEM; @@ -6095,10 +6147,10 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev, return -ENOBUFS; } -void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u64 cookie, - const u8 *buf, size_t len, bool ack, - gfp_t gfp) +void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + const u8 *buf, size_t len, bool ack, + gfp_t gfp) { struct sk_buff *msg; void *hdr; @@ -6107,7 +6159,7 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, if (!msg) return; - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION_TX_STATUS); + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME_TX_STATUS); if (!hdr) { nlmsg_free(msg); return; @@ -6194,7 +6246,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb, list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) list_for_each_entry_rcu(wdev, &rdev->netdev_list, list) - cfg80211_mlme_unregister_actions(wdev, notify->pid); + cfg80211_mlme_unregister_socket(wdev, notify->pid); rcu_read_unlock(); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 2ad7fbc7d9f1..30d2f939150d 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -74,13 +74,13 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); -int nl80211_send_action(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u32 nlpid, int freq, - const u8 *buf, size_t len, gfp_t gfp); -void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u64 cookie, - const u8 *buf, size_t len, bool ack, - gfp_t gfp); +int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u32 nlpid, int freq, + const u8 *buf, size_t len, gfp_t gfp); +void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + const u8 *buf, size_t len, bool ack, + gfp_t gfp); void nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/util.c b/net/wireless/util.c index 1eb24162be61..8d961cc4ae98 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -823,7 +823,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, /* monitor can't bridge anyway */ break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: /* not happening */ break; } -- cgit v1.2.3 From d2730b2a6a019d14455556019d744ab051e6554b Mon Sep 17 00:00:00 2001 From: Gábor Stefanik Date: Mon, 16 Aug 2010 22:39:16 +0200 Subject: b43: N-PHY: Implement MAC PHY clock set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Gábor Stefanik Signed-off-by: John W. Linville --- drivers/net/wireless/b43/phy_n.c | 13 ++++++++++++- include/linux/ssb/ssb_regs.h | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c index f1b57070ada9..d212726d509b 100644 --- a/drivers/net/wireless/b43/phy_n.c +++ b/drivers/net/wireless/b43/phy_n.c @@ -3074,6 +3074,17 @@ static int b43_nphy_cal_rx_iq(struct b43_wldev *dev, return b43_nphy_rev2_cal_rx_iq(dev, target, type, debug); } +/* http://bcm-v4.sipsolutions.net/802.11/PHY/N/MacPhyClkSet */ +static void b43_nphy_mac_phy_clock_set(struct b43_wldev *dev, bool on) +{ + u32 tmslow = ssb_read32(dev->dev, SSB_TMSLOW); + if (on) + tmslow |= SSB_TMSLOW_PHYCLK; + else + tmslow &= ~SSB_TMSLOW_PHYCLK; + ssb_write32(dev->dev, SSB_TMSLOW, tmslow); +} + /* * Init N-PHY * http://bcm-v4.sipsolutions.net/802.11/PHY/Init/N @@ -3174,7 +3185,7 @@ int b43_phy_initn(struct b43_wldev *dev) b43_phy_write(dev, B43_NPHY_BBCFG, tmp & ~B43_NPHY_BBCFG_RSTCCA); b43_nphy_bmac_clock_fgc(dev, 0); - /* TODO N PHY MAC PHY Clock Set with argument 1 */ + b43_nphy_mac_phy_clock_set(dev, true); b43_nphy_pa_override(dev, false); b43_nphy_force_rf_sequence(dev, B43_RFSEQ_RX2TX); diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index a6d5225b9275..11daf9c140e7 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -97,6 +97,7 @@ #define SSB_TMSLOW_RESET 0x00000001 /* Reset */ #define SSB_TMSLOW_REJECT_22 0x00000002 /* Reject (Backplane rev 2.2) */ #define SSB_TMSLOW_REJECT_23 0x00000004 /* Reject (Backplane rev 2.3) */ +#define SSB_TMSLOW_PHYCLK 0x00000010 /* MAC PHY Clock Control Enable */ #define SSB_TMSLOW_CLOCK 0x00010000 /* Clock Enable */ #define SSB_TMSLOW_FGC 0x00020000 /* Force Gated Clocks On */ #define SSB_TMSLOW_PE 0x40000000 /* Power Management Enable */ -- cgit v1.2.3 From 633dd1ea683d907af944bcd9814092efe9869b05 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Aug 2010 15:01:23 +0200 Subject: mac80211: fix docbook Fix a small problem in the documentation for ieee80211_request_smps, and a now erroneous inclusion of enum ieee80211_key_alg, which no longer exists after the change to ciphers. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- Documentation/DocBook/80211.tmpl | 1 - include/net/mac80211.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl index 6f88e184d2e7..52310ac892ea 100644 --- a/Documentation/DocBook/80211.tmpl +++ b/Documentation/DocBook/80211.tmpl @@ -248,7 +248,6 @@ MISSING !Finclude/net/mac80211.h set_key_cmd !Finclude/net/mac80211.h ieee80211_key_conf -!Finclude/net/mac80211.h ieee80211_key_alg !Finclude/net/mac80211.h ieee80211_key_flags diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 914c747ac3a0..2a1811366076 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2537,7 +2537,7 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); /** * ieee80211_request_smps - request SM PS transition * @vif: &struct ieee80211_vif pointer from the add_interface callback. - * @mode: new SM PS mode + * @smps_mode: new SM PS mode * * This allows the driver to request an SM PS transition in managed * mode. This is useful when the driver has more information than -- cgit v1.2.3 From 2a5fb7b088f8418958775774dda9427d6c73c522 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Aug 2010 17:44:36 +0200 Subject: nl80211: some documentation fixes The nl80211 documentation is currently never generated, so problems have accumulated. Fix most of the trivial ones. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 57 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 8af1e66c3cf9..ec1690da7845 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -347,6 +347,8 @@ * four bytes for vendor frames including the OUI. The registration * cannot be dropped, but is removed automatically when the netlink * socket is closed. Multiple registrations can be made. + * @NL80211_CMD_REGISTER_ACTION: Alias for @NL80211_CMD_REGISTER_FRAME for + * backward compatibility * @NL80211_CMD_FRAME: Management frame TX request and RX notification. This * command is used both as a request to transmit a management frame and * as an event indicating reception of a frame that was not processed in @@ -359,11 +361,14 @@ * operational channel). When called, this operation returns a cookie * (%NL80211_ATTR_COOKIE) that will be included with the TX status event * pertaining to the TX request. + * @NL80211_CMD_ACTION: Alias for @NL80211_CMD_FRAME for backward compatibility. * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame * transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies * the TX command and %NL80211_ATTR_FRAME includes the contents of the * frame. %NL80211_ATTR_ACK flag is included if the recipient acknowledged * the frame. + * @NL80211_CMD_ACTION_TX_STATUS: Alias for @NL80211_CMD_FRAME_TX_STATUS for + * backward compatibility. * @NL80211_CMD_SET_CQM: Connection quality monitor configuration. This command * is used to configure connection quality monitoring notification trigger * levels. @@ -1029,11 +1034,14 @@ enum nl80211_iftype { * Station flags. When a station is added to an AP interface, it is * assumed to be already associated (and hence authenticated.) * + * @__NL80211_STA_FLAG_INVALID: attribute number 0 is reserved * @NL80211_STA_FLAG_AUTHORIZED: station is authorized (802.1X) * @NL80211_STA_FLAG_SHORT_PREAMBLE: station is capable of receiving frames * with short barker preamble * @NL80211_STA_FLAG_WME: station is WME/QoS capable * @NL80211_STA_FLAG_MFP: station uses management frame protection + * @NL80211_STA_FLAG_MAX: highest station flag number currently defined + * @__NL80211_STA_FLAG_AFTER_LAST: internal use */ enum nl80211_sta_flags { __NL80211_STA_FLAG_INVALID, @@ -1146,14 +1154,17 @@ enum nl80211_mpath_flags { * information about a mesh path. * * @__NL80211_MPATH_INFO_INVALID: attribute number 0 is reserved - * @NL80211_ATTR_MPATH_FRAME_QLEN: number of queued frames for this destination - * @NL80211_ATTR_MPATH_SN: destination sequence number - * @NL80211_ATTR_MPATH_METRIC: metric (cost) of this mesh path - * @NL80211_ATTR_MPATH_EXPTIME: expiration time for the path, in msec from now - * @NL80211_ATTR_MPATH_FLAGS: mesh path flags, enumerated in + * @NL80211_MPATH_INFO_FRAME_QLEN: number of queued frames for this destination + * @NL80211_MPATH_INFO_SN: destination sequence number + * @NL80211_MPATH_INFO_METRIC: metric (cost) of this mesh path + * @NL80211_MPATH_INFO_EXPTIME: expiration time for the path, in msec from now + * @NL80211_MPATH_INFO_FLAGS: mesh path flags, enumerated in * &enum nl80211_mpath_flags; - * @NL80211_ATTR_MPATH_DISCOVERY_TIMEOUT: total path discovery timeout, in msec - * @NL80211_ATTR_MPATH_DISCOVERY_RETRIES: mesh path discovery retries + * @NL80211_MPATH_INFO_DISCOVERY_TIMEOUT: total path discovery timeout, in msec + * @NL80211_MPATH_INFO_DISCOVERY_RETRIES: mesh path discovery retries + * @NL80211_MPATH_INFO_MAX: highest mesh path information attribute number + * currently defind + * @__NL80211_MPATH_INFO_AFTER_LAST: internal use */ enum nl80211_mpath_info { __NL80211_MPATH_INFO_INVALID, @@ -1182,6 +1193,8 @@ enum nl80211_mpath_info { * @NL80211_BAND_ATTR_HT_CAPA: HT capabilities, as in the HT information IE * @NL80211_BAND_ATTR_HT_AMPDU_FACTOR: A-MPDU factor, as in 11n * @NL80211_BAND_ATTR_HT_AMPDU_DENSITY: A-MPDU density, as in 11n + * @NL80211_BAND_ATTR_MAX: highest band attribute currently defined + * @__NL80211_BAND_ATTR_AFTER_LAST: internal use */ enum nl80211_band_attr { __NL80211_BAND_ATTR_INVALID, @@ -1202,6 +1215,7 @@ enum nl80211_band_attr { /** * enum nl80211_frequency_attr - frequency attributes + * @__NL80211_FREQUENCY_ATTR_INVALID: attribute number 0 is reserved * @NL80211_FREQUENCY_ATTR_FREQ: Frequency in MHz * @NL80211_FREQUENCY_ATTR_DISABLED: Channel is disabled in current * regulatory domain. @@ -1213,6 +1227,9 @@ enum nl80211_band_attr { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm * (100 * dBm). + * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number + * currently defined + * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use */ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, @@ -1232,9 +1249,13 @@ enum nl80211_frequency_attr { /** * enum nl80211_bitrate_attr - bitrate attributes + * @__NL80211_BITRATE_ATTR_INVALID: attribute number 0 is reserved * @NL80211_BITRATE_ATTR_RATE: Bitrate in units of 100 kbps * @NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE: Short preamble supported * in 2.4 GHz band. + * @NL80211_BITRATE_ATTR_MAX: highest bitrate attribute number + * currently defined + * @__NL80211_BITRATE_ATTR_AFTER_LAST: internal use */ enum nl80211_bitrate_attr { __NL80211_BITRATE_ATTR_INVALID, @@ -1290,6 +1311,7 @@ enum nl80211_reg_type { /** * enum nl80211_reg_rule_attr - regulatory rule attributes + * @__NL80211_REG_RULE_ATTR_INVALID: attribute number 0 is reserved * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional * considerations for a given frequency range. These are the * &enum nl80211_reg_rule_flags. @@ -1306,6 +1328,9 @@ enum nl80211_reg_type { * If you don't have one then don't send this. * @NL80211_ATTR_POWER_RULE_MAX_EIRP: the maximum allowed EIRP for * a given frequency range. The value is in mBm (100 * dBm). + * @NL80211_REG_RULE_ATTR_MAX: highest regulatory rule attribute number + * currently defined + * @__NL80211_REG_RULE_ATTR_AFTER_LAST: internal use */ enum nl80211_reg_rule_attr { __NL80211_REG_RULE_ATTR_INVALID, @@ -1357,6 +1382,9 @@ enum nl80211_reg_rule_flags { * @__NL80211_SURVEY_INFO_INVALID: attribute number 0 is reserved * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm) + * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number + * currently defined + * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use */ enum nl80211_survey_info { __NL80211_SURVEY_INFO_INVALID, @@ -1521,6 +1549,7 @@ enum nl80211_channel_type { * enum nl80211_bss - netlink attributes for a BSS * * @__NL80211_BSS_INVALID: invalid + * @NL80211_BSS_BSSID: BSSID of the BSS (6 octets) * @NL80211_BSS_FREQUENCY: frequency in MHz (u32) * @NL80211_BSS_TSF: TSF of the received probe response/beacon (u64) * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16) @@ -1564,6 +1593,12 @@ enum nl80211_bss { /** * enum nl80211_bss_status - BSS "status" + * @NL80211_BSS_STATUS_AUTHENTICATED: Authenticated with this BSS. + * @NL80211_BSS_STATUS_ASSOCIATED: Associated with this BSS. + * @NL80211_BSS_STATUS_IBSS_JOINED: Joined to this IBSS. + * + * The BSS status is a BSS attribute in scan dumps, which + * indicates the status the interface has wrt. this BSS. */ enum nl80211_bss_status { NL80211_BSS_STATUS_AUTHENTICATED, @@ -1674,8 +1709,8 @@ enum nl80211_tx_rate_attributes { /** * enum nl80211_band - Frequency band - * @NL80211_BAND_2GHZ - 2.4 GHz ISM band - * @NL80211_BAND_5GHZ - around 5 GHz band (4.9 - 5.7 GHz) + * @NL80211_BAND_2GHZ: 2.4 GHz ISM band + * @NL80211_BAND_5GHZ: around 5 GHz band (4.9 - 5.7 GHz) */ enum nl80211_band { NL80211_BAND_2GHZ, @@ -1713,9 +1748,9 @@ enum nl80211_attr_cqm { /** * enum nl80211_cqm_rssi_threshold_event - RSSI threshold event - * @NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW - The RSSI level is lower than the + * @NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW: The RSSI level is lower than the * configured threshold - * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH - The RSSI is higher than the + * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH: The RSSI is higher than the * configured threshold */ enum nl80211_cqm_rssi_threshold_event { -- cgit v1.2.3 From d70e96932de55fb2c05b1c0af1dff178651a9b77 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Aug 2010 16:11:27 +0200 Subject: cfg80211: add some documentation Add some documentation for cfg80211. I'm hoping some of the regulatory documentation will be filled by somebody more familiar with it, hint hint! :) Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- Documentation/DocBook/80211.tmpl | 136 +++++++++++++++++++++++++++++++++++++-- include/net/cfg80211.h | 121 ++++++++++++++++++++++++++++++++-- 2 files changed, 246 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl index 52310ac892ea..b84c9282828f 100644 --- a/Documentation/DocBook/80211.tmpl +++ b/Documentation/DocBook/80211.tmpl @@ -65,18 +65,144 @@ +!Ainclude/net/cfg80211.h The cfg80211 subsystem - -MISSING - +!Pinclude/net/cfg80211.h Introduction - + + Device registration +!Pinclude/net/cfg80211.h Device registration !Finclude/net/cfg80211.h ieee80211_band - +!Finclude/net/cfg80211.h ieee80211_channel_flags +!Finclude/net/cfg80211.h ieee80211_channel +!Finclude/net/cfg80211.h ieee80211_rate_flags +!Finclude/net/cfg80211.h ieee80211_rate +!Finclude/net/cfg80211.h ieee80211_sta_ht_cap +!Finclude/net/cfg80211.h ieee80211_supported_band +!Finclude/net/cfg80211.h cfg80211_signal_type +!Finclude/net/cfg80211.h wiphy_params_flags +!Finclude/net/cfg80211.h wiphy_flags +!Finclude/net/cfg80211.h wiphy +!Finclude/net/cfg80211.h wireless_dev +!Finclude/net/cfg80211.h wiphy_new +!Finclude/net/cfg80211.h wiphy_register +!Finclude/net/cfg80211.h wiphy_unregister +!Finclude/net/cfg80211.h wiphy_free + +!Finclude/net/cfg80211.h wiphy_name +!Finclude/net/cfg80211.h wiphy_dev +!Finclude/net/cfg80211.h wiphy_priv +!Finclude/net/cfg80211.h priv_to_wiphy +!Finclude/net/cfg80211.h set_wiphy_dev +!Finclude/net/cfg80211.h wdev_priv + + + Actions and configuration +!Pinclude/net/cfg80211.h Actions and configuration +!Finclude/net/cfg80211.h cfg80211_ops +!Finclude/net/cfg80211.h vif_params +!Finclude/net/cfg80211.h key_params +!Finclude/net/cfg80211.h survey_info_flags +!Finclude/net/cfg80211.h survey_info +!Finclude/net/cfg80211.h beacon_parameters +!Finclude/net/cfg80211.h plink_actions +!Finclude/net/cfg80211.h station_parameters +!Finclude/net/cfg80211.h station_info_flags +!Finclude/net/cfg80211.h rate_info_flags +!Finclude/net/cfg80211.h rate_info +!Finclude/net/cfg80211.h station_info +!Finclude/net/cfg80211.h monitor_flags +!Finclude/net/cfg80211.h mpath_info_flags +!Finclude/net/cfg80211.h mpath_info +!Finclude/net/cfg80211.h bss_parameters +!Finclude/net/cfg80211.h ieee80211_txq_params +!Finclude/net/cfg80211.h cfg80211_crypto_settings +!Finclude/net/cfg80211.h cfg80211_auth_request +!Finclude/net/cfg80211.h cfg80211_assoc_request +!Finclude/net/cfg80211.h cfg80211_deauth_request +!Finclude/net/cfg80211.h cfg80211_disassoc_request +!Finclude/net/cfg80211.h cfg80211_ibss_params +!Finclude/net/cfg80211.h cfg80211_connect_params +!Finclude/net/cfg80211.h cfg80211_pmksa +!Finclude/net/cfg80211.h cfg80211_send_rx_auth +!Finclude/net/cfg80211.h cfg80211_send_auth_timeout +!Finclude/net/cfg80211.h __cfg80211_auth_canceled +!Finclude/net/cfg80211.h cfg80211_send_rx_assoc +!Finclude/net/cfg80211.h cfg80211_send_assoc_timeout +!Finclude/net/cfg80211.h cfg80211_send_deauth +!Finclude/net/cfg80211.h __cfg80211_send_deauth +!Finclude/net/cfg80211.h cfg80211_send_disassoc +!Finclude/net/cfg80211.h __cfg80211_send_disassoc +!Finclude/net/cfg80211.h cfg80211_ibss_joined +!Finclude/net/cfg80211.h cfg80211_connect_result +!Finclude/net/cfg80211.h cfg80211_roamed +!Finclude/net/cfg80211.h cfg80211_disconnected +!Finclude/net/cfg80211.h cfg80211_ready_on_channel +!Finclude/net/cfg80211.h cfg80211_remain_on_channel_expired +!Finclude/net/cfg80211.h cfg80211_new_sta +!Finclude/net/cfg80211.h cfg80211_rx_mgmt +!Finclude/net/cfg80211.h cfg80211_mgmt_tx_status +!Finclude/net/cfg80211.h cfg80211_cqm_rssi_notify +!Finclude/net/cfg80211.h cfg80211_michael_mic_failure + + + Scanning and BSS list handling +!Pinclude/net/cfg80211.h Scanning and BSS list handling +!Finclude/net/cfg80211.h cfg80211_ssid +!Finclude/net/cfg80211.h cfg80211_scan_request +!Finclude/net/cfg80211.h cfg80211_scan_done +!Finclude/net/cfg80211.h cfg80211_bss +!Finclude/net/cfg80211.h cfg80211_inform_bss_frame +!Finclude/net/cfg80211.h cfg80211_inform_bss +!Finclude/net/cfg80211.h cfg80211_unlink_bss +!Finclude/net/cfg80211.h cfg80211_find_ie +!Finclude/net/cfg80211.h ieee80211_bss_get_ie + + + Utility functions +!Pinclude/net/cfg80211.h Utility functions +!Finclude/net/cfg80211.h ieee80211_channel_to_frequency +!Finclude/net/cfg80211.h ieee80211_frequency_to_channel +!Finclude/net/cfg80211.h ieee80211_get_channel +!Finclude/net/cfg80211.h ieee80211_get_response_rate +!Finclude/net/cfg80211.h ieee80211_hdrlen +!Finclude/net/cfg80211.h ieee80211_get_hdrlen_from_skb +!Finclude/net/cfg80211.h ieee80211_radiotap_iterator + + + Data path helpers +!Pinclude/net/cfg80211.h Data path helpers +!Finclude/net/cfg80211.h ieee80211_data_to_8023 +!Finclude/net/cfg80211.h ieee80211_data_from_8023 +!Finclude/net/cfg80211.h ieee80211_amsdu_to_8023s +!Finclude/net/cfg80211.h cfg80211_classify8021d + + + Regulatory enforcement infrastructure +!Pinclude/net/cfg80211.h Regulatory enforcement infrastructure +!Finclude/net/cfg80211.h regulatory_hint +!Finclude/net/cfg80211.h wiphy_apply_custom_regulatory +!Finclude/net/cfg80211.h freq_reg_info + + + RFkill integration +!Pinclude/net/cfg80211.h RFkill integration +!Finclude/net/cfg80211.h wiphy_rfkill_set_hw_state +!Finclude/net/cfg80211.h wiphy_rfkill_start_polling +!Finclude/net/cfg80211.h wiphy_rfkill_stop_polling + + + Test mode +!Pinclude/net/cfg80211.h Test mode +!Finclude/net/cfg80211.h cfg80211_testmode_alloc_reply_skb +!Finclude/net/cfg80211.h cfg80211_testmode_reply +!Finclude/net/cfg80211.h cfg80211_testmode_alloc_event_skb +!Finclude/net/cfg80211.h cfg80211_testmode_event + diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6a98b1b3bfde..f2740537b5d6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -25,6 +25,43 @@ #include +/** + * DOC: Introduction + * + * cfg80211 is the configuration API for 802.11 devices in Linux. It bridges + * userspace and drivers, and offers some utility functionality associated + * with 802.11. cfg80211 must, directly or indirectly via mac80211, be used + * by all modern wireless drivers in Linux, so that they offer a consistent + * API through nl80211. For backward compatibility, cfg80211 also offers + * wireless extensions to userspace, but hides them from drivers completely. + * + * Additionally, cfg80211 contains code to help enforce regulatory spectrum + * use restrictions. + */ + + +/** + * DOC: Device registration + * + * In order for a driver to use cfg80211, it must register the hardware device + * with cfg80211. This happens through a number of hardware capability structs + * described below. + * + * The fundamental structure for each device is the 'wiphy', of which each + * instance describes a physical wireless device connected to the system. Each + * such wiphy can have zero, one, or many virtual interfaces associated with + * it, which need to be identified as such by pointing the network interface's + * @ieee80211_ptr pointer to a &struct wireless_dev which further describes + * the wireless part of the interface, normally this struct is embedded in the + * network interface's private data area. Drivers can optionally allow creating + * or destroying virtual interfaces on the fly, but without at least one or the + * ability to create some the wireless device isn't useful. + * + * Each wiphy structure contains device capability information, and also has + * a pointer to the various operations the driver offers. The definitions and + * structures here describe these capabilities in detail. + */ + /* * wireless hardware capability structures */ @@ -204,6 +241,21 @@ struct ieee80211_supported_band { * Wireless hardware/device configuration structures and methods */ +/** + * DOC: Actions and configuration + * + * Each wireless device and each virtual interface offer a set of configuration + * operations and other actions that are invoked by userspace. Each of these + * actions is described in the operations structure, and the parameters these + * operations use are described separately. + * + * Additionally, some operations are asynchronous and expect to get status + * information via some functions that drivers need to call. + * + * Scanning and BSS list handling with its associated functionality is described + * in a separate chapter. + */ + /** * struct vif_params - describes virtual interface parameters * @mesh_id: mesh ID to use @@ -570,8 +622,28 @@ struct ieee80211_txq_params { /* from net/wireless.h */ struct wiphy; -/* from net/ieee80211.h */ -struct ieee80211_channel; +/** + * DOC: Scanning and BSS list handling + * + * The scanning process itself is fairly simple, but cfg80211 offers quite + * a bit of helper functionality. To start a scan, the scan operation will + * be invoked with a scan definition. This scan definition contains the + * channels to scan, and the SSIDs to send probe requests for (including the + * wildcard, if desired). A passive scan is indicated by having no SSIDs to + * probe. Additionally, a scan request may contain extra information elements + * that should be added to the probe request. The IEs are guaranteed to be + * well-formed, and will not exceed the maximum length the driver advertised + * in the wiphy structure. + * + * When scanning finds a BSS, cfg80211 needs to be notified of that, because + * it is responsible for maintaining the BSS list; the driver should not + * maintain a list itself. For this notification, various functions exist. + * + * Since drivers do not maintain a BSS list, there are also a number of + * functions to search for a BSS and obtain information about it from the + * BSS structure cfg80211 maintains. The BSS list is also made available + * to userspace. + */ /** * struct cfg80211_ssid - SSID description @@ -1574,8 +1646,10 @@ static inline void *wdev_priv(struct wireless_dev *wdev) return wiphy_priv(wdev->wiphy); } -/* - * Utility functions +/** + * DOC: Utility functions + * + * cfg80211 offers a number of utility functions that can be useful. */ /** @@ -1728,6 +1802,14 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); */ unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); +/** + * DOC: Data path helpers + * + * In addition to generic utilities, cfg80211 also offers + * functions that help implement the data path for devices + * that do not do the 802.11/802.3 conversion on the device. + */ + /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 * @skb: the 802.11 data frame @@ -1788,8 +1870,10 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb); */ const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len); -/* - * Regulatory helper functions for wiphys +/** + * DOC: Regulatory enforcement infrastructure + * + * TODO */ /** @@ -2191,6 +2275,20 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, */ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp); +/** + * DOC: RFkill integration + * + * RFkill integration in cfg80211 is almost invisible to drivers, + * as cfg80211 automatically registers an rfkill instance for each + * wireless device it knows about. Soft kill is also translated + * into disconnecting and turning all interfaces off, drivers are + * expected to turn off the device when all interfaces are down. + * + * However, devices may have a hard RFkill line, in which case they + * also need to interact with the rfkill subsystem, via cfg80211. + * They can do this with a few helper functions documented here. + */ + /** * wiphy_rfkill_set_hw_state - notify cfg80211 about hw block state * @wiphy: the wiphy @@ -2211,6 +2309,17 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy); void wiphy_rfkill_stop_polling(struct wiphy *wiphy); #ifdef CONFIG_NL80211_TESTMODE +/** + * DOC: Test mode + * + * Test mode is a set of utility functions to allow drivers to + * interact with driver-specific tools to aid, for instance, + * factory programming. + * + * This chapter describes how drivers interact with it, for more + * information see the nl80211 book's chapter on it. + */ + /** * cfg80211_testmode_alloc_reply_skb - allocate testmode reply * @wiphy: the wiphy -- cgit v1.2.3 From 5a46790ca4c40fdb6ed5336d7d6b593c96326b31 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 24 Aug 2010 14:46:53 -0700 Subject: include/linux/if_ether.h: Remove unused #define MAC_FMT Last use was removed, so remove the #define. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/if_ether.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index bed7a4682b90..f9c3df03db0f 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -137,8 +137,6 @@ extern struct ctl_table ether_table[]; extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); -#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x" - #endif #endif /* _LINUX_IF_ETHER_H */ -- cgit v1.2.3 From c2e3143e3c46ede22336316b3ff4746727c0d93a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 24 Aug 2010 14:48:10 -0700 Subject: tc: add meta match on receive hash Trivial extension to existing meta data match rules to allow matching on skb receive hash value. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/tc_ematch/tc_em_meta.h | 1 + net/sched/em_meta.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h index 0864206ec1a3..7138962664f8 100644 --- a/include/linux/tc_ematch/tc_em_meta.h +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -79,6 +79,7 @@ enum { TCF_META_ID_SK_SENDMSG_OFF, TCF_META_ID_SK_WRITE_PENDING, TCF_META_ID_VLAN_TAG, + TCF_META_ID_RXHASH, __TCF_META_ID_MAX }; #define TCF_META_ID_MAX (__TCF_META_ID_MAX - 1) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 3bcac8aa333c..34da5e29ea1a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -223,6 +223,11 @@ META_COLLECTOR(int_maclen) dst->value = skb->mac_len; } +META_COLLECTOR(int_rxhash) +{ + dst->value = skb_get_rxhash(skb); +} + /************************************************************************** * Netfilter **************************************************************************/ @@ -541,6 +546,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag), + [META_ID(RXHASH)] = META_FUNC(int_rxhash), } }; -- cgit v1.2.3 From e7c1c2c46201e46f8ce817196507d2ffd3dafd8e Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 24 Aug 2010 03:46:18 +0000 Subject: mlx4_en: Added self diagnostics test implementation The selftest includes 5 features: 1. Interrupt test: Executing commands and receiving command completion on all our interrupt vectors. 2. Link test: Verifying we are connected to valid link partner. 3. Speed test: Check that we negotiated link speed correctly. 4. Registers test: Activate HW health check command. 5. Loopback test: Send a packet on loopback interface and catch it on RX side. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- drivers/net/mlx4/Makefile | 2 +- drivers/net/mlx4/en_ethtool.c | 79 ++++++++++++------ drivers/net/mlx4/en_netdev.c | 2 +- drivers/net/mlx4/en_port.c | 32 ++++++++ drivers/net/mlx4/en_port.h | 14 ++++ drivers/net/mlx4/en_rx.c | 20 +++++ drivers/net/mlx4/en_selftest.c | 179 +++++++++++++++++++++++++++++++++++++++++ drivers/net/mlx4/en_tx.c | 16 ++++ drivers/net/mlx4/eq.c | 44 ++++++++++ drivers/net/mlx4/fw.c | 3 + drivers/net/mlx4/fw.h | 1 + drivers/net/mlx4/main.c | 1 + drivers/net/mlx4/mlx4_en.h | 19 +++++ include/linux/mlx4/cmd.h | 1 + include/linux/mlx4/device.h | 2 + 15 files changed, 388 insertions(+), 27 deletions(-) create mode 100644 drivers/net/mlx4/en_selftest.c (limited to 'include') diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile index 1fd068e1d930..d1aa45a15854 100644 --- a/drivers/net/mlx4/Makefile +++ b/drivers/net/mlx4/Makefile @@ -6,4 +6,4 @@ mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \ obj-$(CONFIG_MLX4_EN) += mlx4_en.o mlx4_en-y := en_main.o en_tx.o en_rx.o en_ethtool.o en_port.o en_cq.o \ - en_resources.o en_netdev.o + en_resources.o en_netdev.o en_selftest.o diff --git a/drivers/net/mlx4/en_ethtool.c b/drivers/net/mlx4/en_ethtool.c index 398d54136967..f7d72d7a8704 100644 --- a/drivers/net/mlx4/en_ethtool.c +++ b/drivers/net/mlx4/en_ethtool.c @@ -125,6 +125,14 @@ static const char main_strings[][ETH_GSTRING_LEN] = { #define NUM_MAIN_STATS 21 #define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + NUM_PERF_STATS) +static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= { + "Interupt Test", + "Link Test", + "Speed Test", + "Register Test", + "Loopback Test", +}; + static u32 mlx4_en_get_msglevel(struct net_device *dev) { return ((struct mlx4_en_priv *) netdev_priv(dev))->msg_enable; @@ -146,10 +154,15 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset) { struct mlx4_en_priv *priv = netdev_priv(dev); - if (sset != ETH_SS_STATS) + switch (sset) { + case ETH_SS_STATS: + return NUM_ALL_STATS + + (priv->tx_ring_num + priv->rx_ring_num) * 2; + case ETH_SS_TEST: + return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.loopback_support) * 2; + default: return -EOPNOTSUPP; - - return NUM_ALL_STATS + (priv->tx_ring_num + priv->rx_ring_num) * 2; + } } static void mlx4_en_get_ethtool_stats(struct net_device *dev, @@ -181,6 +194,12 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, } +static void mlx4_en_self_test(struct net_device *dev, + struct ethtool_test *etest, u64 *buf) +{ + mlx4_en_ex_selftest(dev, &etest->flags, buf); +} + static void mlx4_en_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { @@ -188,30 +207,39 @@ static void mlx4_en_get_strings(struct net_device *dev, int index = 0; int i; - if (stringset != ETH_SS_STATS) - return; - - /* Add main counters */ - for (i = 0; i < NUM_MAIN_STATS; i++) - strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i]); - for (i = 0; i < NUM_PORT_STATS; i++) - strcpy(data + (index++) * ETH_GSTRING_LEN, + switch (stringset) { + case ETH_SS_TEST: + for (i = 0; i < MLX4_EN_NUM_SELF_TEST - 2; i++) + strcpy(data + i * ETH_GSTRING_LEN, mlx4_en_test_names[i]); + if (priv->mdev->dev->caps.loopback_support) + for (; i < MLX4_EN_NUM_SELF_TEST; i++) + strcpy(data + i * ETH_GSTRING_LEN, mlx4_en_test_names[i]); + break; + + case ETH_SS_STATS: + /* Add main counters */ + for (i = 0; i < NUM_MAIN_STATS; i++) + strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i]); + for (i = 0; i< NUM_PORT_STATS; i++) + strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i + NUM_MAIN_STATS]); - for (i = 0; i < priv->tx_ring_num; i++) { - sprintf(data + (index++) * ETH_GSTRING_LEN, - "tx%d_packets", i); - sprintf(data + (index++) * ETH_GSTRING_LEN, - "tx%d_bytes", i); - } - for (i = 0; i < priv->rx_ring_num; i++) { - sprintf(data + (index++) * ETH_GSTRING_LEN, - "rx%d_packets", i); - sprintf(data + (index++) * ETH_GSTRING_LEN, - "rx%d_bytes", i); - } - for (i = 0; i < NUM_PKT_STATS; i++) - strcpy(data + (index++) * ETH_GSTRING_LEN, + for (i = 0; i < priv->tx_ring_num; i++) { + sprintf(data + (index++) * ETH_GSTRING_LEN, + "tx%d_packets", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "tx%d_bytes", i); + } + for (i = 0; i < priv->rx_ring_num; i++) { + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_packets", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_bytes", i); + } + for (i = 0; i< NUM_PKT_STATS; i++) + strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i + NUM_MAIN_STATS + NUM_PORT_STATS]); + break; + } } static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) @@ -439,6 +467,7 @@ const struct ethtool_ops mlx4_en_ethtool_ops = { .get_strings = mlx4_en_get_strings, .get_sset_count = mlx4_en_get_sset_count, .get_ethtool_stats = mlx4_en_get_ethtool_stats, + .self_test = mlx4_en_self_test, .get_wol = mlx4_en_get_wol, .get_msglevel = mlx4_en_get_msglevel, .set_msglevel = mlx4_en_set_msglevel, diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c index 34cfa3cfbdae..968e75b7acf4 100644 --- a/drivers/net/mlx4/en_netdev.c +++ b/drivers/net/mlx4/en_netdev.c @@ -109,7 +109,7 @@ static void mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) mutex_unlock(&mdev->state_lock); } -static u64 mlx4_en_mac_to_u64(u8 *addr) +u64 mlx4_en_mac_to_u64(u8 *addr) { u64 mac = 0; int i; diff --git a/drivers/net/mlx4/en_port.c b/drivers/net/mlx4/en_port.c index a29abe845d2e..aa3ef2aee5bf 100644 --- a/drivers/net/mlx4/en_port.c +++ b/drivers/net/mlx4/en_port.c @@ -142,6 +142,38 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, return err; } +int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port) +{ + struct mlx4_en_query_port_context *qport_context; + struct mlx4_en_priv *priv = netdev_priv(mdev->pndev[port]); + struct mlx4_en_port_state *state = &priv->port_state; + struct mlx4_cmd_mailbox *mailbox; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + memset(mailbox->buf, 0, sizeof(*qport_context)); + err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0, + MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B); + if (err) + goto out; + qport_context = mailbox->buf; + + /* This command is always accessed from Ethtool context + * already synchronized, no need in locking */ + state->link_state = !!(qport_context->link_up & MLX4_EN_LINK_UP_MASK); + if ((qport_context->link_speed & MLX4_EN_SPEED_MASK) == + MLX4_EN_1G_SPEED) + state->link_speed = 1000; + else + state->link_speed = 10000; + state->transciver = qport_context->transceiver; + +out: + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return err; +} int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) { diff --git a/drivers/net/mlx4/en_port.h b/drivers/net/mlx4/en_port.h index e6477f12beb5..f6511aa2b7df 100644 --- a/drivers/net/mlx4/en_port.h +++ b/drivers/net/mlx4/en_port.h @@ -84,6 +84,20 @@ enum { MLX4_MCAST_ENABLE = 2, }; +struct mlx4_en_query_port_context { + u8 link_up; +#define MLX4_EN_LINK_UP_MASK 0x80 + u8 reserved; + __be16 mtu; + u8 reserved2; + u8 link_speed; +#define MLX4_EN_SPEED_MASK 0x3 +#define MLX4_EN_1G_SPEED 0x2 + u16 reserved3[5]; + __be64 mac; + u8 transceiver; +}; + struct mlx4_en_stat_out_mbox { /* Received frames with a length of 64 octets */ diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c index 7a5123c4a366..f421a3d42723 100644 --- a/drivers/net/mlx4/en_rx.c +++ b/drivers/net/mlx4/en_rx.c @@ -541,6 +541,21 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, return skb; } +static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb) +{ + int i; + int offset = ETH_HLEN; + + for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) { + if (*(skb->data + offset) != (unsigned char) (i & 0xff)) + goto out_loopback; + } + /* Loopback found */ + priv->loopback_ok = 1; + +out_loopback: + dev_kfree_skb_any(skb); +} int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { @@ -655,6 +670,11 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud goto next; } + if (unlikely(priv->validate_loopback)) { + validate_loopback(priv, skb); + goto next; + } + skb->ip_summed = ip_summed; skb->protocol = eth_type_trans(skb, dev); skb_record_rx_queue(skb, cq->ring); diff --git a/drivers/net/mlx4/en_selftest.c b/drivers/net/mlx4/en_selftest.c new file mode 100644 index 000000000000..43357d35616a --- /dev/null +++ b/drivers/net/mlx4/en_selftest.c @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include +#include +#include +#include + +#include "mlx4_en.h" + + +static int mlx4_en_test_registers(struct mlx4_en_priv *priv) +{ + return mlx4_cmd(priv->mdev->dev, 0, 0, 0, MLX4_CMD_HW_HEALTH_CHECK, + MLX4_CMD_TIME_CLASS_A); +} + +static int mlx4_en_test_loopback_xmit(struct mlx4_en_priv *priv) +{ + struct sk_buff *skb; + struct ethhdr *ethh; + unsigned char *packet; + unsigned int packet_size = MLX4_LOOPBACK_TEST_PAYLOAD; + unsigned int i; + int err; + + + /* build the pkt before xmit */ + skb = netdev_alloc_skb(priv->dev, MLX4_LOOPBACK_TEST_PAYLOAD + ETH_HLEN + NET_IP_ALIGN); + if (!skb) { + en_err(priv, "-LOOPBACK_TEST_XMIT- failed to create skb for xmit\n"); + return -ENOMEM; + } + skb_reserve(skb, NET_IP_ALIGN); + + ethh = (struct ethhdr *)skb_put(skb, sizeof(struct ethhdr)); + packet = (unsigned char *)skb_put(skb, packet_size); + memcpy(ethh->h_dest, priv->dev->dev_addr, ETH_ALEN); + memset(ethh->h_source, 0, ETH_ALEN); + ethh->h_proto = htons(ETH_P_ARP); + skb_set_mac_header(skb, 0); + for (i = 0; i < packet_size; ++i) /* fill our packet */ + packet[i] = (unsigned char)(i & 0xff); + + /* xmit the pkt */ + err = mlx4_en_xmit(skb, priv->dev); + return err; +} + +static int mlx4_en_test_loopback(struct mlx4_en_priv *priv) +{ + u32 loopback_ok = 0; + int i; + + + priv->loopback_ok = 0; + priv->validate_loopback = 1; + + /* xmit */ + if (mlx4_en_test_loopback_xmit(priv)) { + en_err(priv, "Transmitting loopback packet failed\n"); + goto mlx4_en_test_loopback_exit; + } + + /* polling for result */ + for (i = 0; i < MLX4_EN_LOOPBACK_RETRIES; ++i) { + msleep(MLX4_EN_LOOPBACK_TIMEOUT); + if (priv->loopback_ok) { + loopback_ok = 1; + break; + } + } + if (!loopback_ok) + en_err(priv, "Loopback packet didn't arrive\n"); + +mlx4_en_test_loopback_exit: + + priv->validate_loopback = 0; + return (!loopback_ok); +} + + +static int mlx4_en_test_link(struct mlx4_en_priv *priv) +{ + if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) + return -ENOMEM; + if (priv->port_state.link_state == 1) + return 0; + else + return 1; +} + +static int mlx4_en_test_speed(struct mlx4_en_priv *priv) +{ + + if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) + return -ENOMEM; + + /* The device currently only supports 10G speed */ + if (priv->port_state.link_speed != SPEED_10000) + return priv->port_state.link_speed; + return 0; +} + + +void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_tx_ring *tx_ring; + int i, carrier_ok; + + memset(buf, 0, sizeof(u64) * MLX4_EN_NUM_SELF_TEST); + + if (*flags & ETH_TEST_FL_OFFLINE) { + /* disable the interface */ + carrier_ok = netif_carrier_ok(dev); + + netif_carrier_off(dev); +retry_tx: + /* Wait untill all tx queues are empty. + * there should not be any additional incoming traffic + * since we turned the carrier off */ + msleep(200); + for (i = 0; i < priv->tx_ring_num && carrier_ok; i++) { + tx_ring = &priv->tx_ring[i]; + if (tx_ring->prod != (tx_ring->cons + tx_ring->last_nr_txbb)) + goto retry_tx; + } + + if (priv->mdev->dev->caps.loopback_support){ + buf[3] = mlx4_en_test_registers(priv); + buf[4] = mlx4_en_test_loopback(priv); + } + + if (carrier_ok) + netif_carrier_on(dev); + + } + buf[0] = mlx4_test_interrupts(mdev->dev); + buf[1] = mlx4_en_test_link(priv); + buf[2] = mlx4_en_test_speed(priv); + + for (i = 0; i < MLX4_EN_NUM_SELF_TEST; i++) { + if (buf[i]) + *flags |= ETH_TEST_FL_FAILED; + } +} diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c index 3deabd1d0357..b875f9c38848 100644 --- a/drivers/net/mlx4/en_tx.c +++ b/drivers/net/mlx4/en_tx.c @@ -600,6 +600,9 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) struct mlx4_wqe_data_seg *data; struct skb_frag_struct *frag; struct mlx4_en_tx_info *tx_info; + struct ethhdr *ethh; + u64 mac; + u32 mac_l, mac_h; int tx_ind = 0; int nr_txbb; int desc_size; @@ -679,6 +682,19 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) priv->port_stats.tx_chksum_offload++; } + if (unlikely(priv->validate_loopback)) { + /* Copy dst mac address to wqe */ + skb_reset_mac_header(skb); + ethh = eth_hdr(skb); + if (ethh && ethh->h_dest) { + mac = mlx4_en_mac_to_u64(ethh->h_dest); + mac_h = (u32) ((mac & 0xffff00000000ULL) >> 16); + mac_l = (u32) (mac & 0xffffffff); + tx_desc->ctrl.srcrb_flags |= cpu_to_be32(mac_h); + tx_desc->ctrl.imm = cpu_to_be32(mac_l); + } + } + /* Handle LSO (TSO) packets */ if (lso_header_size) { /* Mark opcode as LSO */ diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index 6d7b2bf210ce..552d0fce6f67 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -699,3 +699,47 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev) kfree(priv->eq_table.uar_map); } + +/* A test that verifies that we can accept interrupts on all + * the irq vectors of the device. + * Interrupts are checked using the NOP command. + */ +int mlx4_test_interrupts(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int i; + int err; + + err = mlx4_NOP(dev); + /* When not in MSI_X, there is only one irq to check */ + if (!(dev->flags & MLX4_FLAG_MSI_X)) + return err; + + /* A loop over all completion vectors, for each vector we will check + * whether it works by mapping command completions to that vector + * and performing a NOP command + */ + for(i = 0; !err && (i < dev->caps.num_comp_vectors); ++i) { + /* Temporary use polling for command completions */ + mlx4_cmd_use_polling(dev); + + /* Map the new eq to handle all asyncronous events */ + err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0, + priv->eq_table.eq[i].eqn); + if (err) { + mlx4_warn(dev, "Failed mapping eq for interrupt test\n"); + mlx4_cmd_use_events(dev); + break; + } + + /* Go back to using events */ + mlx4_cmd_use_events(dev); + err = mlx4_NOP(dev); + } + + /* Return to default */ + mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0, + priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); + return err; +} +EXPORT_SYMBOL(mlx4_test_interrupts); diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index 04f42ae1eda0..a87bf3c97055 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -178,6 +178,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b #define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c #define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f +#define QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET 0x43 #define QUERY_DEV_CAP_FLAGS_OFFSET 0x44 #define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48 #define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49 @@ -268,6 +269,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_msg_sz = 1 << (field & 0x1f); MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); dev_cap->stat_rate_support = stat_rate; + MLX4_GET(field, outbox, QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET); + dev_cap->loopback_support = field & 0x1; MLX4_GET(dev_cap->flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET); dev_cap->reserved_uars = field >> 4; diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h index 526d7f30c041..2cc1ba5452e3 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -74,6 +74,7 @@ struct mlx4_dev_cap { u64 def_mac[MLX4_MAX_PORTS + 1]; u16 eth_mtu[MLX4_MAX_PORTS + 1]; u16 stat_rate_support; + int loopback_support; u32 flags; int reserved_uars; int uar_size; diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 5102ab1ac561..7390cdb19414 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -221,6 +221,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.bmme_flags = dev_cap->bmme_flags; dev->caps.reserved_lkey = dev_cap->reserved_lkey; dev->caps.stat_rate_support = dev_cap->stat_rate_support; + dev->caps.loopback_support = dev_cap->loopback_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.log_num_macs = log_num_mac; diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h index edf6523702c0..a09598b2b12e 100644 --- a/drivers/net/mlx4/mlx4_en.h +++ b/drivers/net/mlx4/mlx4_en.h @@ -45,6 +45,7 @@ #include #include #include +#include #include "en_port.h" @@ -139,10 +140,14 @@ enum { #define SMALL_PACKET_SIZE (256 - NET_IP_ALIGN) #define HEADER_COPY_SIZE (128 - NET_IP_ALIGN) +#define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN) #define MLX4_EN_MIN_MTU 46 #define ETH_BCAST 0xffffffffffffULL +#define MLX4_EN_LOOPBACK_RETRIES 5 +#define MLX4_EN_LOOPBACK_TIMEOUT 100 + #ifdef MLX4_EN_PERF_STAT /* Number of samples to 'average' */ #define AVG_SIZE 128 @@ -356,6 +361,12 @@ struct mlx4_en_rss_context { __be32 rss_key[10]; }; +struct mlx4_en_port_state { + int link_state; + int link_speed; + int transciver; +}; + struct mlx4_en_pkt_stats { unsigned long broadcast; unsigned long rx_prio[8]; @@ -404,6 +415,7 @@ struct mlx4_en_priv { struct vlan_group *vlgrp; struct net_device_stats stats; struct net_device_stats ret_stats; + struct mlx4_en_port_state port_state; spinlock_t stats_lock; unsigned long last_moder_packets; @@ -422,6 +434,8 @@ struct mlx4_en_priv { u16 sample_interval; u16 adaptive_rx_coal; u32 msg_enable; + u32 loopback_ok; + u32 validate_loopback; struct mlx4_hwq_resources res; int link_state; @@ -530,6 +544,11 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset); +int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port); + +#define MLX4_EN_NUM_SELF_TEST 5 +void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf); +u64 mlx4_en_mac_to_u64(u8 *addr); /* * Globals diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 0f82293a82ed..78a1b9671752 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -56,6 +56,7 @@ enum { MLX4_CMD_QUERY_HCA = 0xb, MLX4_CMD_QUERY_PORT = 0x43, MLX4_CMD_SENSE_PORT = 0x4d, + MLX4_CMD_HW_HEALTH_CHECK = 0x50, MLX4_CMD_SET_PORT = 0xc, MLX4_CMD_ACCESS_DDR = 0x2e, MLX4_CMD_MAP_ICM = 0xffa, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7a7f9c1e679a..2cec58722738 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -229,6 +229,7 @@ struct mlx4_caps { u32 bmme_flags; u32 reserved_lkey; u16 stat_rate_support; + int loopback_support; u8 port_width_cap[MLX4_MAX_PORTS + 1]; int max_gso_sz; int reserved_qps_cnt[MLX4_NUM_QP_REGION]; @@ -480,5 +481,6 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u32 *lkey, u32 *rkey); int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); +int mlx4_test_interrupts(struct mlx4_dev *dev); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 7699517db435fd24143bd32dd644275e3eeb4c86 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 24 Aug 2010 03:46:23 +0000 Subject: mlx4_en: Fixing report in Ethtool get_settings The report now based on query from FW, giving the correct tranciever type and link speed. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- drivers/net/mlx4/en_ethtool.c | 27 +++++++++++++++++++++++++-- drivers/net/mlx4/fw.c | 9 +++++++++ drivers/net/mlx4/fw.h | 4 ++++ drivers/net/mlx4/main.c | 4 ++++ include/linux/mlx4/device.h | 4 ++++ 5 files changed, 46 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/mlx4/en_ethtool.c b/drivers/net/mlx4/en_ethtool.c index f7d72d7a8704..fa2f2f43ab48 100644 --- a/drivers/net/mlx4/en_ethtool.c +++ b/drivers/net/mlx4/en_ethtool.c @@ -244,16 +244,39 @@ static void mlx4_en_get_strings(struct net_device *dev, static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { + struct mlx4_en_priv *priv = netdev_priv(dev); + int trans_type; + cmd->autoneg = AUTONEG_DISABLE; cmd->supported = SUPPORTED_10000baseT_Full; - cmd->advertising = ADVERTISED_1000baseT_Full; + cmd->advertising = ADVERTISED_10000baseT_Full; + + if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) + return -ENOMEM; + + trans_type = priv->port_state.transciver; if (netif_carrier_ok(dev)) { - cmd->speed = SPEED_10000; + cmd->speed = priv->port_state.link_speed; cmd->duplex = DUPLEX_FULL; } else { cmd->speed = -1; cmd->duplex = -1; } + + if (trans_type > 0 && trans_type <= 0xC) { + cmd->port = PORT_FIBRE; + cmd->transceiver = XCVR_EXTERNAL; + cmd->supported |= SUPPORTED_FIBRE; + cmd->advertising |= ADVERTISED_FIBRE; + } else if (trans_type == 0x80 || trans_type == 0) { + cmd->port = PORT_TP; + cmd->transceiver = XCVR_INTERNAL; + cmd->supported |= SUPPORTED_TP; + cmd->advertising |= ADVERTISED_TP; + } else { + cmd->port = -1; + cmd->transceiver = -1; + } return 0; } diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index a87bf3c97055..515c6348f32b 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -141,6 +141,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u8 field; + u32 field32; u16 size; u16 stat_rate; int err; @@ -368,6 +369,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_PORT_MAX_MACVLAN_OFFSET 0x0a #define QUERY_PORT_MAX_VL_OFFSET 0x0b #define QUERY_PORT_MAC_OFFSET 0x10 +#define QUERY_PORT_TRANS_VENDOR_OFFSET 0x18 +#define QUERY_PORT_WAVELENGTH_OFFSET 0x1c +#define QUERY_PORT_TRANS_CODE_OFFSET 0x20 for (i = 1; i <= dev_cap->num_ports; ++i) { err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 0, MLX4_CMD_QUERY_PORT, @@ -391,6 +395,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->log_max_vlans[i] = field >> 4; MLX4_GET(dev_cap->eth_mtu[i], outbox, QUERY_PORT_ETH_MTU_OFFSET); MLX4_GET(dev_cap->def_mac[i], outbox, QUERY_PORT_MAC_OFFSET); + MLX4_GET(field32, outbox, QUERY_PORT_TRANS_VENDOR_OFFSET); + dev_cap->trans_type[i] = field32 >> 24; + dev_cap->vendor_oui[i] = field32 & 0xffffff; + MLX4_GET(dev_cap->wavelength[i], outbox, QUERY_PORT_WAVELENGTH_OFFSET); + MLX4_GET(dev_cap->trans_code[i], outbox, QUERY_PORT_TRANS_CODE_OFFSET); } } diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h index 2cc1ba5452e3..443e456c9dab 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -73,6 +73,10 @@ struct mlx4_dev_cap { int max_pkeys[MLX4_MAX_PORTS + 1]; u64 def_mac[MLX4_MAX_PORTS + 1]; u16 eth_mtu[MLX4_MAX_PORTS + 1]; + int trans_type[MLX4_MAX_PORTS + 1]; + int vendor_oui[MLX4_MAX_PORTS + 1]; + u16 wavelength[MLX4_MAX_PORTS + 1]; + u64 trans_code[MLX4_MAX_PORTS + 1]; u16 stat_rate_support; int loopback_support; u32 flags; diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 7390cdb19414..f4791fa2472f 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -184,6 +184,10 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i]; dev->caps.def_mac[i] = dev_cap->def_mac[i]; dev->caps.supported_type[i] = dev_cap->supported_port_types[i]; + dev->caps.trans_type[i] = dev_cap->trans_type[i]; + dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i]; + dev->caps.wavelength[i] = dev_cap->wavelength[i]; + dev->caps.trans_code[i] = dev_cap->trans_code[i]; } dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 2cec58722738..2a36a344fb3d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -186,6 +186,10 @@ struct mlx4_caps { int eth_mtu_cap[MLX4_MAX_PORTS + 1]; int gid_table_len[MLX4_MAX_PORTS + 1]; int pkey_table_len[MLX4_MAX_PORTS + 1]; + int trans_type[MLX4_MAX_PORTS + 1]; + int vendor_oui[MLX4_MAX_PORTS + 1]; + int wavelength[MLX4_MAX_PORTS + 1]; + u64 trans_code[MLX4_MAX_PORTS + 1]; int local_ca_ack_delay; int num_uars; int bf_reg_size; -- cgit v1.2.3 From 0533943c5c45cce2e26432bf0a6b8e114757c897 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 24 Aug 2010 03:46:42 +0000 Subject: mlx4_en: UDP RSS support Adding capability for RSS for UDP traffic, hashing is done based on IP addresses and UDP port number. The support depends on HW/FW capabilities. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- drivers/net/mlx4/en_main.c | 19 ++++++++++++------- drivers/net/mlx4/en_rx.c | 8 ++++---- drivers/net/mlx4/fw.c | 3 +++ drivers/net/mlx4/fw.h | 1 + drivers/net/mlx4/main.c | 1 + drivers/net/mlx4/mlx4_en.h | 3 +++ include/linux/mlx4/device.h | 1 + 7 files changed, 25 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/net/mlx4/en_main.c b/drivers/net/mlx4/en_main.c index cacac4e966ee..143906417048 100644 --- a/drivers/net/mlx4/en_main.c +++ b/drivers/net/mlx4/en_main.c @@ -63,11 +63,12 @@ static const char mlx4_en_version[] = */ -/* Use a XOR rathern than Toeplitz hash function for RSS */ -MLX4_EN_PARM_INT(rss_xor, 0, "Use XOR hash function for RSS"); - -/* RSS hash type mask - default to */ -MLX4_EN_PARM_INT(rss_mask, 0xf, "RSS hash type bitmask"); +/* Enable RSS TCP traffic */ +MLX4_EN_PARM_INT(tcp_rss, 1, + "Enable RSS for incomming TCP traffic or disabled (0)"); +/* Enable RSS UDP traffic */ +MLX4_EN_PARM_INT(udp_rss, 1, + "Enable RSS for incomming UDP traffic or disabled (0)"); /* Priority pausing */ MLX4_EN_PARM_INT(pfctx, 0, "Priority based Flow Control policy on TX[7:0]." @@ -103,8 +104,12 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) struct mlx4_en_profile *params = &mdev->profile; int i; - params->rss_xor = (rss_xor != 0); - params->rss_mask = rss_mask & 0x1f; + params->tcp_rss = tcp_rss; + params->udp_rss = udp_rss; + if (params->udp_rss && !mdev->dev->caps.udp_rss) { + mlx4_warn(mdev, "UDP RSS is not supported on this device.\n"); + params->udp_rss = 0; + } for (i = 1; i <= MLX4_MAX_PORTS; i++) { params->prof[i].rx_pause = 1; params->prof[i].rx_ppp = pfcrx; diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c index f421a3d42723..e2126c76d1dc 100644 --- a/drivers/net/mlx4/en_rx.c +++ b/drivers/net/mlx4/en_rx.c @@ -859,8 +859,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) struct mlx4_qp_context context; struct mlx4_en_rss_context *rss_context; void *ptr; - int rss_xor = mdev->profile.rss_xor; - u8 rss_mask = mdev->profile.rss_mask; + u8 rss_mask = 0x3f; int i, qpn; int err = 0; int good_qps = 0; @@ -906,9 +905,10 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) rss_context->base_qpn = cpu_to_be32(ilog2(priv->rx_ring_num) << 24 | (rss_map->base_qpn)); rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn); - rss_context->hash_fn = rss_xor & 0x3; - rss_context->flags = rss_mask << 2; + rss_context->flags = rss_mask; + if (priv->mdev->profile.udp_rss) + rss_context->base_qpn_udp = rss_context->default_qpn; err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context, &rss_map->indir_qp, &rss_map->indir_state); if (err) diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index 515c6348f32b..b716e1a1b298 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -179,6 +179,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b #define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c #define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f +#define QUERY_DEV_CAP_UDP_RSS_OFFSET 0x42 #define QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET 0x43 #define QUERY_DEV_CAP_FLAGS_OFFSET 0x44 #define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48 @@ -270,6 +271,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_msg_sz = 1 << (field & 0x1f); MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); dev_cap->stat_rate_support = stat_rate; + MLX4_GET(field, outbox, QUERY_DEV_CAP_UDP_RSS_OFFSET); + dev_cap->udp_rss = field & 0x1; MLX4_GET(field, outbox, QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET); dev_cap->loopback_support = field & 0x1; MLX4_GET(dev_cap->flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h index 443e456c9dab..65cc72eb899d 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -78,6 +78,7 @@ struct mlx4_dev_cap { u16 wavelength[MLX4_MAX_PORTS + 1]; u64 trans_code[MLX4_MAX_PORTS + 1]; u16 stat_rate_support; + int udp_rss; int loopback_support; u32 flags; int reserved_uars; diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index f4791fa2472f..569fa3df381f 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -225,6 +225,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.bmme_flags = dev_cap->bmme_flags; dev->caps.reserved_lkey = dev_cap->reserved_lkey; dev->caps.stat_rate_support = dev_cap->stat_rate_support; + dev->caps.udp_rss = dev_cap->udp_rss; dev->caps.loopback_support = dev_cap->loopback_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h index 5d8f097d7e06..4036a053ee32 100644 --- a/drivers/net/mlx4/mlx4_en.h +++ b/drivers/net/mlx4/mlx4_en.h @@ -318,6 +318,8 @@ struct mlx4_en_port_profile { struct mlx4_en_profile { int rss_xor; + int tcp_rss; + int udp_rss; u8 rss_mask; u32 active_ports; u32 small_pkt_int; @@ -360,6 +362,7 @@ struct mlx4_en_rss_context { u8 hash_fn; u8 flags; __be32 rss_key[10]; + __be32 base_qpn_udp; }; struct mlx4_en_port_state { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 2a36a344fb3d..7338654c02b4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -233,6 +233,7 @@ struct mlx4_caps { u32 bmme_flags; u32 reserved_lkey; u16 stat_rate_support; + int udp_rss; int loopback_support; u8 port_width_cap[MLX4_MAX_PORTS + 1]; int max_gso_sz; -- cgit v1.2.3 From 2738bd682df546f34654ed3d59dfc9ebe8d04979 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sat, 21 Aug 2010 16:39:01 -0400 Subject: mac80211: trivial spelling fixes Fix spelling and readability of a few lines of kernel doc: s/issueing/issuing/g s/approriate/appropriate/g s/supported by simply/supported simply by/ s/IEEE80211_HW_BEACON_FILTERING/IEEE80211_HW_BEACON_FILTER/g Signed-off-by: Bob Copeland Signed-off-by: John W. Linville --- include/net/mac80211.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2a1811366076..dcc8c2bf986e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1242,8 +1242,8 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * %IEEE80211_CONF_PS flag enabled means that the powersave mode defined in * IEEE 802.11-2007 section 11.2 is enabled. This is not to be confused * with hardware wakeup and sleep states. Driver is responsible for waking - * up the hardware before issueing commands to the hardware and putting it - * back to sleep at approriate times. + * up the hardware before issuing commands to the hardware and putting it + * back to sleep at appropriate times. * * When PS is enabled, hardware needs to wakeup for beacons and receive the * buffered multicast/broadcast frames after the beacon. Also it must be @@ -1264,7 +1264,7 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * there's data traffic and still saving significantly power in idle * periods. * - * Dynamic powersave is supported by simply mac80211 enabling and disabling + * Dynamic powersave is simply supported by mac80211 enabling and disabling * PS based on traffic. Driver needs to only set %IEEE80211_HW_SUPPORTS_PS * flag and mac80211 will handle everything automatically. Additionally, * hardware having support for the dynamic PS feature may set the @@ -2458,7 +2458,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * - * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTERING and + * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTER and * %IEEE80211_CONF_PS is set, the driver needs to inform whenever the * hardware is not receiving beacons with this function. */ @@ -2469,7 +2469,7 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif); * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * - * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTERING, and + * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTER, and * %IEEE80211_CONF_PS and %IEEE80211_HW_CONNECTION_MONITOR are set, the driver * needs to inform if the connection to the AP has been lost. * -- cgit v1.2.3 From 4c5f7d7a1e6cf20ad515dad8a63c0813fac5bcea Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 22 Aug 2010 22:46:28 +0300 Subject: wl12xx: change contact person for the include file Luciano should be the contact person for the include/linux/spi/wl12xx.h file. Signed-off-by: Kalle Valo Acked-by: Luciano Coelho Signed-off-by: John W. Linville --- include/linux/spi/wl12xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/wl12xx.h b/include/linux/spi/wl12xx.h index a223ecbc71ef..a20bccf0b5c2 100644 --- a/include/linux/spi/wl12xx.h +++ b/include/linux/spi/wl12xx.h @@ -3,7 +3,7 @@ * * Copyright (C) 2009 Nokia Corporation * - * Contact: Kalle Valo + * Contact: Luciano Coelho * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License -- cgit v1.2.3 From ad01b7d480a4a135f974afd5c617c417e0b0542f Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 23 Aug 2010 20:40:42 +0000 Subject: stmmac: make ioaddr 'void __iomem *' rather than unsigned long This avoids unnecessary casting and adds the ioaddr in the private structure. This patch also removes many warning when compile the driver. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/stmmac/common.h | 50 +++++++-------- drivers/net/stmmac/dwmac1000_core.c | 18 +++--- drivers/net/stmmac/dwmac1000_dma.c | 8 +-- drivers/net/stmmac/dwmac100_core.c | 20 +++--- drivers/net/stmmac/dwmac100_dma.c | 8 +-- drivers/net/stmmac/dwmac_dma.h | 16 ++--- drivers/net/stmmac/dwmac_lib.c | 22 +++---- drivers/net/stmmac/enh_desc.c | 2 +- drivers/net/stmmac/norm_desc.c | 2 +- drivers/net/stmmac/stmmac.h | 3 +- drivers/net/stmmac/stmmac_ethtool.c | 21 +++--- drivers/net/stmmac/stmmac_main.c | 124 +++++++++++++++++------------------- drivers/net/stmmac/stmmac_mdio.c | 21 +++--- include/linux/stmmac.h | 2 +- 14 files changed, 153 insertions(+), 164 deletions(-) (limited to 'include') diff --git a/drivers/net/stmmac/common.h b/drivers/net/stmmac/common.h index 66b9da0260fe..e8cbcb5c206e 100644 --- a/drivers/net/stmmac/common.h +++ b/drivers/net/stmmac/common.h @@ -167,7 +167,7 @@ struct stmmac_desc_ops { int (*get_tx_ls) (struct dma_desc *p); /* Return the transmit status looking at the TDES1 */ int (*tx_status) (void *data, struct stmmac_extra_stats *x, - struct dma_desc *p, unsigned long ioaddr); + struct dma_desc *p, void __iomem *ioaddr); /* Get the buffer size from the descriptor */ int (*get_tx_len) (struct dma_desc *p); /* Handle extra events on specific interrupts hw dependent */ @@ -182,44 +182,44 @@ struct stmmac_desc_ops { struct stmmac_dma_ops { /* DMA core initialization */ - int (*init) (unsigned long ioaddr, int pbl, u32 dma_tx, u32 dma_rx); + int (*init) (void __iomem *ioaddr, int pbl, u32 dma_tx, u32 dma_rx); /* Dump DMA registers */ - void (*dump_regs) (unsigned long ioaddr); + void (*dump_regs) (void __iomem *ioaddr); /* Set tx/rx threshold in the csr6 register * An invalid value enables the store-and-forward mode */ - void (*dma_mode) (unsigned long ioaddr, int txmode, int rxmode); + void (*dma_mode) (void __iomem *ioaddr, int txmode, int rxmode); /* To track extra statistic (if supported) */ void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x, - unsigned long ioaddr); - void (*enable_dma_transmission) (unsigned long ioaddr); - void (*enable_dma_irq) (unsigned long ioaddr); - void (*disable_dma_irq) (unsigned long ioaddr); - void (*start_tx) (unsigned long ioaddr); - void (*stop_tx) (unsigned long ioaddr); - void (*start_rx) (unsigned long ioaddr); - void (*stop_rx) (unsigned long ioaddr); - int (*dma_interrupt) (unsigned long ioaddr, + void __iomem *ioaddr); + void (*enable_dma_transmission) (void __iomem *ioaddr); + void (*enable_dma_irq) (void __iomem *ioaddr); + void (*disable_dma_irq) (void __iomem *ioaddr); + void (*start_tx) (void __iomem *ioaddr); + void (*stop_tx) (void __iomem *ioaddr); + void (*start_rx) (void __iomem *ioaddr); + void (*stop_rx) (void __iomem *ioaddr); + int (*dma_interrupt) (void __iomem *ioaddr, struct stmmac_extra_stats *x); }; struct stmmac_ops { /* MAC core initialization */ - void (*core_init) (unsigned long ioaddr) ____cacheline_aligned; + void (*core_init) (void __iomem *ioaddr) ____cacheline_aligned; /* Dump MAC registers */ - void (*dump_regs) (unsigned long ioaddr); + void (*dump_regs) (void __iomem *ioaddr); /* Handle extra events on specific interrupts hw dependent */ - void (*host_irq_status) (unsigned long ioaddr); + void (*host_irq_status) (void __iomem *ioaddr); /* Multicast filter setting */ void (*set_filter) (struct net_device *dev); /* Flow control setting */ - void (*flow_ctrl) (unsigned long ioaddr, unsigned int duplex, + void (*flow_ctrl) (void __iomem *ioaddr, unsigned int duplex, unsigned int fc, unsigned int pause_time); /* Set power management mode (e.g. magic frame) */ - void (*pmt) (unsigned long ioaddr, unsigned long mode); + void (*pmt) (void __iomem *ioaddr, unsigned long mode); /* Set/Get Unicast MAC addresses */ - void (*set_umac_addr) (unsigned long ioaddr, unsigned char *addr, + void (*set_umac_addr) (void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n); - void (*get_umac_addr) (unsigned long ioaddr, unsigned char *addr, + void (*get_umac_addr) (void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n); }; @@ -243,11 +243,11 @@ struct mac_device_info { struct mac_link link; }; -struct mac_device_info *dwmac1000_setup(unsigned long addr); -struct mac_device_info *dwmac100_setup(unsigned long addr); +struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr); +struct mac_device_info *dwmac100_setup(void __iomem *ioaddr); -extern void stmmac_set_mac_addr(unsigned long ioaddr, u8 addr[6], +extern void stmmac_set_mac_addr(void __iomem *ioaddr, u8 addr[6], unsigned int high, unsigned int low); -extern void stmmac_get_mac_addr(unsigned long ioaddr, unsigned char *addr, +extern void stmmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int high, unsigned int low); -extern void dwmac_dma_flush_tx_fifo(unsigned long ioaddr); +extern void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr); diff --git a/drivers/net/stmmac/dwmac1000_core.c b/drivers/net/stmmac/dwmac1000_core.c index 2b2f5c8caf1c..8bbfc0f48dea 100644 --- a/drivers/net/stmmac/dwmac1000_core.c +++ b/drivers/net/stmmac/dwmac1000_core.c @@ -30,7 +30,7 @@ #include #include "dwmac1000.h" -static void dwmac1000_core_init(unsigned long ioaddr) +static void dwmac1000_core_init(void __iomem *ioaddr) { u32 value = readl(ioaddr + GMAC_CONTROL); value |= GMAC_CORE_INIT; @@ -50,7 +50,7 @@ static void dwmac1000_core_init(unsigned long ioaddr) #endif } -static void dwmac1000_dump_regs(unsigned long ioaddr) +static void dwmac1000_dump_regs(void __iomem *ioaddr) { int i; pr_info("\tDWMAC1000 regs (base addr = 0x%8x)\n", (unsigned int)ioaddr); @@ -62,14 +62,14 @@ static void dwmac1000_dump_regs(unsigned long ioaddr) } } -static void dwmac1000_set_umac_addr(unsigned long ioaddr, unsigned char *addr, +static void dwmac1000_set_umac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n) { stmmac_set_mac_addr(ioaddr, addr, GMAC_ADDR_HIGH(reg_n), GMAC_ADDR_LOW(reg_n)); } -static void dwmac1000_get_umac_addr(unsigned long ioaddr, unsigned char *addr, +static void dwmac1000_get_umac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n) { stmmac_get_mac_addr(ioaddr, addr, GMAC_ADDR_HIGH(reg_n), @@ -78,7 +78,7 @@ static void dwmac1000_get_umac_addr(unsigned long ioaddr, unsigned char *addr, static void dwmac1000_set_filter(struct net_device *dev) { - unsigned long ioaddr = dev->base_addr; + void __iomem *ioaddr = (void __iomem *) dev->base_addr; unsigned int value = 0; CHIP_DBG(KERN_INFO "%s: # mcasts %d, # unicast %d\n", @@ -139,7 +139,7 @@ static void dwmac1000_set_filter(struct net_device *dev) readl(ioaddr + GMAC_HASH_HIGH), readl(ioaddr + GMAC_HASH_LOW)); } -static void dwmac1000_flow_ctrl(unsigned long ioaddr, unsigned int duplex, +static void dwmac1000_flow_ctrl(void __iomem *ioaddr, unsigned int duplex, unsigned int fc, unsigned int pause_time) { unsigned int flow = 0; @@ -162,7 +162,7 @@ static void dwmac1000_flow_ctrl(unsigned long ioaddr, unsigned int duplex, writel(flow, ioaddr + GMAC_FLOW_CTRL); } -static void dwmac1000_pmt(unsigned long ioaddr, unsigned long mode) +static void dwmac1000_pmt(void __iomem *ioaddr, unsigned long mode) { unsigned int pmt = 0; @@ -178,7 +178,7 @@ static void dwmac1000_pmt(unsigned long ioaddr, unsigned long mode) } -static void dwmac1000_irq_status(unsigned long ioaddr) +static void dwmac1000_irq_status(void __iomem *ioaddr) { u32 intr_status = readl(ioaddr + GMAC_INT_STATUS); @@ -211,7 +211,7 @@ struct stmmac_ops dwmac1000_ops = { .get_umac_addr = dwmac1000_get_umac_addr, }; -struct mac_device_info *dwmac1000_setup(unsigned long ioaddr) +struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr) { struct mac_device_info *mac; u32 uid = readl(ioaddr + GMAC_VERSION); diff --git a/drivers/net/stmmac/dwmac1000_dma.c b/drivers/net/stmmac/dwmac1000_dma.c index 415805057cb0..2ef5a56370e9 100644 --- a/drivers/net/stmmac/dwmac1000_dma.c +++ b/drivers/net/stmmac/dwmac1000_dma.c @@ -29,7 +29,7 @@ #include "dwmac1000.h" #include "dwmac_dma.h" -static int dwmac1000_dma_init(unsigned long ioaddr, int pbl, u32 dma_tx, +static int dwmac1000_dma_init(void __iomem *ioaddr, int pbl, u32 dma_tx, u32 dma_rx) { u32 value = readl(ioaddr + DMA_BUS_MODE); @@ -58,7 +58,7 @@ static int dwmac1000_dma_init(unsigned long ioaddr, int pbl, u32 dma_tx, return 0; } -static void dwmac1000_dma_operation_mode(unsigned long ioaddr, int txmode, +static void dwmac1000_dma_operation_mode(void __iomem *ioaddr, int txmode, int rxmode) { u32 csr6 = readl(ioaddr + DMA_CONTROL); @@ -111,12 +111,12 @@ static void dwmac1000_dma_operation_mode(unsigned long ioaddr, int txmode, /* Not yet implemented --- no RMON module */ static void dwmac1000_dma_diagnostic_fr(void *data, - struct stmmac_extra_stats *x, unsigned long ioaddr) + struct stmmac_extra_stats *x, void __iomem *ioaddr) { return; } -static void dwmac1000_dump_dma_regs(unsigned long ioaddr) +static void dwmac1000_dump_dma_regs(void __iomem *ioaddr) { int i; pr_info(" DMA registers\n"); diff --git a/drivers/net/stmmac/dwmac100_core.c b/drivers/net/stmmac/dwmac100_core.c index 2fb165fa2ba0..135a8082816e 100644 --- a/drivers/net/stmmac/dwmac100_core.c +++ b/drivers/net/stmmac/dwmac100_core.c @@ -31,7 +31,7 @@ #include #include "dwmac100.h" -static void dwmac100_core_init(unsigned long ioaddr) +static void dwmac100_core_init(void __iomem *ioaddr) { u32 value = readl(ioaddr + MAC_CONTROL); @@ -42,12 +42,12 @@ static void dwmac100_core_init(unsigned long ioaddr) #endif } -static void dwmac100_dump_mac_regs(unsigned long ioaddr) +static void dwmac100_dump_mac_regs(void __iomem *ioaddr) { pr_info("\t----------------------------------------------\n" "\t DWMAC 100 CSR (base addr = 0x%8x)\n" "\t----------------------------------------------\n", - (unsigned int)ioaddr); + (unsigned int) ioaddr); pr_info("\tcontrol reg (offset 0x%x): 0x%08x\n", MAC_CONTROL, readl(ioaddr + MAC_CONTROL)); pr_info("\taddr HI (offset 0x%x): 0x%08x\n ", MAC_ADDR_HIGH, @@ -77,18 +77,18 @@ static void dwmac100_dump_mac_regs(unsigned long ioaddr) MMC_LOW_INTR_MASK, readl(ioaddr + MMC_LOW_INTR_MASK)); } -static void dwmac100_irq_status(unsigned long ioaddr) +static void dwmac100_irq_status(void __iomem *ioaddr) { return; } -static void dwmac100_set_umac_addr(unsigned long ioaddr, unsigned char *addr, +static void dwmac100_set_umac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n) { stmmac_set_mac_addr(ioaddr, addr, MAC_ADDR_HIGH, MAC_ADDR_LOW); } -static void dwmac100_get_umac_addr(unsigned long ioaddr, unsigned char *addr, +static void dwmac100_get_umac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n) { stmmac_get_mac_addr(ioaddr, addr, MAC_ADDR_HIGH, MAC_ADDR_LOW); @@ -96,7 +96,7 @@ static void dwmac100_get_umac_addr(unsigned long ioaddr, unsigned char *addr, static void dwmac100_set_filter(struct net_device *dev) { - unsigned long ioaddr = dev->base_addr; + void __iomem *ioaddr = (void __iomem *) dev->base_addr; u32 value = readl(ioaddr + MAC_CONTROL); if (dev->flags & IFF_PROMISC) { @@ -145,7 +145,7 @@ static void dwmac100_set_filter(struct net_device *dev) readl(ioaddr + MAC_HASH_HIGH), readl(ioaddr + MAC_HASH_LOW)); } -static void dwmac100_flow_ctrl(unsigned long ioaddr, unsigned int duplex, +static void dwmac100_flow_ctrl(void __iomem *ioaddr, unsigned int duplex, unsigned int fc, unsigned int pause_time) { unsigned int flow = MAC_FLOW_CTRL_ENABLE; @@ -158,7 +158,7 @@ static void dwmac100_flow_ctrl(unsigned long ioaddr, unsigned int duplex, /* No PMT module supported for this Ethernet Controller. * Tested on ST platforms only. */ -static void dwmac100_pmt(unsigned long ioaddr, unsigned long mode) +static void dwmac100_pmt(void __iomem *ioaddr, unsigned long mode) { return; } @@ -174,7 +174,7 @@ struct stmmac_ops dwmac100_ops = { .get_umac_addr = dwmac100_get_umac_addr, }; -struct mac_device_info *dwmac100_setup(unsigned long ioaddr) +struct mac_device_info *dwmac100_setup(void __iomem *ioaddr) { struct mac_device_info *mac; diff --git a/drivers/net/stmmac/dwmac100_dma.c b/drivers/net/stmmac/dwmac100_dma.c index 2fece7b72727..c7279d2b946b 100644 --- a/drivers/net/stmmac/dwmac100_dma.c +++ b/drivers/net/stmmac/dwmac100_dma.c @@ -31,7 +31,7 @@ #include "dwmac100.h" #include "dwmac_dma.h" -static int dwmac100_dma_init(unsigned long ioaddr, int pbl, u32 dma_tx, +static int dwmac100_dma_init(void __iomem *ioaddr, int pbl, u32 dma_tx, u32 dma_rx) { u32 value = readl(ioaddr + DMA_BUS_MODE); @@ -58,7 +58,7 @@ static int dwmac100_dma_init(unsigned long ioaddr, int pbl, u32 dma_tx, /* Store and Forward capability is not used at all.. * The transmit threshold can be programmed by * setting the TTC bits in the DMA control register.*/ -static void dwmac100_dma_operation_mode(unsigned long ioaddr, int txmode, +static void dwmac100_dma_operation_mode(void __iomem *ioaddr, int txmode, int rxmode) { u32 csr6 = readl(ioaddr + DMA_CONTROL); @@ -73,7 +73,7 @@ static void dwmac100_dma_operation_mode(unsigned long ioaddr, int txmode, writel(csr6, ioaddr + DMA_CONTROL); } -static void dwmac100_dump_dma_regs(unsigned long ioaddr) +static void dwmac100_dump_dma_regs(void __iomem *ioaddr) { int i; @@ -91,7 +91,7 @@ static void dwmac100_dump_dma_regs(unsigned long ioaddr) /* DMA controller has two counters to track the number of * the receive missed frames. */ static void dwmac100_dma_diagnostic_fr(void *data, struct stmmac_extra_stats *x, - unsigned long ioaddr) + void __iomem *ioaddr) { struct net_device_stats *stats = (struct net_device_stats *)data; u32 csr8 = readl(ioaddr + DMA_MISSED_FRAME_CTR); diff --git a/drivers/net/stmmac/dwmac_dma.h b/drivers/net/stmmac/dwmac_dma.h index 7b815a1b7b8c..da3f5ccf83d3 100644 --- a/drivers/net/stmmac/dwmac_dma.h +++ b/drivers/net/stmmac/dwmac_dma.h @@ -97,12 +97,12 @@ #define DMA_STATUS_TI 0x00000001 /* Transmit Interrupt */ #define DMA_CONTROL_FTF 0x00100000 /* Flush transmit FIFO */ -extern void dwmac_enable_dma_transmission(unsigned long ioaddr); -extern void dwmac_enable_dma_irq(unsigned long ioaddr); -extern void dwmac_disable_dma_irq(unsigned long ioaddr); -extern void dwmac_dma_start_tx(unsigned long ioaddr); -extern void dwmac_dma_stop_tx(unsigned long ioaddr); -extern void dwmac_dma_start_rx(unsigned long ioaddr); -extern void dwmac_dma_stop_rx(unsigned long ioaddr); -extern int dwmac_dma_interrupt(unsigned long ioaddr, +extern void dwmac_enable_dma_transmission(void __iomem *ioaddr); +extern void dwmac_enable_dma_irq(void __iomem *ioaddr); +extern void dwmac_disable_dma_irq(void __iomem *ioaddr); +extern void dwmac_dma_start_tx(void __iomem *ioaddr); +extern void dwmac_dma_stop_tx(void __iomem *ioaddr); +extern void dwmac_dma_start_rx(void __iomem *ioaddr); +extern void dwmac_dma_stop_rx(void __iomem *ioaddr); +extern int dwmac_dma_interrupt(void __iomem *ioaddr, struct stmmac_extra_stats *x); diff --git a/drivers/net/stmmac/dwmac_lib.c b/drivers/net/stmmac/dwmac_lib.c index a85415216ef4..d65fab1ba790 100644 --- a/drivers/net/stmmac/dwmac_lib.c +++ b/drivers/net/stmmac/dwmac_lib.c @@ -32,43 +32,43 @@ #endif /* CSR1 enables the transmit DMA to check for new descriptor */ -void dwmac_enable_dma_transmission(unsigned long ioaddr) +void dwmac_enable_dma_transmission(void __iomem *ioaddr) { writel(1, ioaddr + DMA_XMT_POLL_DEMAND); } -void dwmac_enable_dma_irq(unsigned long ioaddr) +void dwmac_enable_dma_irq(void __iomem *ioaddr) { writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA); } -void dwmac_disable_dma_irq(unsigned long ioaddr) +void dwmac_disable_dma_irq(void __iomem *ioaddr) { writel(0, ioaddr + DMA_INTR_ENA); } -void dwmac_dma_start_tx(unsigned long ioaddr) +void dwmac_dma_start_tx(void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_CONTROL); value |= DMA_CONTROL_ST; writel(value, ioaddr + DMA_CONTROL); } -void dwmac_dma_stop_tx(unsigned long ioaddr) +void dwmac_dma_stop_tx(void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_CONTROL); value &= ~DMA_CONTROL_ST; writel(value, ioaddr + DMA_CONTROL); } -void dwmac_dma_start_rx(unsigned long ioaddr) +void dwmac_dma_start_rx(void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_CONTROL); value |= DMA_CONTROL_SR; writel(value, ioaddr + DMA_CONTROL); } -void dwmac_dma_stop_rx(unsigned long ioaddr) +void dwmac_dma_stop_rx(void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_CONTROL); value &= ~DMA_CONTROL_SR; @@ -145,7 +145,7 @@ static void show_rx_process_state(unsigned int status) } #endif -int dwmac_dma_interrupt(unsigned long ioaddr, +int dwmac_dma_interrupt(void __iomem *ioaddr, struct stmmac_extra_stats *x) { int ret = 0; @@ -219,7 +219,7 @@ int dwmac_dma_interrupt(unsigned long ioaddr, return ret; } -void dwmac_dma_flush_tx_fifo(unsigned long ioaddr) +void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr) { u32 csr6 = readl(ioaddr + DMA_CONTROL); writel((csr6 | DMA_CONTROL_FTF), ioaddr + DMA_CONTROL); @@ -227,7 +227,7 @@ void dwmac_dma_flush_tx_fifo(unsigned long ioaddr) do {} while ((readl(ioaddr + DMA_CONTROL) & DMA_CONTROL_FTF)); } -void stmmac_set_mac_addr(unsigned long ioaddr, u8 addr[6], +void stmmac_set_mac_addr(void __iomem *ioaddr, u8 addr[6], unsigned int high, unsigned int low) { unsigned long data; @@ -238,7 +238,7 @@ void stmmac_set_mac_addr(unsigned long ioaddr, u8 addr[6], writel(data, ioaddr + low); } -void stmmac_get_mac_addr(unsigned long ioaddr, unsigned char *addr, +void stmmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int high, unsigned int low) { unsigned int hi_addr, lo_addr; diff --git a/drivers/net/stmmac/enh_desc.c b/drivers/net/stmmac/enh_desc.c index f612f986a7e1..77ff88c3958b 100644 --- a/drivers/net/stmmac/enh_desc.c +++ b/drivers/net/stmmac/enh_desc.c @@ -25,7 +25,7 @@ #include "common.h" static int enh_desc_get_tx_status(void *data, struct stmmac_extra_stats *x, - struct dma_desc *p, unsigned long ioaddr) + struct dma_desc *p, void __iomem *ioaddr) { int ret = 0; struct net_device_stats *stats = (struct net_device_stats *)data; diff --git a/drivers/net/stmmac/norm_desc.c b/drivers/net/stmmac/norm_desc.c index 31ad53643792..51f4440ab98b 100644 --- a/drivers/net/stmmac/norm_desc.c +++ b/drivers/net/stmmac/norm_desc.c @@ -25,7 +25,7 @@ #include "common.h" static int ndesc_get_tx_status(void *data, struct stmmac_extra_stats *x, - struct dma_desc *p, unsigned long ioaddr) + struct dma_desc *p, void __iomem *ioaddr) { int ret = 0; struct net_device_stats *stats = (struct net_device_stats *)data; diff --git a/drivers/net/stmmac/stmmac.h b/drivers/net/stmmac/stmmac.h index ebebc644b1b8..cca53dbac361 100644 --- a/drivers/net/stmmac/stmmac.h +++ b/drivers/net/stmmac/stmmac.h @@ -54,6 +54,7 @@ struct stmmac_priv { unsigned int dma_buf_sz; struct device *device; struct mac_device_info *hw; + void __iomem *ioaddr; struct stmmac_extra_stats xstats; struct napi_struct napi; @@ -65,7 +66,7 @@ struct stmmac_priv { int phy_mask; int (*phy_reset) (void *priv); void (*fix_mac_speed) (void *priv, unsigned int speed); - void (*bus_setup)(unsigned long ioaddr); + void (*bus_setup)(void __iomem *ioaddr); void *bsp_priv; int phy_irq; diff --git a/drivers/net/stmmac/stmmac_ethtool.c b/drivers/net/stmmac/stmmac_ethtool.c index f080509923f0..63b68e61afce 100644 --- a/drivers/net/stmmac/stmmac_ethtool.c +++ b/drivers/net/stmmac/stmmac_ethtool.c @@ -177,21 +177,21 @@ void stmmac_ethtool_gregs(struct net_device *dev, if (!priv->is_gmac) { /* MAC registers */ for (i = 0; i < 12; i++) - reg_space[i] = readl(dev->base_addr + (i * 4)); + reg_space[i] = readl(priv->ioaddr + (i * 4)); /* DMA registers */ for (i = 0; i < 9; i++) reg_space[i + 12] = - readl(dev->base_addr + (DMA_BUS_MODE + (i * 4))); - reg_space[22] = readl(dev->base_addr + DMA_CUR_TX_BUF_ADDR); - reg_space[23] = readl(dev->base_addr + DMA_CUR_RX_BUF_ADDR); + readl(priv->ioaddr + (DMA_BUS_MODE + (i * 4))); + reg_space[22] = readl(priv->ioaddr + DMA_CUR_TX_BUF_ADDR); + reg_space[23] = readl(priv->ioaddr + DMA_CUR_RX_BUF_ADDR); } else { /* MAC registers */ for (i = 0; i < 55; i++) - reg_space[i] = readl(dev->base_addr + (i * 4)); + reg_space[i] = readl(priv->ioaddr + (i * 4)); /* DMA registers */ for (i = 0; i < 22; i++) reg_space[i + 55] = - readl(dev->base_addr + (DMA_BUS_MODE + (i * 4))); + readl(priv->ioaddr + (DMA_BUS_MODE + (i * 4))); } } @@ -263,11 +263,9 @@ stmmac_set_pauseparam(struct net_device *netdev, cmd.phy_address = phy->addr; ret = phy_ethtool_sset(phy, &cmd); } - } else { - unsigned long ioaddr = netdev->base_addr; - priv->hw->mac->flow_ctrl(ioaddr, phy->duplex, + } else + priv->hw->mac->flow_ctrl(priv->ioaddr, phy->duplex, priv->flow_ctrl, priv->pause); - } spin_unlock(&priv->lock); return ret; } @@ -276,12 +274,11 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *dummy, u64 *data) { struct stmmac_priv *priv = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; int i; /* Update HW stats if supported */ priv->hw->dma->dma_diagnostic_fr(&dev->stats, (void *) &priv->xstats, - ioaddr); + priv->ioaddr); for (i = 0; i < STMMAC_STATS_LEN; i++) { char *p = (char *)priv + stmmac_gstrings_stats[i].stat_offset; diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c index 86b6c69c068c..c59c1061252a 100644 --- a/drivers/net/stmmac/stmmac_main.c +++ b/drivers/net/stmmac/stmmac_main.c @@ -202,7 +202,6 @@ static void stmmac_adjust_link(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); struct phy_device *phydev = priv->phydev; - unsigned long ioaddr = dev->base_addr; unsigned long flags; int new_state = 0; unsigned int fc = priv->flow_ctrl, pause_time = priv->pause; @@ -215,7 +214,7 @@ static void stmmac_adjust_link(struct net_device *dev) spin_lock_irqsave(&priv->lock, flags); if (phydev->link) { - u32 ctrl = readl(ioaddr + MAC_CTRL_REG); + u32 ctrl = readl(priv->ioaddr + MAC_CTRL_REG); /* Now we make sure that we can be in full duplex mode. * If not, we operate in half-duplex mode. */ @@ -229,7 +228,7 @@ static void stmmac_adjust_link(struct net_device *dev) } /* Flow Control operation */ if (phydev->pause) - priv->hw->mac->flow_ctrl(ioaddr, phydev->duplex, + priv->hw->mac->flow_ctrl(priv->ioaddr, phydev->duplex, fc, pause_time); if (phydev->speed != priv->speed) { @@ -268,7 +267,7 @@ static void stmmac_adjust_link(struct net_device *dev) priv->speed = phydev->speed; } - writel(ctrl, ioaddr + MAC_CTRL_REG); + writel(ctrl, priv->ioaddr + MAC_CTRL_REG); if (!priv->oldlink) { new_state = 1; @@ -345,7 +344,7 @@ static int stmmac_init_phy(struct net_device *dev) return 0; } -static inline void stmmac_mac_enable_rx(unsigned long ioaddr) +static inline void stmmac_mac_enable_rx(void __iomem *ioaddr) { u32 value = readl(ioaddr + MAC_CTRL_REG); value |= MAC_RNABLE_RX; @@ -353,7 +352,7 @@ static inline void stmmac_mac_enable_rx(unsigned long ioaddr) writel(value, ioaddr + MAC_CTRL_REG); } -static inline void stmmac_mac_enable_tx(unsigned long ioaddr) +static inline void stmmac_mac_enable_tx(void __iomem *ioaddr) { u32 value = readl(ioaddr + MAC_CTRL_REG); value |= MAC_ENABLE_TX; @@ -361,14 +360,14 @@ static inline void stmmac_mac_enable_tx(unsigned long ioaddr) writel(value, ioaddr + MAC_CTRL_REG); } -static inline void stmmac_mac_disable_rx(unsigned long ioaddr) +static inline void stmmac_mac_disable_rx(void __iomem *ioaddr) { u32 value = readl(ioaddr + MAC_CTRL_REG); value &= ~MAC_RNABLE_RX; writel(value, ioaddr + MAC_CTRL_REG); } -static inline void stmmac_mac_disable_tx(unsigned long ioaddr) +static inline void stmmac_mac_disable_tx(void __iomem *ioaddr) { u32 value = readl(ioaddr + MAC_CTRL_REG); value &= ~MAC_ENABLE_TX; @@ -577,17 +576,17 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) { if (!priv->is_gmac) { /* MAC 10/100 */ - priv->hw->dma->dma_mode(priv->dev->base_addr, tc, 0); + priv->hw->dma->dma_mode(priv->ioaddr, tc, 0); priv->tx_coe = NO_HW_CSUM; } else { if ((priv->dev->mtu <= ETH_DATA_LEN) && (tx_coe)) { - priv->hw->dma->dma_mode(priv->dev->base_addr, + priv->hw->dma->dma_mode(priv->ioaddr, SF_DMA_MODE, SF_DMA_MODE); tc = SF_DMA_MODE; priv->tx_coe = HW_CSUM; } else { /* Checksum computation is performed in software. */ - priv->hw->dma->dma_mode(priv->dev->base_addr, tc, + priv->hw->dma->dma_mode(priv->ioaddr, tc, SF_DMA_MODE); priv->tx_coe = NO_HW_CSUM; } @@ -603,7 +602,6 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) static void stmmac_tx(struct stmmac_priv *priv) { unsigned int txsize = priv->dma_tx_size; - unsigned long ioaddr = priv->dev->base_addr; while (priv->dirty_tx != priv->cur_tx) { int last; @@ -621,7 +619,7 @@ static void stmmac_tx(struct stmmac_priv *priv) int tx_error = priv->hw->desc->tx_status(&priv->dev->stats, &priv->xstats, p, - ioaddr); + priv->ioaddr); if (likely(tx_error == 0)) { priv->dev->stats.tx_packets++; priv->xstats.tx_pkt_n++; @@ -677,7 +675,7 @@ static inline void stmmac_enable_irq(struct stmmac_priv *priv) priv->tm->timer_start(tmrate); else #endif - priv->hw->dma->enable_dma_irq(priv->dev->base_addr); + priv->hw->dma->enable_dma_irq(priv->ioaddr); } static inline void stmmac_disable_irq(struct stmmac_priv *priv) @@ -687,7 +685,7 @@ static inline void stmmac_disable_irq(struct stmmac_priv *priv) priv->tm->timer_stop(); else #endif - priv->hw->dma->disable_dma_irq(priv->dev->base_addr); + priv->hw->dma->disable_dma_irq(priv->ioaddr); } static int stmmac_has_work(struct stmmac_priv *priv) @@ -742,14 +740,15 @@ static void stmmac_no_timer_stopped(void) */ static void stmmac_tx_err(struct stmmac_priv *priv) { + netif_stop_queue(priv->dev); - priv->hw->dma->stop_tx(priv->dev->base_addr); + priv->hw->dma->stop_tx(priv->ioaddr); dma_free_tx_skbufs(priv); priv->hw->desc->init_tx_desc(priv->dma_tx, priv->dma_tx_size); priv->dirty_tx = 0; priv->cur_tx = 0; - priv->hw->dma->start_tx(priv->dev->base_addr); + priv->hw->dma->start_tx(priv->ioaddr); priv->dev->stats.tx_errors++; netif_wake_queue(priv->dev); @@ -758,11 +757,9 @@ static void stmmac_tx_err(struct stmmac_priv *priv) static void stmmac_dma_interrupt(struct stmmac_priv *priv) { - unsigned long ioaddr = priv->dev->base_addr; int status; - status = priv->hw->dma->dma_interrupt(priv->dev->base_addr, - &priv->xstats); + status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats); if (likely(status == handle_tx_rx)) _stmmac_schedule(priv); @@ -770,7 +767,7 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv) /* Try to bump up the dma threshold on this failure */ if (unlikely(tc != SF_DMA_MODE) && (tc <= 256)) { tc += 64; - priv->hw->dma->dma_mode(ioaddr, tc, SF_DMA_MODE); + priv->hw->dma->dma_mode(priv->ioaddr, tc, SF_DMA_MODE); priv->xstats.threshold = tc; } stmmac_tx_err(priv); @@ -790,7 +787,6 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv) static int stmmac_open(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; int ret; /* Check that the MAC address is valid. If its not, refuse @@ -846,7 +842,8 @@ static int stmmac_open(struct net_device *dev) init_dma_desc_rings(dev); /* DMA initialization and SW reset */ - if (unlikely(priv->hw->dma->init(ioaddr, priv->pbl, priv->dma_tx_phy, + if (unlikely(priv->hw->dma->init(priv->ioaddr, priv->pbl, + priv->dma_tx_phy, priv->dma_rx_phy) < 0)) { pr_err("%s: DMA initialization failed\n", __func__); @@ -854,22 +851,22 @@ static int stmmac_open(struct net_device *dev) } /* Copy the MAC addr into the HW */ - priv->hw->mac->set_umac_addr(ioaddr, dev->dev_addr, 0); + priv->hw->mac->set_umac_addr(priv->ioaddr, dev->dev_addr, 0); /* If required, perform hw setup of the bus. */ if (priv->bus_setup) - priv->bus_setup(ioaddr); + priv->bus_setup(priv->ioaddr); /* Initialize the MAC Core */ - priv->hw->mac->core_init(ioaddr); + priv->hw->mac->core_init(priv->ioaddr); priv->shutdown = 0; /* Initialise the MMC (if present) to disable all interrupts. */ - writel(0xffffffff, ioaddr + MMC_HIGH_INTR_MASK); - writel(0xffffffff, ioaddr + MMC_LOW_INTR_MASK); + writel(0xffffffff, priv->ioaddr + MMC_HIGH_INTR_MASK); + writel(0xffffffff, priv->ioaddr + MMC_LOW_INTR_MASK); /* Enable the MAC Rx/Tx */ - stmmac_mac_enable_rx(ioaddr); - stmmac_mac_enable_tx(ioaddr); + stmmac_mac_enable_rx(priv->ioaddr); + stmmac_mac_enable_tx(priv->ioaddr); /* Set the HW DMA mode and the COE */ stmmac_dma_operation_mode(priv); @@ -880,16 +877,16 @@ static int stmmac_open(struct net_device *dev) /* Start the ball rolling... */ DBG(probe, DEBUG, "%s: DMA RX/TX processes started...\n", dev->name); - priv->hw->dma->start_tx(ioaddr); - priv->hw->dma->start_rx(ioaddr); + priv->hw->dma->start_tx(priv->ioaddr); + priv->hw->dma->start_rx(priv->ioaddr); #ifdef CONFIG_STMMAC_TIMER priv->tm->timer_start(tmrate); #endif /* Dump DMA/MAC registers */ if (netif_msg_hw(priv)) { - priv->hw->mac->dump_regs(ioaddr); - priv->hw->dma->dump_regs(ioaddr); + priv->hw->mac->dump_regs(priv->ioaddr); + priv->hw->dma->dump_regs(priv->ioaddr); } if (priv->phydev) @@ -933,15 +930,15 @@ static int stmmac_release(struct net_device *dev) free_irq(dev->irq, dev); /* Stop TX/RX DMA and clear the descriptors */ - priv->hw->dma->stop_tx(dev->base_addr); - priv->hw->dma->stop_rx(dev->base_addr); + priv->hw->dma->stop_tx(priv->ioaddr); + priv->hw->dma->stop_rx(priv->ioaddr); /* Release and free the Rx/Tx resources */ free_dma_desc_resources(priv); /* Disable the MAC core */ - stmmac_mac_disable_tx(dev->base_addr); - stmmac_mac_disable_rx(dev->base_addr); + stmmac_mac_disable_tx(priv->ioaddr); + stmmac_mac_disable_rx(priv->ioaddr); netif_carrier_off(dev); @@ -1143,7 +1140,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_bytes += skb->len; - priv->hw->dma->enable_dma_transmission(dev->base_addr); + priv->hw->dma->enable_dma_transmission(priv->ioaddr); return NETDEV_TX_OK; } @@ -1408,11 +1405,9 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) return IRQ_NONE; } - if (priv->is_gmac) { - unsigned long ioaddr = dev->base_addr; + if (priv->is_gmac) /* To handle GMAC own interrupts */ - priv->hw->mac->host_irq_status(ioaddr); - } + priv->hw->mac->host_irq_status((void __iomem *) dev->base_addr); stmmac_dma_interrupt(priv); @@ -1525,7 +1520,8 @@ static int stmmac_probe(struct net_device *dev) netif_napi_add(dev, &priv->napi, stmmac_poll, 64); /* Get the MAC address */ - priv->hw->mac->get_umac_addr(dev->base_addr, dev->dev_addr, 0); + priv->hw->mac->get_umac_addr((void __iomem *) dev->base_addr, + dev->dev_addr, 0); if (!is_valid_ether_addr(dev->dev_addr)) pr_warning("\tno valid MAC address;" @@ -1555,14 +1551,13 @@ static int stmmac_probe(struct net_device *dev) static int stmmac_mac_device_setup(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; struct mac_device_info *device; if (priv->is_gmac) - device = dwmac1000_setup(ioaddr); + device = dwmac1000_setup(priv->ioaddr); else - device = dwmac100_setup(ioaddr); + device = dwmac100_setup(priv->ioaddr); if (!device) return -ENOMEM; @@ -1656,7 +1651,7 @@ static int stmmac_dvr_probe(struct platform_device *pdev) { int ret = 0; struct resource *res; - unsigned int *addr = NULL; + void __iomem *addr = NULL; struct net_device *ndev = NULL; struct stmmac_priv *priv; struct plat_stmmacenet_data *plat_dat; @@ -1711,6 +1706,7 @@ static int stmmac_dvr_probe(struct platform_device *pdev) priv->pbl = plat_dat->pbl; /* TLI */ priv->is_gmac = plat_dat->has_gmac; /* GMAC is on board */ priv->enh_desc = plat_dat->enh_desc; + priv->ioaddr = addr; platform_set_drvdata(pdev, ndev); @@ -1782,11 +1778,11 @@ static int stmmac_dvr_remove(struct platform_device *pdev) pr_info("%s:\n\tremoving driver", __func__); - priv->hw->dma->stop_rx(ndev->base_addr); - priv->hw->dma->stop_tx(ndev->base_addr); + priv->hw->dma->stop_rx(priv->ioaddr); + priv->hw->dma->stop_tx(priv->ioaddr); - stmmac_mac_disable_rx(ndev->base_addr); - stmmac_mac_disable_tx(ndev->base_addr); + stmmac_mac_disable_rx(priv->ioaddr); + stmmac_mac_disable_tx(priv->ioaddr); netif_carrier_off(ndev); @@ -1795,7 +1791,7 @@ static int stmmac_dvr_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); unregister_netdev(ndev); - iounmap((void *)ndev->base_addr); + iounmap((void *)priv->ioaddr); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(res->start, resource_size(res)); @@ -1830,22 +1826,21 @@ static int stmmac_suspend(struct platform_device *pdev, pm_message_t state) napi_disable(&priv->napi); /* Stop TX/RX DMA */ - priv->hw->dma->stop_tx(dev->base_addr); - priv->hw->dma->stop_rx(dev->base_addr); + priv->hw->dma->stop_tx(priv->ioaddr); + priv->hw->dma->stop_rx(priv->ioaddr); /* Clear the Rx/Tx descriptors */ priv->hw->desc->init_rx_desc(priv->dma_rx, priv->dma_rx_size, dis_ic); priv->hw->desc->init_tx_desc(priv->dma_tx, priv->dma_tx_size); - stmmac_mac_disable_tx(dev->base_addr); + stmmac_mac_disable_tx(priv->ioaddr); if (device_may_wakeup(&(pdev->dev))) { /* Enable Power down mode by programming the PMT regs */ if (priv->wolenabled == PMT_SUPPORTED) - priv->hw->mac->pmt(dev->base_addr, - priv->wolopts); + priv->hw->mac->pmt(priv->ioaddr, priv->wolopts); } else { - stmmac_mac_disable_rx(dev->base_addr); + stmmac_mac_disable_rx(priv->ioaddr); } } else { priv->shutdown = 1; @@ -1863,7 +1858,6 @@ static int stmmac_resume(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; if (!netif_running(dev)) return 0; @@ -1884,15 +1878,15 @@ static int stmmac_resume(struct platform_device *pdev) * from another devices (e.g. serial console). */ if (device_may_wakeup(&(pdev->dev))) if (priv->wolenabled == PMT_SUPPORTED) - priv->hw->mac->pmt(dev->base_addr, 0); + priv->hw->mac->pmt(priv->ioaddr, 0); netif_device_attach(dev); /* Enable the MAC and DMA */ - stmmac_mac_enable_rx(ioaddr); - stmmac_mac_enable_tx(ioaddr); - priv->hw->dma->start_tx(ioaddr); - priv->hw->dma->start_rx(ioaddr); + stmmac_mac_enable_rx(priv->ioaddr); + stmmac_mac_enable_tx(priv->ioaddr); + priv->hw->dma->start_tx(priv->ioaddr); + priv->hw->dma->start_rx(priv->ioaddr); #ifdef CONFIG_STMMAC_TIMER priv->tm->timer_start(tmrate); diff --git a/drivers/net/stmmac/stmmac_mdio.c b/drivers/net/stmmac/stmmac_mdio.c index 40b2c7929719..03dea1401571 100644 --- a/drivers/net/stmmac/stmmac_mdio.c +++ b/drivers/net/stmmac/stmmac_mdio.c @@ -47,7 +47,6 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) { struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); - unsigned long ioaddr = ndev->base_addr; unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; @@ -56,12 +55,12 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) ((phyreg << 6) & (0x000007C0))); regValue |= MII_BUSY; /* in case of GMAC */ - do {} while (((readl(ioaddr + mii_address)) & MII_BUSY) == 1); - writel(regValue, ioaddr + mii_address); - do {} while (((readl(ioaddr + mii_address)) & MII_BUSY) == 1); + do {} while (((readl(priv->ioaddr + mii_address)) & MII_BUSY) == 1); + writel(regValue, priv->ioaddr + mii_address); + do {} while (((readl(priv->ioaddr + mii_address)) & MII_BUSY) == 1); /* Read the data from the MII data register */ - data = (int)readl(ioaddr + mii_data); + data = (int)readl(priv->ioaddr + mii_data); return data; } @@ -79,7 +78,6 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, { struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); - unsigned long ioaddr = ndev->base_addr; unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; @@ -90,14 +88,14 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, value |= MII_BUSY; /* Wait until any existing MII operation is complete */ - do {} while (((readl(ioaddr + mii_address)) & MII_BUSY) == 1); + do {} while (((readl(priv->ioaddr + mii_address)) & MII_BUSY) == 1); /* Set the MII address register to write */ - writel(phydata, ioaddr + mii_data); - writel(value, ioaddr + mii_address); + writel(phydata, priv->ioaddr + mii_data); + writel(value, priv->ioaddr + mii_address); /* Wait until any existing MII operation is complete */ - do {} while (((readl(ioaddr + mii_address)) & MII_BUSY) == 1); + do {} while (((readl(priv->ioaddr + mii_address)) & MII_BUSY) == 1); return 0; } @@ -111,7 +109,6 @@ static int stmmac_mdio_reset(struct mii_bus *bus) { struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); - unsigned long ioaddr = ndev->base_addr; unsigned int mii_address = priv->hw->mii.addr; if (priv->phy_reset) { @@ -123,7 +120,7 @@ static int stmmac_mdio_reset(struct mii_bus *bus) * It doesn't complete its reset until at least one clock cycle * on MDC, so perform a dummy mdio read. */ - writel(0, ioaddr + mii_address); + writel(0, priv->ioaddr + mii_address); return 0; } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 632ff7c03280..a4adf0de6ed6 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -35,7 +35,7 @@ struct plat_stmmacenet_data { int has_gmac; int enh_desc; void (*fix_mac_speed)(void *priv, unsigned int speed); - void (*bus_setup)(unsigned long ioaddr); + void (*bus_setup)(void __iomem *ioaddr); #ifdef CONFIG_STM_DRIVERS struct stm_pad_config *pad_config; #endif -- cgit v1.2.3 From 2971944582ff43b7dedbb460777052243ac9915a Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Mon, 9 Aug 2010 12:54:43 +0100 Subject: ARM: 6307/1: mmci: allow the card detect GPIO value not to be inverted On some platforms, the GPIO value from the gpio_cd pin doesn't need to be inverted to get it active high. Add a cd_invert platform data parameter and change existing platforms using GPIO for CD (only Realview) to enable it. Acked-by: Linus Walleij Signed-off-by: Rabin Vincent Signed-off-by: Russell King --- arch/arm/mach-realview/core.c | 2 ++ drivers/mmc/host/mmci.c | 5 +++-- include/linux/amba/mmci.h | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 2fa38df28414..07c08151dfe6 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -259,6 +259,7 @@ struct mmci_platform_data realview_mmc0_plat_data = { .status = realview_mmc_status, .gpio_wp = 17, .gpio_cd = 16, + .cd_invert = true, }; struct mmci_platform_data realview_mmc1_plat_data = { @@ -266,6 +267,7 @@ struct mmci_platform_data realview_mmc1_plat_data = { .status = realview_mmc_status, .gpio_wp = 19, .gpio_cd = 18, + .cd_invert = true, }; /* diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 840b301b5671..9a9aeac50a6c 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -570,12 +570,13 @@ static int mmci_get_ro(struct mmc_host *mmc) static int mmci_get_cd(struct mmc_host *mmc) { struct mmci_host *host = mmc_priv(mmc); + struct mmci_platform_data *plat = host->plat; unsigned int status; if (host->gpio_cd == -ENOSYS) - status = host->plat->status(mmc_dev(host->mmc)); + status = plat->status(mmc_dev(host->mmc)); else - status = !gpio_get_value(host->gpio_cd); + status = !!gpio_get_value(host->gpio_cd) ^ plat->cd_invert; /* * Use positive logic throughout - status is zero for no card, diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h index ca84ce70d5d5..f4ee9acc9721 100644 --- a/include/linux/amba/mmci.h +++ b/include/linux/amba/mmci.h @@ -24,6 +24,7 @@ * whether a card is present in the MMC slot or not * @gpio_wp: read this GPIO pin to see if the card is write protected * @gpio_cd: read this GPIO pin to detect card insertion + * @cd_invert: true if the gpio_cd pin value is active low * @capabilities: the capabilities of the block as implemented in * this platform, signify anything MMC_CAP_* from mmc/host.h */ @@ -35,6 +36,7 @@ struct mmci_platform_data { unsigned int (*status)(struct device *); int gpio_wp; int gpio_cd; + bool cd_invert; unsigned long capabilities; }; -- cgit v1.2.3 From 145ce502e44b57c074c72cfdc855557e19026999 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 24 Aug 2010 13:21:08 +0000 Subject: net/sctp: Use pr_fmt and pr_ Change SCTP_DEBUG_PRINTK and SCTP_DEBUG_PRINTK_IPADDR to use do { print } while (0) guards. Add SCTP_DEBUG_PRINTK_CONT to fix errors in log when lines were continued. Add #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt Add a missing newline in "Failed bind hash alloc" Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 48 ++++++++++++++++++++++++++++++------------------ net/sctp/associola.c | 2 ++ net/sctp/chunk.c | 2 ++ net/sctp/inqueue.c | 2 ++ net/sctp/ipv6.c | 4 +++- net/sctp/objcnt.c | 5 +++-- net/sctp/output.c | 2 ++ net/sctp/outqueue.c | 34 ++++++++++++++++++---------------- net/sctp/probe.c | 4 +++- net/sctp/protocol.c | 17 ++++++++--------- net/sctp/sm_make_chunk.c | 2 ++ net/sctp/sm_sideeffect.c | 21 ++++++++++----------- net/sctp/sm_statefuns.c | 20 ++++++++++---------- net/sctp/sm_statetable.c | 42 +++++++++++++++++++++--------------------- net/sctp/socket.c | 39 +++++++++++++++------------------------ net/sctp/transport.c | 9 +++++---- 16 files changed, 136 insertions(+), 117 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 65946bc43d00..2cb3980b1616 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -275,24 +275,35 @@ struct sctp_mib { /* Print debugging messages. */ #if SCTP_DEBUG extern int sctp_debug_flag; -#define SCTP_DEBUG_PRINTK(whatever...) \ - ((void) (sctp_debug_flag && printk(KERN_DEBUG whatever))) -#define SCTP_DEBUG_PRINTK_IPADDR(lead, trail, leadparm, saddr, otherparms...) \ - if (sctp_debug_flag) { \ - if (saddr->sa.sa_family == AF_INET6) { \ - printk(KERN_DEBUG \ - lead "%pI6" trail, \ - leadparm, \ - &saddr->v6.sin6_addr, \ - otherparms); \ - } else { \ - printk(KERN_DEBUG \ - lead "%pI4" trail, \ - leadparm, \ - &saddr->v4.sin_addr.s_addr, \ - otherparms); \ - } \ - } +#define SCTP_DEBUG_PRINTK(fmt, args...) \ +do { \ + if (sctp_debug_flag) \ + printk(KERN_DEBUG pr_fmt(fmt), ##args); \ +} while (0) +#define SCTP_DEBUG_PRINTK_CONT(fmt, args...) \ +do { \ + if (sctp_debug_flag) \ + pr_cont(fmt, ##args); \ +} while (0) +#define SCTP_DEBUG_PRINTK_IPADDR(fmt_lead, fmt_trail, \ + args_lead, saddr, args_trail...) \ +do { \ + if (sctp_debug_flag) { \ + if (saddr->sa.sa_family == AF_INET6) { \ + printk(KERN_DEBUG \ + pr_fmt(fmt_lead "%pI6" fmt_trail), \ + args_lead, \ + &saddr->v6.sin6_addr, \ + args_trail); \ + } else { \ + printk(KERN_DEBUG \ + pr_fmt(fmt_lead "%pI4" fmt_trail), \ + args_lead, \ + &saddr->v4.sin_addr.s_addr, \ + args_trail); \ + } \ + } \ +} while (0) #define SCTP_ENABLE_DEBUG { sctp_debug_flag = 1; } #define SCTP_DISABLE_DEBUG { sctp_debug_flag = 0; } @@ -306,6 +317,7 @@ extern int sctp_debug_flag; #else /* SCTP_DEBUG */ #define SCTP_DEBUG_PRINTK(whatever...) +#define SCTP_DEBUG_PRINTK_CONT(fmt, args...) #define SCTP_DEBUG_PRINTK_IPADDR(whatever...) #define SCTP_ENABLE_DEBUG #define SCTP_DISABLE_DEBUG diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 0b85e5256434..5f1fb8bd862d 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -48,6 +48,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 476caaf100ed..6c8556459a75 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -37,6 +37,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index ccb6dc48d15b..397296fb156f 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -43,6 +43,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 732689140fb8..95e0c8eda1a0 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -47,6 +47,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -336,7 +338,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, memcpy(saddr, baddr, sizeof(union sctp_addr)); SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr); } else { - printk(KERN_ERR "%s: asoc:%p Could not find a valid source " + pr_err("%s: asoc:%p Could not find a valid source " "address for the dest:%pI6\n", __func__, asoc, &daddr->v6.sin6_addr); } diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index f73ec0ea93ba..8ef8e7d9eb61 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -38,6 +38,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include @@ -134,8 +136,7 @@ void sctp_dbg_objcnt_init(void) ent = proc_create("sctp_dbg_objcnt", 0, proc_net_sctp, &sctp_objcnt_ops); if (!ent) - printk(KERN_WARNING - "sctp_dbg_objcnt: Unable to create /proc entry.\n"); + pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n"); } /* Cleanup the objcount entry in the proc filesystem. */ diff --git a/net/sctp/output.c b/net/sctp/output.c index a646681f5acd..901764b17aee 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -41,6 +41,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index c04b2eb59186..8c6d379b4bb6 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -46,6 +46,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include /* For struct list_head */ #include @@ -1463,23 +1465,23 @@ static void sctp_check_transmitted(struct sctp_outq *q, /* Display the end of the * current range. */ - SCTP_DEBUG_PRINTK("-%08x", - dbg_last_ack_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x", + dbg_last_ack_tsn); } /* Start a new range. */ - SCTP_DEBUG_PRINTK(",%08x", tsn); + SCTP_DEBUG_PRINTK_CONT(",%08x", tsn); dbg_ack_tsn = tsn; break; case 1: /* The last TSN was NOT ACKed. */ if (dbg_last_kept_tsn != dbg_kept_tsn) { /* Display the end of current range. */ - SCTP_DEBUG_PRINTK("-%08x", - dbg_last_kept_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x", + dbg_last_kept_tsn); } - SCTP_DEBUG_PRINTK("\n"); + SCTP_DEBUG_PRINTK_CONT("\n"); /* FALL THROUGH... */ default: @@ -1526,18 +1528,18 @@ static void sctp_check_transmitted(struct sctp_outq *q, break; if (dbg_last_kept_tsn != dbg_kept_tsn) - SCTP_DEBUG_PRINTK("-%08x", - dbg_last_kept_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x", + dbg_last_kept_tsn); - SCTP_DEBUG_PRINTK(",%08x", tsn); + SCTP_DEBUG_PRINTK_CONT(",%08x", tsn); dbg_kept_tsn = tsn; break; case 0: if (dbg_last_ack_tsn != dbg_ack_tsn) - SCTP_DEBUG_PRINTK("-%08x", - dbg_last_ack_tsn); - SCTP_DEBUG_PRINTK("\n"); + SCTP_DEBUG_PRINTK_CONT("-%08x", + dbg_last_ack_tsn); + SCTP_DEBUG_PRINTK_CONT("\n"); /* FALL THROUGH... */ default: @@ -1556,17 +1558,17 @@ static void sctp_check_transmitted(struct sctp_outq *q, switch (dbg_prt_state) { case 0: if (dbg_last_ack_tsn != dbg_ack_tsn) { - SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_ack_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_ack_tsn); } else { - SCTP_DEBUG_PRINTK("\n"); + SCTP_DEBUG_PRINTK_CONT("\n"); } break; case 1: if (dbg_last_kept_tsn != dbg_kept_tsn) { - SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_kept_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_kept_tsn); } else { - SCTP_DEBUG_PRINTK("\n"); + SCTP_DEBUG_PRINTK_CONT("\n"); } } #endif /* SCTP_DEBUG */ diff --git a/net/sctp/probe.c b/net/sctp/probe.c index db3a42b8b349..2e63e9dc010e 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -22,6 +22,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -192,7 +194,7 @@ static __init int sctpprobe_init(void) if (ret) goto remove_proc; - pr_info("SCTP probe registered (port=%d)\n", port); + pr_info("probe registered (port=%d)\n", port); return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 5027b83f1cc0..f774e657641a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -46,6 +46,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -707,8 +709,7 @@ static int sctp_ctl_sock_init(void) &init_net); if (err < 0) { - printk(KERN_ERR - "SCTP: Failed to create the SCTP control socket.\n"); + pr_err("Failed to create the SCTP control socket\n"); return err; } return 0; @@ -1206,7 +1207,7 @@ SCTP_STATIC __init int sctp_init(void) __get_free_pages(GFP_ATOMIC, order); } while (!sctp_assoc_hashtable && --order > 0); if (!sctp_assoc_hashtable) { - printk(KERN_ERR "SCTP: Failed association hash alloc.\n"); + pr_err("Failed association hash alloc\n"); status = -ENOMEM; goto err_ahash_alloc; } @@ -1220,7 +1221,7 @@ SCTP_STATIC __init int sctp_init(void) sctp_ep_hashtable = (struct sctp_hashbucket *) kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL); if (!sctp_ep_hashtable) { - printk(KERN_ERR "SCTP: Failed endpoint_hash alloc.\n"); + pr_err("Failed endpoint_hash alloc\n"); status = -ENOMEM; goto err_ehash_alloc; } @@ -1239,7 +1240,7 @@ SCTP_STATIC __init int sctp_init(void) __get_free_pages(GFP_ATOMIC, order); } while (!sctp_port_hashtable && --order > 0); if (!sctp_port_hashtable) { - printk(KERN_ERR "SCTP: Failed bind hash alloc."); + pr_err("Failed bind hash alloc\n"); status = -ENOMEM; goto err_bhash_alloc; } @@ -1248,8 +1249,7 @@ SCTP_STATIC __init int sctp_init(void) INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); } - printk(KERN_INFO "SCTP: Hash tables configured " - "(established %d bind %d)\n", + pr_info("Hash tables configured (established %d bind %d)\n", sctp_assoc_hashsize, sctp_port_hashsize); /* Disable ADDIP by default. */ @@ -1290,8 +1290,7 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize the control inode/socket for handling OOTB packets. */ if ((status = sctp_ctl_sock_init())) { - printk (KERN_ERR - "SCTP: Failed to initialize the SCTP control sock.\n"); + pr_err("Failed to initialize the SCTP control sock\n"); goto err_ctl_sock_init; } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 246f92924658..2cc46f0962ca 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -50,6 +50,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index f5e5e27cac5e..b21b218d564f 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -47,6 +47,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -1146,26 +1148,23 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, case SCTP_DISPOSITION_VIOLATION: if (net_ratelimit()) - printk(KERN_ERR "sctp protocol violation state %d " - "chunkid %d\n", state, subtype.chunk); + pr_err("protocol violation state %d chunkid %d\n", + state, subtype.chunk); break; case SCTP_DISPOSITION_NOT_IMPL: - printk(KERN_WARNING "sctp unimplemented feature in state %d, " - "event_type %d, event_id %d\n", - state, event_type, subtype.chunk); + pr_warn("unimplemented feature in state %d, event_type %d, event_id %d\n", + state, event_type, subtype.chunk); break; case SCTP_DISPOSITION_BUG: - printk(KERN_ERR "sctp bug in state %d, " - "event_type %d, event_id %d\n", + pr_err("bug in state %d, event_type %d, event_id %d\n", state, event_type, subtype.chunk); BUG(); break; default: - printk(KERN_ERR "sctp impossible disposition %d " - "in state %d, event_type %d, event_id %d\n", + pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n", status, state, event_type, subtype.chunk); BUG(); break; @@ -1679,8 +1678,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_cmd_send_asconf(asoc); break; default: - printk(KERN_WARNING "Impossible command: %u, %p\n", - cmd->verb, cmd->obj.ptr); + pr_warn("Impossible command: %u, %p\n", + cmd->verb, cmd->obj.ptr); break; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 24b2cd555637..8b284436be65 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -50,6 +50,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -1138,18 +1140,16 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, if (unlikely(!link)) { if (from_addr.sa.sa_family == AF_INET6) { if (net_ratelimit()) - printk(KERN_WARNING - "%s association %p could not find address %pI6\n", - __func__, - asoc, - &from_addr.v6.sin6_addr); + pr_warn("%s association %p could not find address %pI6\n", + __func__, + asoc, + &from_addr.v6.sin6_addr); } else { if (net_ratelimit()) - printk(KERN_WARNING - "%s association %p could not find address %pI4\n", - __func__, - asoc, - &from_addr.v4.sin_addr.s_addr); + pr_warn("%s association %p could not find address %pI4\n", + __func__, + asoc, + &from_addr.v4.sin_addr.s_addr); } return SCTP_DISPOSITION_DISCARD; } diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 6d9b3aafcc5d..546d4387fb3c 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -46,6 +46,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -66,15 +68,19 @@ static const sctp_sm_table_entry_t bug = { .name = "sctp_sf_bug" }; -#define DO_LOOKUP(_max, _type, _table) \ - if ((event_subtype._type > (_max))) { \ - printk(KERN_WARNING \ - "sctp table %p possible attack:" \ - " event %d exceeds max %d\n", \ - _table, event_subtype._type, _max); \ - return &bug; \ - } \ - return &_table[event_subtype._type][(int)state]; +#define DO_LOOKUP(_max, _type, _table) \ +({ \ + const sctp_sm_table_entry_t *rtn; \ + \ + if ((event_subtype._type > (_max))) { \ + pr_warn("table %p possible attack: event %d exceeds max %d\n", \ + _table, event_subtype._type, _max); \ + rtn = &bug; \ + } else \ + rtn = &_table[event_subtype._type][(int)state]; \ + \ + rtn; \ +}) const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, sctp_state_t state, @@ -83,21 +89,15 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, switch (event_type) { case SCTP_EVENT_T_CHUNK: return sctp_chunk_event_lookup(event_subtype.chunk, state); - break; case SCTP_EVENT_T_TIMEOUT: - DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout, - timeout_event_table); - break; - + return DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout, + timeout_event_table); case SCTP_EVENT_T_OTHER: - DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other, other_event_table); - break; - + return DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other, + other_event_table); case SCTP_EVENT_T_PRIMITIVE: - DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive, - primitive_event_table); - break; - + return DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive, + primitive_event_table); default: /* Yikes! We got an illegal event type. */ return &bug; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ca44917872d2..f4bec2772351 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -57,6 +57,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -2458,9 +2460,8 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, if (params.sack_delay == 0 && params.sack_freq == 0) return 0; } else if (optlen == sizeof(struct sctp_assoc_value)) { - printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " - "in delayed_ack socket option deprecated\n"); - printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n"); + pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n"); + pr_warn("Use struct sctp_sack_info instead\n"); if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; @@ -2868,10 +2869,8 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int val; if (optlen == sizeof(int)) { - printk(KERN_WARNING - "SCTP: Use of int in maxseg socket option deprecated\n"); - printk(KERN_WARNING - "SCTP: Use struct sctp_assoc_value instead\n"); + pr_warn("Use of int in maxseg socket option deprecated\n"); + pr_warn("Use struct sctp_assoc_value instead\n"); if (copy_from_user(&val, optval, optlen)) return -EFAULT; params.assoc_id = 0; @@ -3121,10 +3120,8 @@ static int sctp_setsockopt_maxburst(struct sock *sk, int assoc_id = 0; if (optlen == sizeof(int)) { - printk(KERN_WARNING - "SCTP: Use of int in max_burst socket option deprecated\n"); - printk(KERN_WARNING - "SCTP: Use struct sctp_assoc_value instead\n"); + pr_warn("Use of int in max_burst socket option deprecated\n"); + pr_warn("Use struct sctp_assoc_value instead\n"); if (copy_from_user(&val, optval, optlen)) return -EFAULT; } else if (optlen == sizeof(struct sctp_assoc_value)) { @@ -4281,9 +4278,8 @@ static int sctp_getsockopt_delayed_ack(struct sock *sk, int len, if (copy_from_user(¶ms, optval, len)) return -EFAULT; } else if (len == sizeof(struct sctp_assoc_value)) { - printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " - "in delayed_ack socket option deprecated\n"); - printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n"); + pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n"); + pr_warn("Use struct sctp_sack_info instead\n"); if (copy_from_user(¶ms, optval, len)) return -EFAULT; } else @@ -4929,10 +4925,8 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len, struct sctp_association *asoc; if (len == sizeof(int)) { - printk(KERN_WARNING - "SCTP: Use of int in maxseg socket option deprecated\n"); - printk(KERN_WARNING - "SCTP: Use struct sctp_assoc_value instead\n"); + pr_warn("Use of int in maxseg socket option deprecated\n"); + pr_warn("Use struct sctp_assoc_value instead\n"); params.assoc_id = 0; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); @@ -5023,10 +5017,8 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, struct sctp_association *asoc; if (len == sizeof(int)) { - printk(KERN_WARNING - "SCTP: Use of int in max_burst socket option deprecated\n"); - printk(KERN_WARNING - "SCTP: Use struct sctp_assoc_value instead\n"); + pr_warn("Use of int in max_burst socket option deprecated\n"); + pr_warn("Use struct sctp_assoc_value instead\n"); params.assoc_id = 0; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); @@ -5586,8 +5578,7 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog) tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { if (net_ratelimit()) { - printk(KERN_INFO - "SCTP: failed to load transform for %s: %ld\n", + pr_info("failed to load transform for %s: %ld\n", sctp_hmac_alg, PTR_ERR(tfm)); } return -ENOSYS; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 132046cb82fc..d3ae493d234a 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -48,6 +48,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -244,10 +246,9 @@ void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) struct dst_entry *dst; if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { - printk(KERN_WARNING "%s: Reported pmtu %d too low, " - "using default minimum of %d\n", - __func__, pmtu, - SCTP_DEFAULT_MINSEGMENT); + pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", + __func__, pmtu, + SCTP_DEFAULT_MINSEGMENT); /* Use default minimum segment size and disable * pmtu discovery on this transport. */ -- cgit v1.2.3 From 480125ba49ba62be93beea37770f266846e077ab Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 26 Aug 2010 13:57:57 -0400 Subject: x86, iommu: Make all IOMMU's detection routines return a value. We return 1 if the IOMMU has been detected. Zero or an error number if we failed to find it. This is in preperation of using the IOMMU_INIT so that we can detect whether an IOMMU is present. I have not tested this for regression on Calgary, nor on AMD Vi chipsets as I don't have that hardware. CC: Muli Ben-Yehuda CC: "Jon D. Mason" CC: "Darrick J. Wong" CC: Jesse Barnes CC: David Woodhouse CC: Chris Wright CC: Yinghai Lu CC: Joerg Roedel CC: H. Peter Anvin CC: Fujita Tomonori Signed-off-by: Konrad Rzeszutek Wilk LKML-Reference: <1282845485-8991-3-git-send-email-konrad.wilk@oracle.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/amd_iommu.h | 4 ++-- arch/x86/include/asm/calgary.h | 4 ++-- arch/x86/include/asm/gart.h | 5 +++-- arch/x86/kernel/amd_iommu_init.c | 8 +++++--- arch/x86/kernel/aperture_64.c | 11 +++++++---- arch/x86/kernel/pci-calgary_64.c | 15 ++++++++------- drivers/pci/dmar.c | 4 +++- include/linux/dmar.h | 6 +++--- 8 files changed, 33 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 5af2982133b5..2798142cdb49 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -24,11 +24,11 @@ #ifdef CONFIG_AMD_IOMMU -extern void amd_iommu_detect(void); +extern int amd_iommu_detect(void); #else -static inline void amd_iommu_detect(void) { } +static inline int amd_iommu_detect(void) { return -ENODEV; } #endif diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h index 0918654305af..0d467b338835 100644 --- a/arch/x86/include/asm/calgary.h +++ b/arch/x86/include/asm/calgary.h @@ -62,9 +62,9 @@ struct cal_chipset_ops { extern int use_calgary; #ifdef CONFIG_CALGARY_IOMMU -extern void detect_calgary(void); +extern int detect_calgary(void); #else -static inline void detect_calgary(void) { return; } +static inline int detect_calgary(void) { return -ENODEV; } #endif #endif /* _ASM_X86_CALGARY_H */ diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 4ac5b0f33fc1..d7d1d4c438a4 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -37,7 +37,7 @@ extern int gart_iommu_aperture_disabled; extern void early_gart_iommu_check(void); extern int gart_iommu_init(void); extern void __init gart_parse_options(char *); -extern void gart_iommu_hole_init(void); +extern int gart_iommu_hole_init(void); #else #define gart_iommu_aperture 0 @@ -50,8 +50,9 @@ static inline void early_gart_iommu_check(void) static inline void gart_parse_options(char *options) { } -static inline void gart_iommu_hole_init(void) +static inline int gart_iommu_hole_init(void) { + return -ENODEV; } #endif diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 3cc63e2b8dd4..0b9e2dc4fc9a 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1382,13 +1382,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) return 0; } -void __init amd_iommu_detect(void) +int __init amd_iommu_detect(void) { if (no_iommu || (iommu_detected && !gart_iommu_aperture)) - return; + return -ENODEV; if (amd_iommu_disabled) - return; + return -ENODEV; if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { iommu_detected = 1; @@ -1397,7 +1397,9 @@ void __init amd_iommu_detect(void) /* Make sure ACS will be enabled */ pci_request_acs(); + return 1; } + return -ENODEV; } /**************************************************************************** diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index a2e0caf26e17..afa0dab3302f 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -371,7 +371,7 @@ void __init early_gart_iommu_check(void) static int __initdata printed_gart_size_msg; -void __init gart_iommu_hole_init(void) +int __init gart_iommu_hole_init(void) { u32 agp_aper_base = 0, agp_aper_order = 0; u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; @@ -381,7 +381,7 @@ void __init gart_iommu_hole_init(void) if (gart_iommu_aperture_disabled || !fix_aperture || !early_pci_allowed()) - return; + return -ENODEV; printk(KERN_INFO "Checking aperture...\n"); @@ -463,8 +463,9 @@ out: unsigned long n = (32 * 1024 * 1024) << last_aper_order; insert_aperture_resource((u32)last_aper_base, n); + return 1; } - return; + return 0; } if (!fallback_aper_force) { @@ -500,7 +501,7 @@ out: panic("Not enough memory for aperture"); } } else { - return; + return 0; } /* Fix up the north bridges */ @@ -524,4 +525,6 @@ out: } set_up_gart_resume(aper_order, aper_alloc); + + return 1; } diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 078d4ec1a9d9..28c6b389fee6 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1364,7 +1364,7 @@ static int __init calgary_iommu_init(void) return 0; } -void __init detect_calgary(void) +int __init detect_calgary(void) { int bus; void *tbl; @@ -1378,13 +1378,13 @@ void __init detect_calgary(void) * another HW IOMMU already, bail out. */ if (no_iommu || iommu_detected) - return; + return -ENODEV; if (!use_calgary) - return; + return -ENODEV; if (!early_pci_allowed()) - return; + return -ENODEV; printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n"); @@ -1410,13 +1410,13 @@ void __init detect_calgary(void) if (!rio_table_hdr) { printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table " "in EBDA - bailing!\n"); - return; + return -ENODEV; } ret = build_detail_arrays(); if (ret) { printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret); - return; + return -ENOMEM; } specified_table_size = determine_tce_table_size((is_kdump_kernel() ? @@ -1464,7 +1464,7 @@ void __init detect_calgary(void) x86_init.iommu.iommu_init = calgary_iommu_init; } - return; + return calgary_found; cleanup: for (--bus; bus >= 0; --bus) { @@ -1473,6 +1473,7 @@ cleanup: if (info->tce_space) free_tce_table(info->tce_space); } + return -ENOMEM; } static int __init calgary_parse_options(char *p) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 0a19708074c2..5fa64ea5416f 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -687,7 +687,7 @@ failed: return 0; } -void __init detect_intel_iommu(void) +int __init detect_intel_iommu(void) { int ret; @@ -723,6 +723,8 @@ void __init detect_intel_iommu(void) } early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); dmar_tbl = NULL; + + return (ret ? 1 : -ENODEV); } diff --git a/include/linux/dmar.h b/include/linux/dmar.h index d7cecc90ed34..a20602041511 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -57,15 +57,15 @@ extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); /* Intel IOMMU detection */ -extern void detect_intel_iommu(void); +extern int detect_intel_iommu(void); extern int enable_drhd_fault_handling(void); extern int parse_ioapics_under_ir(void); extern int alloc_iommu(struct dmar_drhd_unit *); #else -static inline void detect_intel_iommu(void) +static inline int detect_intel_iommu(void) { - return; + return -ENODEV; } static inline int dmar_table_init(void) -- cgit v1.2.3 From 40d0802b3eb47d57e2d57a5244a18cbbe9632e13 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Aug 2010 22:03:08 -0700 Subject: gro: __napi_gro_receive() optimizations compare_ether_header() can have a special implementation on 64 bit arches if CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is defined. __napi_gro_receive() and vlan_gro_common() can avoid a conditional branch to perform device match. On x86_64, __napi_gro_receive() has now 38 instructions instead of 53 As gcc-4.4.3 still choose to not inline it, add inline keyword to this performance critical function. Signed-off-by: Eric Dumazet CC: Herbert Xu Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 18 +++++++++++++++++- net/8021q/vlan_core.c | 9 ++++++--- net/core/dev.c | 10 ++++++---- 3 files changed, 29 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 2308fbb4523a..fb6aa6070921 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -237,13 +237,29 @@ static inline bool is_etherdev_addr(const struct net_device *dev, * entry points. */ -static inline int compare_ether_header(const void *a, const void *b) +static inline unsigned long compare_ether_header(const void *a, const void *b) { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + unsigned long fold; + + /* + * We want to compare 14 bytes: + * [a0 ... a13] ^ [b0 ... b13] + * Use two long XOR, ORed together, with an overlap of two bytes. + * [a0 a1 a2 a3 a4 a5 a6 a7 ] ^ [b0 b1 b2 b3 b4 b5 b6 b7 ] | + * [a6 a7 a8 a9 a10 a11 a12 a13] ^ [b6 b7 b8 b9 b10 b11 b12 b13] + * This means the [a6 a7] ^ [b6 b7] part is done two times. + */ + fold = *(unsigned long *)a ^ *(unsigned long *)b; + fold |= *(unsigned long *)(a + 6) ^ *(unsigned long *)(b + 6); + return fold; +#else u32 *a32 = (u32 *)((u8 *)a + 2); u32 *b32 = (u32 *)((u8 *)b + 2); return (*(u16 *)a ^ *(u16 *)b) | (a32[0] ^ b32[0]) | (a32[1] ^ b32[1]) | (a32[2] ^ b32[2]); +#endif } #endif /* _LINUX_ETHERDEVICE_H */ diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 07eeb5b99dce..3438c01bbacf 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -105,9 +105,12 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, goto drop; for (p = napi->gro_list; p; p = p->next) { - NAPI_GRO_CB(p)->same_flow = - p->dev == skb->dev && !compare_ether_header( - skb_mac_header(p), skb_gro_mac_header(skb)); + unsigned long diffs; + + diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; + diffs |= compare_ether_header(skb_mac_header(p), + skb_gro_mac_header(skb)); + NAPI_GRO_CB(p)->same_flow = !diffs; NAPI_GRO_CB(p)->flush = 0; } diff --git a/net/core/dev.c b/net/core/dev.c index 859e30ff044a..63bd20a75929 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3169,16 +3169,18 @@ normal: } EXPORT_SYMBOL(dev_gro_receive); -static gro_result_t +static inline gro_result_t __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff *p; for (p = napi->gro_list; p; p = p->next) { - NAPI_GRO_CB(p)->same_flow = - (p->dev == skb->dev) && - !compare_ether_header(skb_mac_header(p), + unsigned long diffs; + + diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; + diffs |= compare_ether_header(skb_mac_header(p), skb_gro_mac_header(skb)); + NAPI_GRO_CB(p)->same_flow = !diffs; NAPI_GRO_CB(p)->flush = 0; } -- cgit v1.2.3 From 8789d459bc5e837bf37d261453df96ef54018d7b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Aug 2010 13:30:26 +0200 Subject: mac80211: allow scan to complete from any context The ieee80211_scan_completed() function was a frequent source of potential deadlocks, since it is called by drivers but may call back into drivers, so drivers had to make sure to call it without any locks held, which frequently lead to more complex code in drivers. Avoid that problem by allowing the function to be called in any context, and queueing the actual work it does. Also update the documentation for it to indicate this. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 3 ++- net/mac80211/ieee80211_i.h | 6 ++++++ net/mac80211/scan.c | 34 ++++++++++++++++++++++++++-------- 3 files changed, 34 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dcc8c2bf986e..8f97548b6d80 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2268,7 +2268,8 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw); * * When hardware scan offload is used (i.e. the hw_scan() callback is * assigned) this function needs to be called by the driver to notify - * mac80211 that the scan finished. + * mac80211 that the scan finished. This function can be called from + * any context, including hardirq context. * * @hw: the hardware that finished the scan * @aborted: set to true if scan was aborted diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9e225f01497b..31713320258c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -596,11 +596,17 @@ enum queue_stop_reason { * determine if we are on the operating channel or not * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning, * gets only set in conjunction with SCAN_SW_SCANNING + * @SCAN_COMPLETED: Set for our scan work function when the driver reported + * that the scan completed. + * @SCAN_ABORTED: Set for our scan work function when the driver reported + * a scan complete for an aborted scan. */ enum { SCAN_SW_SCANNING, SCAN_HW_SCANNING, SCAN_OFF_CHANNEL, + SCAN_COMPLETED, + SCAN_ABORTED, }; /** diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 31f233f7f51a..d60389ba9b95 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -248,13 +248,11 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) return true; } -void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) +static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) { struct ieee80211_local *local = hw_to_local(hw); bool was_hw_scan; - trace_api_scan_completed(local, aborted); - mutex_lock(&local->mtx); /* @@ -312,6 +310,18 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) ieee80211_mesh_notify_scan_completed(local); ieee80211_queue_work(&local->hw, &local->work_work); } + +void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) +{ + struct ieee80211_local *local = hw_to_local(hw); + + trace_api_scan_completed(local, aborted); + + set_bit(SCAN_COMPLETED, &local->scanning); + if (aborted) + set_bit(SCAN_ABORTED, &local->scanning); + ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0); +} EXPORT_SYMBOL(ieee80211_scan_completed); static int ieee80211_start_sw_scan(struct ieee80211_local *local) @@ -449,7 +459,7 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local, /* if no more bands/channels left, complete scan and advance to the idle state */ if (local->scan_channel_idx >= local->scan_req->n_channels) { - ieee80211_scan_completed(&local->hw, false); + __ieee80211_scan_completed(&local->hw, false); return 1; } @@ -641,6 +651,14 @@ void ieee80211_scan_work(struct work_struct *work) struct ieee80211_sub_if_data *sdata = local->scan_sdata; unsigned long next_delay = 0; + if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) { + bool aborted; + + aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning); + __ieee80211_scan_completed(&local->hw, aborted); + return; + } + mutex_lock(&local->mtx); if (!sdata || !local->scan_req) { mutex_unlock(&local->mtx); @@ -651,7 +669,7 @@ void ieee80211_scan_work(struct work_struct *work) int rc = drv_hw_scan(local, sdata, local->hw_scan_req); mutex_unlock(&local->mtx); if (rc) - ieee80211_scan_completed(&local->hw, true); + __ieee80211_scan_completed(&local->hw, true); return; } @@ -666,7 +684,7 @@ void ieee80211_scan_work(struct work_struct *work) mutex_unlock(&local->mtx); if (rc) - ieee80211_scan_completed(&local->hw, true); + __ieee80211_scan_completed(&local->hw, true); return; } @@ -676,7 +694,7 @@ void ieee80211_scan_work(struct work_struct *work) * Avoid re-scheduling when the sdata is going away. */ if (!ieee80211_sdata_running(sdata)) { - ieee80211_scan_completed(&local->hw, true); + __ieee80211_scan_completed(&local->hw, true); return; } @@ -783,5 +801,5 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) mutex_unlock(&local->mtx); if (abortscan) - ieee80211_scan_completed(&local->hw, true); + __ieee80211_scan_completed(&local->hw, true); } -- cgit v1.2.3 From c0692b8fe29fb4d4dad33487aabf3ed7e1e880c0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Aug 2010 14:26:53 +0300 Subject: cfg80211: allow changing port control protocol Some vendor specified mechanisms for 802.1X-style functionality use a different protocol than EAP (even if EAP is vendor-extensible). Allow setting the ethertype for the protocol when a driver has support for this. The default if unspecified is EAP, of course. Note: This is suitable only for station mode, not for AP implementation. Signed-off-by: Johannes Berg Signed-off-by: Juuso Oikarinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 16 +++++++++++++++- include/net/cfg80211.h | 24 +++++++++++++++++------- net/wireless/nl80211.c | 25 ++++++++++++++++++++++--- net/wireless/wext-sme.c | 2 ++ 4 files changed, 56 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index ec1690da7845..31603e8b5581 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -295,7 +295,9 @@ * auth and assoc steps. For this, you need to specify the SSID in a * %NL80211_ATTR_SSID attribute, and can optionally specify the association * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_MAC, - * %NL80211_ATTR_WIPHY_FREQ and %NL80211_ATTR_CONTROL_PORT. + * %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT, + * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE and + * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT. * It is also sent as an event, with the BSSID and response IEs when the * connection is established or failed to be established. This can be * determined by the STATUS_CODE attribute. @@ -686,6 +688,15 @@ enum nl80211_commands { * request, the driver will assume that the port is unauthorized until * authorized by user space. Otherwise, port is marked authorized by * default in station mode. + * @NL80211_ATTR_CONTROL_PORT_ETHERTYPE: A 16-bit value indicating the + * ethertype that will be used for key negotiation. It can be + * specified with the associate and connect commands. If it is not + * specified, the value defaults to 0x888E (PAE, 802.1X). This + * attribute is also used as a flag in the wiphy information to + * indicate that protocols other than PAE are supported. + * @NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT: When included along with + * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, indicates that the custom + * ethertype frames used for key negotiation must not be encrypted. * * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver. * We recommend using nested, driver-specific attributes within this. @@ -951,6 +962,9 @@ enum nl80211_attrs { NL80211_ATTR_RX_FRAME_TYPES, NL80211_ATTR_FRAME_TYPE, + NL80211_ATTR_CONTROL_PORT_ETHERTYPE, + NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f2740537b5d6..4c8c727d0cca 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -763,6 +763,10 @@ const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie); * sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is * required to assume that the port is unauthorized until authorized by * user space. Otherwise, port is marked authorized by default. + * @control_port_ethertype: the control port protocol that should be + * allowed through even on unauthorized ports + * @control_port_no_encrypt: TRUE to prevent encryption of control port + * protocol frames. */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -772,6 +776,8 @@ struct cfg80211_crypto_settings { int n_akm_suites; u32 akm_suites[NL80211_MAX_NR_AKM_SUITES]; bool control_port; + __be16 control_port_ethertype; + bool control_port_no_encrypt; }; /** @@ -1293,15 +1299,19 @@ struct cfg80211_ops { * @WIPHY_FLAG_4ADDR_AP: supports 4addr mode even on AP (with a single station * on a VLAN interface) * @WIPHY_FLAG_4ADDR_STATION: supports 4addr mode even as a station + * @WIPHY_FLAG_CONTROL_PORT_PROTOCOL: This device supports setting the + * control port protocol ethertype. The device also honours the + * control_port_no_encrypt flag. */ enum wiphy_flags { - WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), - WIPHY_FLAG_STRICT_REGULATORY = BIT(1), - WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), - WIPHY_FLAG_NETNS_OK = BIT(3), - WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), - WIPHY_FLAG_4ADDR_AP = BIT(5), - WIPHY_FLAG_4ADDR_STATION = BIT(6), + WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), + WIPHY_FLAG_STRICT_REGULATORY = BIT(1), + WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), + WIPHY_FLAG_NETNS_OK = BIT(3), + WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), + WIPHY_FLAG_4ADDR_AP = BIT(5), + WIPHY_FLAG_4ADDR_STATION = BIT(6), + WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), }; struct mac_address { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 49f5ca35e787..85a23de7bff3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -136,6 +136,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { .len = sizeof(struct nl80211_sta_flag_update), }, [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG }, + [NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 }, + [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG }, [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, @@ -474,6 +476,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, dev->wiphy.max_num_pmkids); + if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) + NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE); + nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); if (!nl_modes) goto nla_put_failure; @@ -3691,7 +3696,8 @@ unlock_rtnl: return err; } -static int nl80211_crypto_settings(struct genl_info *info, +static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, + struct genl_info *info, struct cfg80211_crypto_settings *settings, int cipher_limit) { @@ -3699,6 +3705,19 @@ static int nl80211_crypto_settings(struct genl_info *info, settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT]; + if (info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) { + u16 proto; + proto = nla_get_u16( + info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]); + settings->control_port_ethertype = cpu_to_be16(proto); + if (!(rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && + proto != ETH_P_PAE) + return -EINVAL; + if (info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]) + settings->control_port_no_encrypt = true; + } else + settings->control_port_ethertype = cpu_to_be16(ETH_P_PAE); + if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) { void *data; int len, i; @@ -3826,7 +3845,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_PREV_BSSID]) prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); - err = nl80211_crypto_settings(info, &crypto, 1); + err = nl80211_crypto_settings(rdev, info, &crypto, 1); if (!err) err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, ssid, ssid_len, ie, ie_len, use_mfp, @@ -4303,7 +4322,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) connect.privacy = info->attrs[NL80211_ATTR_PRIVACY]; - err = nl80211_crypto_settings(info, &connect.crypto, + err = nl80211_crypto_settings(rdev, info, &connect.crypto, NL80211_MAX_NR_CIPHER_SUITES); if (err) return err; diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 9818198add8a..6fffe62d7c25 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -197,6 +197,8 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, wdev->wext.connect.ssid_len = len; wdev->wext.connect.crypto.control_port = false; + wdev->wext.connect.crypto.control_port_ethertype = + cpu_to_be16(ETH_P_PAE); err = cfg80211_mgd_wext_connect(rdev, wdev); out: -- cgit v1.2.3 From 34d4bc4d41d282a66dafe1b01a7d46bad468cefb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Aug 2010 12:35:58 +0200 Subject: mac80211: support runtime interface type changes Add support to mac80211 for changing the interface type even when the interface is UP, if the driver supports it. To achieve this * add a new driver callback for switching, * split some of the interface up/down code out into new functions (do_open/do_stop), and * maintain an own __SDATA_RUNNING bit that will not be set during interface type, so that any other code doesn't use the interface. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 9 +++ net/mac80211/cfg.c | 3 - net/mac80211/driver-ops.h | 14 ++++ net/mac80211/driver-trace.h | 25 +++++++ net/mac80211/ieee80211_i.h | 14 +++- net/mac80211/iface.c | 157 ++++++++++++++++++++++++++++++++++---------- 6 files changed, 185 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8f97548b6d80..f91fc331369b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1537,6 +1537,12 @@ enum ieee80211_ampdu_mlme_action { * negative error code (which will be seen in userspace.) * Must be implemented and can sleep. * + * @change_interface: Called when a netdevice changes type. This callback + * is optional, but only if it is supported can interface types be + * switched while the interface is UP. The callback may sleep. + * Note that while an interface is being switched, it will not be + * found by the interface iteration callbacks. + * * @remove_interface: Notifies a driver that an interface is going down. * The @stop callback is called after this if it is the last interface * and no monitor interfaces are present. @@ -1693,6 +1699,9 @@ struct ieee80211_ops { void (*stop)(struct ieee80211_hw *hw); int (*add_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); + int (*change_interface)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + enum nl80211_iftype new_type); void (*remove_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int (*config)(struct ieee80211_hw *hw, u32 changed); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f82b18e996b2..5de1ca3f17b9 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -52,9 +52,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); int ret; - if (ieee80211_sdata_running(sdata)) - return -EBUSY; - ret = ieee80211_if_change_type(sdata, type); if (ret) return ret; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 14123dce544b..6064b7b09e01 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -54,6 +54,20 @@ static inline int drv_add_interface(struct ieee80211_local *local, return ret; } +static inline int drv_change_interface(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type) +{ + int ret; + + might_sleep(); + + trace_drv_change_interface(local, sdata, type); + ret = local->ops->change_interface(&local->hw, &sdata->vif, type); + trace_drv_return_int(local, ret); + return ret; +} + static inline void drv_remove_interface(struct ieee80211_local *local, struct ieee80211_vif *vif) { diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index b5a95582d816..f6f3d89e43fa 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -136,6 +136,31 @@ TRACE_EVENT(drv_add_interface, ) ); +TRACE_EVENT(drv_change_interface, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type), + + TP_ARGS(local, sdata, type), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u32, new_type) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->new_type = type; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT " new type:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->new_type + ) +); + TRACE_EVENT(drv_remove_interface, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f64837788681..d529bd5eab47 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -472,6 +472,16 @@ enum ieee80211_sub_if_data_flags { IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), }; +/** + * enum ieee80211_sdata_state_bits - virtual interface state bits + * @SDATA_STATE_RUNNING: virtual interface is up & running; this + * mirrors netif_running() but is separate for interface type + * change handling while the interface is up + */ +enum ieee80211_sdata_state_bits { + SDATA_STATE_RUNNING, +}; + struct ieee80211_sub_if_data { struct list_head list; @@ -485,6 +495,8 @@ struct ieee80211_sub_if_data { unsigned int flags; + unsigned long state; + int drop_unencrypted; char name[IFNAMSIZ]; @@ -1087,7 +1099,7 @@ void ieee80211_recalc_idle(struct ieee80211_local *local); static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) { - return netif_running(sdata->dev); + return test_bit(SDATA_STATE_RUNNING, &sdata->state); } /* tx handling */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index cba3d806d722..c1cc200ac81f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -148,7 +148,12 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, return 0; } -static int ieee80211_open(struct net_device *dev) +/* + * NOTE: Be very careful when changing this function, it must NOT return + * an error on interface type changes that have been pre-checked, so most + * checks should be in ieee80211_check_concurrent_iface. + */ +static int ieee80211_do_open(struct net_device *dev, bool coming_up) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -157,15 +162,6 @@ static int ieee80211_open(struct net_device *dev) int res; u32 hw_reconf_flags = 0; - /* fail early if user set an invalid address */ - if (!is_zero_ether_addr(dev->dev_addr) && - !is_valid_ether_addr(dev->dev_addr)) - return -EADDRNOTAVAIL; - - res = ieee80211_check_concurrent_iface(sdata, sdata->vif.type); - if (res) - return res; - switch (sdata->vif.type) { case NL80211_IFTYPE_WDS: if (!is_valid_ether_addr(sdata->u.wds.remote_addr)) @@ -258,9 +254,11 @@ static int ieee80211_open(struct net_device *dev) netif_carrier_on(dev); break; default: - res = drv_add_interface(local, &sdata->vif); - if (res) - goto err_stop; + if (coming_up) { + res = drv_add_interface(local, &sdata->vif); + if (res) + goto err_stop; + } if (ieee80211_vif_is_mesh(&sdata->vif)) { local->fif_other_bss++; @@ -316,7 +314,9 @@ static int ieee80211_open(struct net_device *dev) hw_reconf_flags |= __ieee80211_recalc_idle(local); mutex_unlock(&local->mtx); - local->open_count++; + if (coming_up) + local->open_count++; + if (hw_reconf_flags) { ieee80211_hw_config(local, hw_reconf_flags); /* @@ -331,6 +331,8 @@ static int ieee80211_open(struct net_device *dev) netif_tx_start_all_queues(dev); + set_bit(SDATA_STATE_RUNNING, &sdata->state); + return 0; err_del_interface: drv_remove_interface(local, &sdata->vif); @@ -344,19 +346,38 @@ static int ieee80211_open(struct net_device *dev) return res; } -static int ieee80211_stop(struct net_device *dev) +static int ieee80211_open(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + int err; + + /* fail early if user set an invalid address */ + if (!is_zero_ether_addr(dev->dev_addr) && + !is_valid_ether_addr(dev->dev_addr)) + return -EADDRNOTAVAIL; + + err = ieee80211_check_concurrent_iface(sdata, sdata->vif.type); + if (err) + return err; + + return ieee80211_do_open(dev, true); +} + +static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, + bool going_down) +{ struct ieee80211_local *local = sdata->local; unsigned long flags; struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; int i; + clear_bit(SDATA_STATE_RUNNING, &sdata->state); + /* * Stop TX on this interface first. */ - netif_tx_stop_all_queues(dev); + netif_tx_stop_all_queues(sdata->dev); /* * Purge work for this interface. @@ -394,11 +415,12 @@ static int ieee80211_stop(struct net_device *dev) if (sdata->vif.type == NL80211_IFTYPE_AP) local->fif_pspoll--; - netif_addr_lock_bh(dev); + netif_addr_lock_bh(sdata->dev); spin_lock_bh(&local->filter_lock); - __hw_addr_unsync(&local->mc_list, &dev->mc, dev->addr_len); + __hw_addr_unsync(&local->mc_list, &sdata->dev->mc, + sdata->dev->addr_len); spin_unlock_bh(&local->filter_lock); - netif_addr_unlock_bh(dev); + netif_addr_unlock_bh(sdata->dev); ieee80211_configure_filter(local); @@ -432,7 +454,8 @@ static int ieee80211_stop(struct net_device *dev) WARN_ON(!list_empty(&sdata->u.ap.vlans)); } - local->open_count--; + if (going_down) + local->open_count--; switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: @@ -504,7 +527,8 @@ static int ieee80211_stop(struct net_device *dev) */ ieee80211_free_keys(sdata); - drv_remove_interface(local, &sdata->vif); + if (going_down) + drv_remove_interface(local, &sdata->vif); } sdata->bss = NULL; @@ -540,6 +564,13 @@ static int ieee80211_stop(struct net_device *dev) } } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); +} + +static int ieee80211_stop(struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + ieee80211_do_stop(sdata, true); return 0; } @@ -857,9 +888,72 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, ieee80211_debugfs_add_netdev(sdata); } +static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type) +{ + struct ieee80211_local *local = sdata->local; + int ret, err; + + ASSERT_RTNL(); + + if (!local->ops->change_interface) + return -EBUSY; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + /* + * Could maybe also all others here? + * Just not sure how that interacts + * with the RX/config path e.g. for + * mesh. + */ + break; + default: + return -EBUSY; + } + + switch (type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + /* + * Could probably support everything + * but WDS here (WDS do_open can fail + * under memory pressure, which this + * code isn't prepared to handle). + */ + break; + default: + return -EBUSY; + } + + ret = ieee80211_check_concurrent_iface(sdata, type); + if (ret) + return ret; + + ieee80211_do_stop(sdata, false); + + ieee80211_teardown_sdata(sdata->dev); + + ret = drv_change_interface(local, sdata, type); + if (ret) + type = sdata->vif.type; + + ieee80211_setup_sdata(sdata, type); + + err = ieee80211_do_open(sdata->dev, false); + WARN(err, "type change: do_open returned %d", err); + + return ret; +} + int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type) { + int ret; + ASSERT_RTNL(); if (type == sdata->vif.type) @@ -870,18 +964,15 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, type == NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; - /* - * We could, here, on changes between IBSS/STA/MESH modes, - * invoke an MLME function instead that disassociates etc. - * and goes into the requested mode. - */ - - if (ieee80211_sdata_running(sdata)) - return -EBUSY; - - /* Purge and reset type-dependent state. */ - ieee80211_teardown_sdata(sdata->dev); - ieee80211_setup_sdata(sdata, type); + if (ieee80211_sdata_running(sdata)) { + ret = ieee80211_runtime_change_iftype(sdata, type); + if (ret) + return ret; + } else { + /* Purge and reset type-dependent state. */ + ieee80211_teardown_sdata(sdata->dev); + ieee80211_setup_sdata(sdata, type); + } /* reset some values that shouldn't be kept across type changes */ sdata->vif.bss_conf.basic_rates = -- cgit v1.2.3 From 7950c407c0288b223a200c1bba8198941599ca37 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:14 -0700 Subject: memblock: Add memblock_free/reserve_reserved_regions() So we can avoid export memblock_reserved_init_regions() Suggested by Ben. -v2: use __init_memblock attribute Signed-off-by: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- include/linux/memblock.h | 2 ++ mm/memblock.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 4df09bdcae42..7d285271130d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -48,6 +48,8 @@ extern int memblock_can_resize; if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) u64 memblock_find_in_range(u64 start, u64 end, u64 size, u64 align); +int memblock_free_reserved_regions(void); +int memblock_reserve_reserved_regions(void); extern void __init memblock_init(void); extern void __init memblock_analyze(void); diff --git a/mm/memblock.c b/mm/memblock.c index b7ab10a2ef46..65e3ba8d09fb 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -170,6 +170,30 @@ u64 __init_memblock memblock_find_in_range(u64 start, u64 end, u64 size, u64 ali return memblock_find_base(size, align, start, end); } +/* + * Free memblock.reserved.regions + */ +int __init_memblock memblock_free_reserved_regions(void) +{ + if (memblock.reserved.regions == memblock_reserved_init_regions) + return 0; + + return memblock_free(__pa(memblock.reserved.regions), + sizeof(struct memblock_region) * memblock.reserved.max); +} + +/* + * Reserve memblock.reserved.regions + */ +int __init_memblock memblock_reserve_reserved_regions(void) +{ + if (memblock.reserved.regions == memblock_reserved_init_regions) + return 0; + + return memblock_reserve(__pa(memblock.reserved.regions), + sizeof(struct memblock_region) * memblock.reserved.max); +} + static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) { unsigned long i; -- cgit v1.2.3 From edbe7d23b4482e7f33179290bcff3b1feae1c5f3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:16 -0700 Subject: memblock: Add find_memory_core_early() According to node range in early_node_map[] with __memblock_find_in_range to find free range. Will be used by memblock_x86_find_in_range_node() memblock_x86_find_in_range_node will be used to find right buffer for NODE_DATA Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 2 ++ mm/page_alloc.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a2b48041b910..993e85f0afcb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1164,6 +1164,8 @@ extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); int add_from_early_node_map(struct range *range, int az, int nr_range, int nid); +u64 __init find_memory_core_early(int nid, u64 size, u64 align, + u64 goal, u64 limit); void *__alloc_memory_core_early(int nodeid, u64 size, u64 align, u64 goal, u64 limit); typedef int (*work_fn_t)(unsigned long, unsigned long, void *); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9bd339eb04c6..8c9b34674d83 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -3612,6 +3613,41 @@ void __init free_bootmem_with_active_regions(int nid, } } +#ifdef CONFIG_HAVE_MEMBLOCK +u64 __init find_memory_core_early(int nid, u64 size, u64 align, + u64 goal, u64 limit) +{ + int i; + + /* Need to go over early_node_map to find out good range for node */ + for_each_active_range_index_in_nid(i, nid) { + u64 addr; + u64 ei_start, ei_last; + u64 final_start, final_end; + + ei_last = early_node_map[i].end_pfn; + ei_last <<= PAGE_SHIFT; + ei_start = early_node_map[i].start_pfn; + ei_start <<= PAGE_SHIFT; + + final_start = max(ei_start, goal); + final_end = min(ei_last, limit); + + if (final_start >= final_end) + continue; + + addr = memblock_find_in_range(final_start, final_end, size, align); + + if (addr == MEMBLOCK_ERROR) + continue; + + return addr; + } + + return MEMBLOCK_ERROR; +} +#endif + int __init add_from_early_node_map(struct range *range, int az, int nr_range, int nid) { -- cgit v1.2.3 From a587d2daebcd2bc159d4348b6a7b028950a6d803 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:18 -0700 Subject: x86: Remove not used early_res code and some functions in e820.c that are not used anymore Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/e820.h | 14 -- arch/x86/kernel/e820.c | 52 ---- include/linux/early_res.h | 23 -- kernel/Makefile | 1 - kernel/early_res.c | 590 -------------------------------------------- 5 files changed, 680 deletions(-) delete mode 100644 include/linux/early_res.h delete mode 100644 kernel/early_res.c (limited to 'include') diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 388fed291467..718646384e03 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -112,31 +112,17 @@ static inline void early_memtest(unsigned long start, unsigned long end) } #endif -extern unsigned long end_user_pfn; - -extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); -extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); -extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); - extern unsigned long e820_end_of_ram_pfn(void); extern unsigned long e820_end_of_low_ram_pfn(void); -extern void e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn); -extern u64 e820_hole_size(u64 start, u64 end); - extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); void memblock_x86_fill(void); - extern void finish_e820_parsing(void); extern void e820_reserve_resources(void); extern void e820_reserve_resources_late(void); extern void setup_memory_map(void); extern char *default_machine_specific_memory_setup(void); -void reserve_early(u64 start, u64 end, char *name); -void free_early(u64 start, u64 end); - /* * Returns true iff the specified range [s,e) is completely contained inside * the ISA region. diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a9221d18a5ed..d5fd89462d79 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -738,32 +738,6 @@ static int __init e820_mark_nvs_memory(void) core_initcall(e820_mark_nvs_memory); #endif -/* - * Find a free area with specified alignment in a specific range. - */ -u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) -{ - u64 mem = memblock_find_in_range(start, end, size, align); - - if (mem == MEMBLOCK_ERROR) - return -1ULL; - - return mem; -} - -/* - * Find next free range after *start - */ -u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) -{ - u64 mem = memblock_x86_find_in_range_size(start, sizep, align); - - if (mem == MEMBLOCK_ERROR) - return -1ULL - - return mem; -} - /* * pre allocated 4k and reserved it in memblock and e820_saved */ @@ -856,32 +830,6 @@ unsigned long __init e820_end_of_low_ram_pfn(void) return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); } -/* Walk the e820 map and register active regions within a node */ -void __init e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long last_pfn) -{ - memblock_x86_register_active_regions(nid, start_pfn, last_pfn); -} - -/* - * Find the hole size (in bytes) in the memory range. - * @start: starting address of the memory range to scan - * @end: ending address of the memory range to scan - */ -u64 __init e820_hole_size(u64 start, u64 end) -{ - return memblock_x86_hole_size(start, end); -} - -void reserve_early(u64 start, u64 end, char *name) -{ - memblock_x86_reserve_range(start, end, name); -} -void free_early(u64 start, u64 end) -{ - memblock_x86_free_range(start, end); -} - static void early_panic(char *msg) { early_printk(msg); diff --git a/include/linux/early_res.h b/include/linux/early_res.h deleted file mode 100644 index 29c09f57a13c..000000000000 --- a/include/linux/early_res.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef _LINUX_EARLY_RES_H -#define _LINUX_EARLY_RES_H -#ifdef __KERNEL__ - -extern void reserve_early(u64 start, u64 end, char *name); -extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); -extern void free_early(u64 start, u64 end); -void free_early_partial(u64 start, u64 end); -extern void early_res_to_bootmem(u64 start, u64 end); - -void reserve_early_without_check(u64 start, u64 end, char *name); -u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, - u64 size, u64 align); -u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start, - u64 *sizep, u64 align); -u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align); -u64 get_max_mapped(void); -#include -int get_free_all_memory_range(struct range **rangep, int nodeid); - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_EARLY_RES_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 057472fbc272..80e61c3e44ae 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -11,7 +11,6 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ async.o range.o -obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o obj-y += groups.o ifdef CONFIG_FUNCTION_TRACER diff --git a/kernel/early_res.c b/kernel/early_res.c deleted file mode 100644 index 7bfae887f211..000000000000 --- a/kernel/early_res.c +++ /dev/null @@ -1,590 +0,0 @@ -/* - * early_res, could be used to replace bootmem - */ -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Early reserved memory areas. - */ -/* - * need to make sure this one is bigger enough before - * find_fw_memmap_area could be used - */ -#define MAX_EARLY_RES_X 32 - -struct early_res { - u64 start, end; - char name[15]; - char overlap_ok; -}; -static struct early_res early_res_x[MAX_EARLY_RES_X] __initdata; - -static int max_early_res __initdata = MAX_EARLY_RES_X; -static struct early_res *early_res __initdata = &early_res_x[0]; -static int early_res_count __initdata; - -static int __init find_overlapped_early(u64 start, u64 end) -{ - int i; - struct early_res *r; - - for (i = 0; i < max_early_res && early_res[i].end; i++) { - r = &early_res[i]; - if (end > r->start && start < r->end) - break; - } - - return i; -} - -/* - * Drop the i-th range from the early reservation map, - * by copying any higher ranges down one over it, and - * clearing what had been the last slot. - */ -static void __init drop_range(int i) -{ - int j; - - for (j = i + 1; j < max_early_res && early_res[j].end; j++) - ; - - memmove(&early_res[i], &early_res[i + 1], - (j - 1 - i) * sizeof(struct early_res)); - - early_res[j - 1].end = 0; - early_res_count--; -} - -static void __init drop_range_partial(int i, u64 start, u64 end) -{ - u64 common_start, common_end; - u64 old_start, old_end; - - old_start = early_res[i].start; - old_end = early_res[i].end; - common_start = max(old_start, start); - common_end = min(old_end, end); - - /* no overlap ? */ - if (common_start >= common_end) - return; - - if (old_start < common_start) { - /* make head segment */ - early_res[i].end = common_start; - if (old_end > common_end) { - char name[15]; - - /* - * Save a local copy of the name, since the - * early_res array could get resized inside - * reserve_early_without_check() -> - * __check_and_double_early_res(), which would - * make the current name pointer invalid. - */ - strncpy(name, early_res[i].name, - sizeof(early_res[i].name) - 1); - /* add another for left over on tail */ - reserve_early_without_check(common_end, old_end, name); - } - return; - } else { - if (old_end > common_end) { - /* reuse the entry for tail left */ - early_res[i].start = common_end; - return; - } - /* all covered */ - drop_range(i); - } -} - -/* - * Split any existing ranges that: - * 1) are marked 'overlap_ok', and - * 2) overlap with the stated range [start, end) - * into whatever portion (if any) of the existing range is entirely - * below or entirely above the stated range. Drop the portion - * of the existing range that overlaps with the stated range, - * which will allow the caller of this routine to then add that - * stated range without conflicting with any existing range. - */ -static void __init drop_overlaps_that_are_ok(u64 start, u64 end) -{ - int i; - struct early_res *r; - u64 lower_start, lower_end; - u64 upper_start, upper_end; - char name[15]; - - for (i = 0; i < max_early_res && early_res[i].end; i++) { - r = &early_res[i]; - - /* Continue past non-overlapping ranges */ - if (end <= r->start || start >= r->end) - continue; - - /* - * Leave non-ok overlaps as is; let caller - * panic "Overlapping early reservations" - * when it hits this overlap. - */ - if (!r->overlap_ok) - return; - - /* - * We have an ok overlap. We will drop it from the early - * reservation map, and add back in any non-overlapping - * portions (lower or upper) as separate, overlap_ok, - * non-overlapping ranges. - */ - - /* 1. Note any non-overlapping (lower or upper) ranges. */ - strncpy(name, r->name, sizeof(name) - 1); - - lower_start = lower_end = 0; - upper_start = upper_end = 0; - if (r->start < start) { - lower_start = r->start; - lower_end = start; - } - if (r->end > end) { - upper_start = end; - upper_end = r->end; - } - - /* 2. Drop the original ok overlapping range */ - drop_range(i); - - i--; /* resume for-loop on copied down entry */ - - /* 3. Add back in any non-overlapping ranges. */ - if (lower_end) - reserve_early_overlap_ok(lower_start, lower_end, name); - if (upper_end) - reserve_early_overlap_ok(upper_start, upper_end, name); - } -} - -static void __init __reserve_early(u64 start, u64 end, char *name, - int overlap_ok) -{ - int i; - struct early_res *r; - - i = find_overlapped_early(start, end); - if (i >= max_early_res) - panic("Too many early reservations"); - r = &early_res[i]; - if (r->end) - panic("Overlapping early reservations " - "%llx-%llx %s to %llx-%llx %s\n", - start, end - 1, name ? name : "", r->start, - r->end - 1, r->name); - r->start = start; - r->end = end; - r->overlap_ok = overlap_ok; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); - early_res_count++; -} - -/* - * A few early reservtations come here. - * - * The 'overlap_ok' in the name of this routine does -not- mean it - * is ok for these reservations to overlap an earlier reservation. - * Rather it means that it is ok for subsequent reservations to - * overlap this one. - * - * Use this entry point to reserve early ranges when you are doing - * so out of "Paranoia", reserving perhaps more memory than you need, - * just in case, and don't mind a subsequent overlapping reservation - * that is known to be needed. - * - * The drop_overlaps_that_are_ok() call here isn't really needed. - * It would be needed if we had two colliding 'overlap_ok' - * reservations, so that the second such would not panic on the - * overlap with the first. We don't have any such as of this - * writing, but might as well tolerate such if it happens in - * the future. - */ -void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) -{ - drop_overlaps_that_are_ok(start, end); - __reserve_early(start, end, name, 1); -} - -static void __init __check_and_double_early_res(u64 ex_start, u64 ex_end) -{ - u64 start, end, size, mem; - struct early_res *new; - - /* do we have enough slots left ? */ - if ((max_early_res - early_res_count) > max(max_early_res/8, 2)) - return; - - /* double it */ - mem = -1ULL; - size = sizeof(struct early_res) * max_early_res * 2; - if (early_res == early_res_x) - start = 0; - else - start = early_res[0].end; - end = ex_start; - if (start + size < end) - mem = find_fw_memmap_area(start, end, size, - sizeof(struct early_res)); - if (mem == -1ULL) { - start = ex_end; - end = get_max_mapped(); - if (start + size < end) - mem = find_fw_memmap_area(start, end, size, - sizeof(struct early_res)); - } - if (mem == -1ULL) - panic("can not find more space for early_res array"); - - new = __va(mem); - /* save the first one for own */ - new[0].start = mem; - new[0].end = mem + size; - new[0].overlap_ok = 0; - /* copy old to new */ - if (early_res == early_res_x) { - memcpy(&new[1], &early_res[0], - sizeof(struct early_res) * max_early_res); - memset(&new[max_early_res+1], 0, - sizeof(struct early_res) * (max_early_res - 1)); - early_res_count++; - } else { - memcpy(&new[1], &early_res[1], - sizeof(struct early_res) * (max_early_res - 1)); - memset(&new[max_early_res], 0, - sizeof(struct early_res) * max_early_res); - } - memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); - early_res = new; - max_early_res *= 2; - printk(KERN_DEBUG "early_res array is doubled to %d at [%llx - %llx]\n", - max_early_res, mem, mem + size - 1); -} - -/* - * Most early reservations come here. - * - * We first have drop_overlaps_that_are_ok() drop any pre-existing - * 'overlap_ok' ranges, so that we can then reserve this memory - * range without risk of panic'ing on an overlapping overlap_ok - * early reservation. - */ -void __init reserve_early(u64 start, u64 end, char *name) -{ - if (start >= end) - return; - - __check_and_double_early_res(start, end); - - drop_overlaps_that_are_ok(start, end); - __reserve_early(start, end, name, 0); -} - -void __init reserve_early_without_check(u64 start, u64 end, char *name) -{ - struct early_res *r; - - if (start >= end) - return; - - __check_and_double_early_res(start, end); - - r = &early_res[early_res_count]; - - r->start = start; - r->end = end; - r->overlap_ok = 0; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); - early_res_count++; -} - -void __init free_early(u64 start, u64 end) -{ - struct early_res *r; - int i; - - kmemleak_free_part(__va(start), end - start); - - i = find_overlapped_early(start, end); - r = &early_res[i]; - if (i >= max_early_res || r->end != end || r->start != start) - panic("free_early on not reserved area: %llx-%llx!", - start, end - 1); - - drop_range(i); -} - -void __init free_early_partial(u64 start, u64 end) -{ - struct early_res *r; - int i; - - kmemleak_free_part(__va(start), end - start); - - if (start == end) - return; - - if (WARN_ONCE(start > end, " wrong range [%#llx, %#llx]\n", start, end)) - return; - -try_next: - i = find_overlapped_early(start, end); - if (i >= max_early_res) - return; - - r = &early_res[i]; - /* hole ? */ - if (r->end >= end && r->start <= start) { - drop_range_partial(i, start, end); - return; - } - - drop_range_partial(i, start, end); - goto try_next; -} - -#ifdef CONFIG_NO_BOOTMEM -static void __init subtract_early_res(struct range *range, int az) -{ - int i, count; - u64 final_start, final_end; - int idx = 0; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - /* need to skip first one ?*/ - if (early_res != early_res_x) - idx = 1; - -#define DEBUG_PRINT_EARLY_RES 1 - -#if DEBUG_PRINT_EARLY_RES - printk(KERN_INFO "Subtract (%d early reservations)\n", count); -#endif - for (i = idx; i < count; i++) { - struct early_res *r = &early_res[i]; -#if DEBUG_PRINT_EARLY_RES - printk(KERN_INFO " #%d [%010llx - %010llx] %15s\n", i, - r->start, r->end, r->name); -#endif - final_start = PFN_DOWN(r->start); - final_end = PFN_UP(r->end); - if (final_start >= final_end) - continue; - subtract_range(range, az, final_start, final_end); - } - -} - -int __init get_free_all_memory_range(struct range **rangep, int nodeid) -{ - int i, count; - u64 start = 0, end; - u64 size; - u64 mem; - struct range *range; - int nr_range; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - count *= 2; - - size = sizeof(struct range) * count; - end = get_max_mapped(); -#ifdef MAX_DMA32_PFN - if (end > (MAX_DMA32_PFN << PAGE_SHIFT)) - start = MAX_DMA32_PFN << PAGE_SHIFT; -#endif - mem = find_fw_memmap_area(start, end, size, sizeof(struct range)); - if (mem == -1ULL) - panic("can not find more space for range free"); - - range = __va(mem); - /* use early_node_map[] and early_res to get range array at first */ - memset(range, 0, size); - nr_range = 0; - - /* need to go over early_node_map to find out good range for node */ - nr_range = add_from_early_node_map(range, count, nr_range, nodeid); -#ifdef CONFIG_X86_32 - subtract_range(range, count, max_low_pfn, -1ULL); -#endif - subtract_early_res(range, count); - nr_range = clean_sort_range(range, count); - - /* need to clear it ? */ - if (nodeid == MAX_NUMNODES) { - memset(&early_res[0], 0, - sizeof(struct early_res) * max_early_res); - early_res = NULL; - max_early_res = 0; - } - - *rangep = range; - return nr_range; -} -#else -void __init early_res_to_bootmem(u64 start, u64 end) -{ - int i, count; - u64 final_start, final_end; - int idx = 0; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - /* need to skip first one ?*/ - if (early_res != early_res_x) - idx = 1; - - printk(KERN_INFO "(%d/%d early reservations) ==> bootmem [%010llx - %010llx]\n", - count - idx, max_early_res, start, end); - for (i = idx; i < count; i++) { - struct early_res *r = &early_res[i]; - printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, - r->start, r->end, r->name); - final_start = max(start, r->start); - final_end = min(end, r->end); - if (final_start >= final_end) { - printk(KERN_CONT "\n"); - continue; - } - printk(KERN_CONT " ==> [%010llx - %010llx]\n", - final_start, final_end); - reserve_bootmem_generic(final_start, final_end - final_start, - BOOTMEM_DEFAULT); - } - /* clear them */ - memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); - early_res = NULL; - max_early_res = 0; - early_res_count = 0; -} -#endif - -/* Check for already reserved areas */ -static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) -{ - int i; - u64 addr = *addrp; - int changed = 0; - struct early_res *r; -again: - i = find_overlapped_early(addr, addr + size); - r = &early_res[i]; - if (i < max_early_res && r->end) { - *addrp = addr = round_up(r->end, align); - changed = 1; - goto again; - } - return changed; -} - -/* Check for already reserved areas */ -static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) -{ - int i; - u64 addr = *addrp, last; - u64 size = *sizep; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < max_early_res && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last > r->start && addr < r->start) { - size = r->start - addr; - changed = 1; - goto again; - } - if (last > r->end && addr < r->end) { - addr = round_up(r->end, align); - size = last - addr; - changed = 1; - goto again; - } - if (last <= r->end && addr >= r->start) { - (*sizep)++; - return 0; - } - } - if (changed) { - *addrp = addr; - *sizep = size; - } - return changed; -} - -/* - * Find a free area with specified alignment in a specific range. - * only with the area.between start to end is active range from early_node_map - * so they are good as RAM - */ -u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, - u64 size, u64 align) -{ - u64 addr, last; - - addr = round_up(ei_start, align); - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - goto out; - while (bad_addr(&addr, size, align) && addr+size <= ei_last) - ; - last = addr + size; - if (last > ei_last) - goto out; - if (last > end) - goto out; - - return addr; - -out: - return -1ULL; -} - -u64 __init find_early_area_size(u64 ei_start, u64 ei_last, u64 start, - u64 *sizep, u64 align) -{ - u64 addr, last; - - addr = round_up(ei_start, align); - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - goto out; - *sizep = ei_last - addr; - while (bad_addr_size(&addr, sizep, align) && addr + *sizep <= ei_last) - ; - last = addr + *sizep; - if (last > ei_last) - goto out; - - return addr; - -out: - return -1ULL; -} -- cgit v1.2.3 From 1da3f87ebb7edb3e0b829ec4bbe5fb3d9d93986f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:24 +0200 Subject: drm: kill kernel_context_switch callbacks Not used by any in-kernel driver. So drop it. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_lock.c | 18 ------------------ include/drm/drmP.h | 3 --- 2 files changed, 21 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index 9bf93bc9a32c..609f2d504f72 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -136,12 +136,6 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv) } } - if (dev->driver->kernel_context_switch && - dev->last_context != lock->context) { - dev->driver->kernel_context_switch(dev, dev->last_context, - lock->context); - } - return 0; } @@ -159,7 +153,6 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv) int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_lock *lock = data; - struct drm_master *master = file_priv->master; if (lock->context == DRM_KERNEL_CONTEXT) { DRM_ERROR("Process %d using kernel context %d\n", @@ -169,17 +162,6 @@ int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv) atomic_inc(&dev->counts[_DRM_STAT_UNLOCKS]); - /* kernel_context_switch isn't used by any of the x86 drm - * modules but is required by the Sparc driver. - */ - if (dev->driver->kernel_context_switch_unlock) - dev->driver->kernel_context_switch_unlock(dev); - else { - if (drm_lock_free(&master->lock, lock->context)) { - /* FIXME: Should really bail out here. */ - } - } - unblock_all_signals(); return 0; } diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 7809d230adee..15ea8c44f28d 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -703,9 +703,6 @@ struct drm_driver { int (*dma_quiescent) (struct drm_device *); int (*context_ctor) (struct drm_device *dev, int context); int (*context_dtor) (struct drm_device *dev, int context); - int (*kernel_context_switch) (struct drm_device *dev, int old, - int new); - void (*kernel_context_switch_unlock) (struct drm_device *dev); /** * get_vblank_counter - get raw hardware vblank counter -- cgit v1.2.3 From be72ae26b11478c00c64858c86b647b438791671 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:26 +0200 Subject: drm: kill procfs callbacks Not used by any driver (rightly so!). Kill them. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_proc.c | 13 ------------- include/drm/drmP.h | 2 -- 2 files changed, 15 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_proc.c b/drivers/gpu/drm/drm_proc.c index a9ba6b69ad35..e571de536dc5 100644 --- a/drivers/gpu/drm/drm_proc.c +++ b/drivers/gpu/drm/drm_proc.c @@ -151,7 +151,6 @@ fail: int drm_proc_init(struct drm_minor *minor, int minor_id, struct proc_dir_entry *root) { - struct drm_device *dev = minor->dev; char name[64]; int ret; @@ -172,14 +171,6 @@ int drm_proc_init(struct drm_minor *minor, int minor_id, return ret; } - if (dev->driver->proc_init) { - ret = dev->driver->proc_init(minor); - if (ret) { - DRM_ERROR("DRM: Driver failed to initialize " - "/proc/dri.\n"); - return ret; - } - } return 0; } @@ -216,15 +207,11 @@ int drm_proc_remove_files(struct drm_info_list *files, int count, */ int drm_proc_cleanup(struct drm_minor *minor, struct proc_dir_entry *root) { - struct drm_device *dev = minor->dev; char name[64]; if (!root || !minor->proc_root) return 0; - if (dev->driver->proc_cleanup) - dev->driver->proc_cleanup(minor); - drm_proc_remove_files(drm_proc_list, DRM_PROC_ENTRIES, minor); sprintf(name, "%d", minor->index); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 15ea8c44f28d..0d7af3f39652 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -792,8 +792,6 @@ struct drm_driver { void (*master_drop)(struct drm_device *dev, struct drm_file *file_priv, bool from_release); - int (*proc_init)(struct drm_minor *minor); - void (*proc_cleanup)(struct drm_minor *minor); int (*debugfs_init)(struct drm_minor *minor); void (*debugfs_cleanup)(struct drm_minor *minor); -- cgit v1.2.3 From 23ddc0243d7313942b94f1a2e44e6394f7bb996e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:25 +0200 Subject: drm: kill dma_ready callbacks Not used by any driver. So drop it. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_lock.c | 3 --- include/drm/drmP.h | 1 - 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index 609f2d504f72..d9146f240d33 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -124,9 +124,6 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv) block_all_signals(drm_notifier, &dev->sigdata, &dev->sigmask); } - if (dev->driver->dma_ready && (lock->flags & _DRM_LOCK_READY)) - dev->driver->dma_ready(dev); - if (dev->driver->dma_quiescent && (lock->flags & _DRM_LOCK_QUIESCENT)) { if (dev->driver->dma_quiescent(dev)) { diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 0d7af3f39652..d5a2b8869246 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -699,7 +699,6 @@ struct drm_driver { int (*suspend) (struct drm_device *, pm_message_t state); int (*resume) (struct drm_device *); int (*dma_ioctl) (struct drm_device *dev, void *data, struct drm_file *file_priv); - void (*dma_ready) (struct drm_device *); int (*dma_quiescent) (struct drm_device *); int (*context_ctor) (struct drm_device *dev, int context); int (*context_dtor) (struct drm_device *dev, int context); -- cgit v1.2.3 From fd2e7931cdefa8e9acf63f0a4efd61ae0f89e77b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:33 +0200 Subject: drm: kill gem_free_object_unlocked driver callback Not used by any current driver. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 4 +--- include/drm/drmP.h | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index bf92d07510df..cff7317d3830 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -474,9 +474,7 @@ drm_gem_object_free_unlocked(struct kref *kref) struct drm_gem_object *obj = (struct drm_gem_object *) kref; struct drm_device *dev = obj->dev; - if (dev->driver->gem_free_object_unlocked != NULL) - dev->driver->gem_free_object_unlocked(obj); - else if (dev->driver->gem_free_object != NULL) { + if (dev->driver->gem_free_object != NULL) { mutex_lock(&dev->struct_mutex); dev->driver->gem_free_object(obj); mutex_unlock(&dev->struct_mutex); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index d5a2b8869246..6dbdbf45cd1a 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -802,7 +802,6 @@ struct drm_driver { */ int (*gem_init_object) (struct drm_gem_object *obj); void (*gem_free_object) (struct drm_gem_object *obj); - void (*gem_free_object_unlocked) (struct drm_gem_object *obj); /* vga arb irq handler */ void (*vgaarb_irq)(struct drm_device *dev, bool state); -- cgit v1.2.3 From b3da8f7d2d1fa81fb65cb3f5d9e50dde40a83182 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:29 +0200 Subject: drm: kill context_ctor callback It's not used by any driver. The destructor callback is unfortunately used by the via driver in a rather convoluted piece of code used to reimplement something resembling broken futexes. I didn't dare to touch this code. But at least kill the needless NULL assignemt in the sis driver. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_context.c | 8 -------- drivers/gpu/drm/sis/sis_drv.c | 1 - include/drm/drmP.h | 1 - 3 files changed, 10 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c index 2607753a320b..6d440fb894cf 100644 --- a/drivers/gpu/drm/drm_context.c +++ b/drivers/gpu/drm/drm_context.c @@ -333,14 +333,6 @@ int drm_addctx(struct drm_device *dev, void *data, return -ENOMEM; } - if (ctx->handle != DRM_KERNEL_CONTEXT) { - if (dev->driver->context_ctor) - if (!dev->driver->context_ctor(dev, ctx->handle)) { - DRM_DEBUG("Running out of ctxs or memory.\n"); - return -ENOMEM; - } - } - ctx_entry = kmalloc(sizeof(*ctx_entry), GFP_KERNEL); if (!ctx_entry) { DRM_DEBUG("out of memory\n"); diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c index 776bf9e9ea1a..2d1292131500 100644 --- a/drivers/gpu/drm/sis/sis_drv.c +++ b/drivers/gpu/drm/sis/sis_drv.c @@ -67,7 +67,6 @@ static struct drm_driver driver = { .driver_features = DRIVER_USE_AGP | DRIVER_USE_MTRR, .load = sis_driver_load, .unload = sis_driver_unload, - .context_dtor = NULL, .dma_quiescent = sis_idle, .reclaim_buffers = NULL, .reclaim_buffers_idlelocked = sis_reclaim_buffers_locked, diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 6dbdbf45cd1a..eb4f7edcc314 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -700,7 +700,6 @@ struct drm_driver { int (*resume) (struct drm_device *); int (*dma_ioctl) (struct drm_device *dev, void *data, struct drm_file *file_priv); int (*dma_quiescent) (struct drm_device *); - int (*context_ctor) (struct drm_device *dev, int context); int (*context_dtor) (struct drm_device *dev, int context); /** -- cgit v1.2.3 From a2a273c94357ffd24e635cf9ec9b2e5c6f02b63b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:30 +0200 Subject: drm: don't export drm_get_drawable_info Not used by any in-tree user. So drop it. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_drawable.c | 3 +-- include/drm/drmP.h | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_drawable.c b/drivers/gpu/drm/drm_drawable.c index c53c9768cc11..170e53178d8b 100644 --- a/drivers/gpu/drm/drm_drawable.c +++ b/drivers/gpu/drm/drm_drawable.c @@ -173,11 +173,10 @@ error: /** * Caller must hold the drawable spinlock! */ -struct drm_drawable_info *drm_get_drawable_info(struct drm_device *dev, drm_drawable_t id) +static struct drm_drawable_info *drm_get_drawable_info(struct drm_device *dev, drm_drawable_t id) { return idr_find(&dev->drw_idr, id); } -EXPORT_SYMBOL(drm_get_drawable_info); static int drm_drawable_free(int idr, void *p, void *data) { diff --git a/include/drm/drmP.h b/include/drm/drmP.h index eb4f7edcc314..7a5c91c2aa60 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1238,8 +1238,6 @@ extern int drm_rmdraw(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int drm_update_drawable_info(struct drm_device *dev, void *data, struct drm_file *file_priv); -extern struct drm_drawable_info *drm_get_drawable_info(struct drm_device *dev, - drm_drawable_t id); extern void drm_drawable_free_all(struct drm_device *dev); /* Authentication IOCTL support (drm_auth.h) */ -- cgit v1.2.3 From 690bb51b54a986e48c7b8b2dba51a3cd262a7266 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:35 +0200 Subject: drm: drop return value of drm_free_agp No caller (rightly) cares about it, so drop it. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_memory.c | 4 ++-- include/drm/drmP.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c index 7732268eced2..70ca27edc3c9 100644 --- a/drivers/gpu/drm/drm_memory.c +++ b/drivers/gpu/drm/drm_memory.c @@ -106,9 +106,9 @@ DRM_AGP_MEM *drm_alloc_agp(struct drm_device * dev, int pages, u32 type) } /** Wrapper around agp_free_memory() */ -int drm_free_agp(DRM_AGP_MEM * handle, int pages) +void drm_free_agp(DRM_AGP_MEM * handle, int pages) { - return drm_agp_free_memory(handle) ? 0 : -EINVAL; + drm_agp_free_memory(handle); } EXPORT_SYMBOL(drm_free_agp); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 7a5c91c2aa60..7fd1870b6a70 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1178,8 +1178,8 @@ extern int drm_mem_info(char *buf, char **start, off_t offset, int request, int *eof, void *data); extern void *drm_realloc(void *oldpt, size_t oldsize, size_t size, int area); +extern void drm_free_agp(DRM_AGP_MEM * handle, int pages); extern DRM_AGP_MEM *drm_alloc_agp(struct drm_device *dev, int pages, u32 type); -extern int drm_free_agp(DRM_AGP_MEM * handle, int pages); extern int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start); extern DRM_AGP_MEM *drm_agp_bind_pages(struct drm_device *dev, struct page **pages, -- cgit v1.2.3 From 793a97e4cc38f834e0488ccc1ecbfe52ff6f5b84 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:27 +0200 Subject: drm: kill drm_map_ofs callbacks All drivers happily copy&pasted the default implementation without checking whether this callback is used at all. It's not. Sigh. Kill it. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_vm.c | 7 ------- drivers/gpu/drm/i810/i810_drv.c | 1 - drivers/gpu/drm/i830/i830_drv.c | 1 - drivers/gpu/drm/i915/i915_drv.c | 1 - drivers/gpu/drm/mga/mga_drv.c | 1 - drivers/gpu/drm/nouveau/nouveau_drv.c | 1 - drivers/gpu/drm/r128/r128_drv.c | 1 - drivers/gpu/drm/radeon/radeon_drv.c | 2 -- drivers/gpu/drm/savage/savage_drv.c | 1 - drivers/gpu/drm/sis/sis_drv.c | 1 - drivers/gpu/drm/tdfx/tdfx_drv.c | 1 - drivers/gpu/drm/via/via_drv.c | 1 - drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 1 - include/drm/drmP.h | 2 -- 14 files changed, 22 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c index fda67468e603..2fea2e63a313 100644 --- a/drivers/gpu/drm/drm_vm.c +++ b/drivers/gpu/drm/drm_vm.c @@ -515,13 +515,6 @@ static int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma) return 0; } -resource_size_t drm_core_get_map_ofs(struct drm_local_map * map) -{ - return map->offset; -} - -EXPORT_SYMBOL(drm_core_get_map_ofs); - resource_size_t drm_core_get_reg_ofs(struct drm_device *dev) { #ifdef __alpha__ diff --git a/drivers/gpu/drm/i810/i810_drv.c b/drivers/gpu/drm/i810/i810_drv.c index b4250b2cac1f..084a85c3d5d5 100644 --- a/drivers/gpu/drm/i810/i810_drv.c +++ b/drivers/gpu/drm/i810/i810_drv.c @@ -52,7 +52,6 @@ static struct drm_driver driver = { .device_is_agp = i810_driver_device_is_agp, .reclaim_buffers_locked = i810_driver_reclaim_buffers_locked, .dma_quiescent = i810_driver_dma_quiescent, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = i810_ioctls, .fops = { diff --git a/drivers/gpu/drm/i830/i830_drv.c b/drivers/gpu/drm/i830/i830_drv.c index a5c66aa82f0c..16352954311c 100644 --- a/drivers/gpu/drm/i830/i830_drv.c +++ b/drivers/gpu/drm/i830/i830_drv.c @@ -57,7 +57,6 @@ static struct drm_driver driver = { .device_is_agp = i830_driver_device_is_agp, .reclaim_buffers_locked = i830_driver_reclaim_buffers_locked, .dma_quiescent = i830_driver_dma_quiescent, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, #if USE_IRQS .irq_preinstall = i830_driver_irq_preinstall, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 00befce8fbb7..d079f7b86cca 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -524,7 +524,6 @@ static struct drm_driver driver = { .irq_uninstall = i915_driver_irq_uninstall, .irq_handler = i915_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .master_create = i915_master_create, .master_destroy = i915_master_destroy, diff --git a/drivers/gpu/drm/mga/mga_drv.c b/drivers/gpu/drm/mga/mga_drv.c index 26d0d8ced80d..e9c0cbc21fe2 100644 --- a/drivers/gpu/drm/mga/mga_drv.c +++ b/drivers/gpu/drm/mga/mga_drv.c @@ -60,7 +60,6 @@ static struct drm_driver driver = { .irq_uninstall = mga_driver_irq_uninstall, .irq_handler = mga_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = mga_ioctls, .dma_ioctl = mga_dma_buffers, diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c index 1de5eb53e016..0d64259b21cc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.c +++ b/drivers/gpu/drm/nouveau/nouveau_drv.c @@ -379,7 +379,6 @@ static struct drm_driver driver = { .irq_uninstall = nouveau_irq_uninstall, .irq_handler = nouveau_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = nouveau_ioctls, .fops = { diff --git a/drivers/gpu/drm/r128/r128_drv.c b/drivers/gpu/drm/r128/r128_drv.c index 1e2971f13aa1..42ec20a10eed 100644 --- a/drivers/gpu/drm/r128/r128_drv.c +++ b/drivers/gpu/drm/r128/r128_drv.c @@ -56,7 +56,6 @@ static struct drm_driver driver = { .irq_uninstall = r128_driver_irq_uninstall, .irq_handler = r128_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = r128_ioctls, .dma_ioctl = r128_cce_buffers, diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 795403b0e2cd..8fd89bb8e610 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -203,7 +203,6 @@ static struct drm_driver driver_old = { .irq_uninstall = radeon_driver_irq_uninstall, .irq_handler = radeon_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = radeon_ioctls, .dma_ioctl = radeon_cp_buffers, @@ -290,7 +289,6 @@ static struct drm_driver kms_driver = { .irq_uninstall = radeon_driver_irq_uninstall_kms, .irq_handler = radeon_driver_irq_handler_kms, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = radeon_ioctls_kms, .gem_init_object = radeon_gem_object_init, diff --git a/drivers/gpu/drm/savage/savage_drv.c b/drivers/gpu/drm/savage/savage_drv.c index 021de44c15ab..f539996ba230 100644 --- a/drivers/gpu/drm/savage/savage_drv.c +++ b/drivers/gpu/drm/savage/savage_drv.c @@ -42,7 +42,6 @@ static struct drm_driver driver = { .lastclose = savage_driver_lastclose, .unload = savage_driver_unload, .reclaim_buffers = savage_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = savage_ioctls, .dma_ioctl = savage_bci_buffers, diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c index 2d1292131500..72b0a74f265f 100644 --- a/drivers/gpu/drm/sis/sis_drv.c +++ b/drivers/gpu/drm/sis/sis_drv.c @@ -71,7 +71,6 @@ static struct drm_driver driver = { .reclaim_buffers = NULL, .reclaim_buffers_idlelocked = sis_reclaim_buffers_locked, .lastclose = sis_lastclose, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = sis_ioctls, .fops = { diff --git a/drivers/gpu/drm/tdfx/tdfx_drv.c b/drivers/gpu/drm/tdfx/tdfx_drv.c index ec5a43e65722..38a562647d6f 100644 --- a/drivers/gpu/drm/tdfx/tdfx_drv.c +++ b/drivers/gpu/drm/tdfx/tdfx_drv.c @@ -42,7 +42,6 @@ static struct pci_device_id pciidlist[] = { static struct drm_driver driver = { .driver_features = DRIVER_USE_MTRR, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .fops = { .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/via/via_drv.c b/drivers/gpu/drm/via/via_drv.c index 7a1b210401e0..0b9ad8bbb031 100644 --- a/drivers/gpu/drm/via/via_drv.c +++ b/drivers/gpu/drm/via/via_drv.c @@ -51,7 +51,6 @@ static struct drm_driver driver = { .reclaim_buffers_locked = NULL, .reclaim_buffers_idlelocked = via_reclaim_buffers_locked, .lastclose = via_lastclose, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = via_ioctls, .fops = { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 72ec2e2b6e97..4a832de43dd5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -723,7 +723,6 @@ static struct drm_driver driver = { .irq_uninstall = vmw_irq_uninstall, .irq_handler = vmw_irq_handler, .reclaim_buffers_locked = NULL, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = vmw_ioctls, .num_ioctls = DRM_ARRAY_SIZE(vmw_ioctls), diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 7fd1870b6a70..70a14a4faa1e 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -772,7 +772,6 @@ struct drm_driver { struct drm_file *file_priv); void (*reclaim_buffers_idlelocked) (struct drm_device *dev, struct drm_file *file_priv); - resource_size_t (*get_map_ofs) (struct drm_local_map * map); resource_size_t (*get_reg_ofs) (struct drm_device *dev); void (*set_version) (struct drm_device *dev, struct drm_set_version *sv); @@ -1167,7 +1166,6 @@ extern int drm_release(struct inode *inode, struct file *filp); extern int drm_mmap(struct file *filp, struct vm_area_struct *vma); extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma); extern void drm_vm_open_locked(struct vm_area_struct *vma); -extern resource_size_t drm_core_get_map_ofs(struct drm_local_map * map); extern resource_size_t drm_core_get_reg_ofs(struct drm_device *dev); extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait); -- cgit v1.2.3 From 4ac5ec40ec70022e4dea8cc6254d2dadd1e43d57 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:34 +0200 Subject: drm: don't export dri1 locking functions Only used by ioctl, not by any in-tree drivers. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_lock.c | 10 +++------- include/drm/drmP.h | 1 - 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index d9146f240d33..1e28b9072068 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -37,6 +37,8 @@ static int drm_notifier(void *priv); +static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context); + /** * Lock ioctl. * @@ -172,6 +174,7 @@ int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv) * * Attempt to mark the lock as held by the given context, via the \p cmpxchg instruction. */ +static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context) { @@ -208,7 +211,6 @@ int drm_lock_take(struct drm_lock_data *lock_data, } return 0; } -EXPORT_SYMBOL(drm_lock_take); /** * This takes a lock forcibly and hands it to context. Should ONLY be used @@ -276,7 +278,6 @@ int drm_lock_free(struct drm_lock_data *lock_data, unsigned int context) wake_up_interruptible(&lock_data->lock_queue); return 0; } -EXPORT_SYMBOL(drm_lock_free); /** * If we get here, it means that the process has called DRM_IOCTL_LOCK @@ -339,7 +340,6 @@ void drm_idlelock_take(struct drm_lock_data *lock_data) } spin_unlock_bh(&lock_data->spinlock); } -EXPORT_SYMBOL(drm_idlelock_take); void drm_idlelock_release(struct drm_lock_data *lock_data) { @@ -359,8 +359,6 @@ void drm_idlelock_release(struct drm_lock_data *lock_data) } spin_unlock_bh(&lock_data->spinlock); } -EXPORT_SYMBOL(drm_idlelock_release); - int drm_i_have_hw_lock(struct drm_device *dev, struct drm_file *file_priv) { @@ -369,5 +367,3 @@ int drm_i_have_hw_lock(struct drm_device *dev, struct drm_file *file_priv) _DRM_LOCK_IS_HELD(master->lock.hw_lock->lock) && master->lock.file_priv == file_priv); } - -EXPORT_SYMBOL(drm_i_have_hw_lock); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 70a14a4faa1e..45d09639e9d2 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1252,7 +1252,6 @@ extern int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv); -extern int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context); extern int drm_lock_free(struct drm_lock_data *lock_data, unsigned int context); extern void drm_idlelock_take(struct drm_lock_data *lock_data); extern void drm_idlelock_release(struct drm_lock_data *lock_data); -- cgit v1.2.3 From 8f879194f88742d9c452f669482b6d6abdc1e1e7 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:31 +0200 Subject: drm: replace drawable ioctl by noops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The information supplied by userspace through these ioctls is only accessible by dev->drw_idr. But there's no in-tree user of that. Also userspace does not really care about return values of these ioctls, either. Only hw/xfree86/dri/dri.c from the xserver actually checks the return from adddraw and keeps on trying to create a kernel drawable every time somebody creates a dri drawable. But since that's now a noop, who cares. Therefore it's safe to replace these three ioctls with noops and rip out the implementation. Signed-off-by: Daniel Vetter Reviewed-by: Kristian Høgsberg Reviewed-by: Michel Dänzer Signed-off-by: Dave Airlie --- drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/drm_drawable.c | 197 ----------------------------------------- drivers/gpu/drm/drm_drv.c | 8 +- drivers/gpu/drm/drm_stub.c | 3 - include/drm/drmP.h | 15 ---- 5 files changed, 4 insertions(+), 221 deletions(-) delete mode 100644 drivers/gpu/drm/drm_drawable.c (limited to 'include') diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index f3a23a329f4e..997c43d04909 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -5,7 +5,7 @@ ccflags-y := -Iinclude/drm drm-y := drm_auth.o drm_buffer.o drm_bufs.o drm_cache.o \ - drm_context.o drm_dma.o drm_drawable.o \ + drm_context.o drm_dma.o \ drm_drv.o drm_fops.o drm_gem.o drm_ioctl.o drm_irq.o \ drm_lock.o drm_memory.o drm_proc.o drm_stub.o drm_vm.o \ drm_agpsupport.o drm_scatter.o ati_pcigart.o drm_pci.o \ diff --git a/drivers/gpu/drm/drm_drawable.c b/drivers/gpu/drm/drm_drawable.c deleted file mode 100644 index 170e53178d8b..000000000000 --- a/drivers/gpu/drm/drm_drawable.c +++ /dev/null @@ -1,197 +0,0 @@ -/** - * \file drm_drawable.c - * IOCTLs for drawables - * - * \author Rickard E. (Rik) Faith - * \author Gareth Hughes - * \author Michel Dänzer - */ - -/* - * Created: Tue Feb 2 08:37:54 1999 by faith@valinux.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, North Dakota. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "drmP.h" - -/** - * Allocate drawable ID and memory to store information about it. - */ -int drm_adddraw(struct drm_device *dev, void *data, struct drm_file *file_priv) -{ - unsigned long irqflags; - struct drm_draw *draw = data; - int new_id = 0; - int ret; - -again: - if (idr_pre_get(&dev->drw_idr, GFP_KERNEL) == 0) { - DRM_ERROR("Out of memory expanding drawable idr\n"); - return -ENOMEM; - } - - spin_lock_irqsave(&dev->drw_lock, irqflags); - ret = idr_get_new_above(&dev->drw_idr, NULL, 1, &new_id); - if (ret == -EAGAIN) { - spin_unlock_irqrestore(&dev->drw_lock, irqflags); - goto again; - } - - spin_unlock_irqrestore(&dev->drw_lock, irqflags); - - draw->handle = new_id; - - DRM_DEBUG("%d\n", draw->handle); - - return 0; -} - -/** - * Free drawable ID and memory to store information about it. - */ -int drm_rmdraw(struct drm_device *dev, void *data, struct drm_file *file_priv) -{ - struct drm_draw *draw = data; - unsigned long irqflags; - struct drm_drawable_info *info; - - spin_lock_irqsave(&dev->drw_lock, irqflags); - - info = drm_get_drawable_info(dev, draw->handle); - if (info == NULL) { - spin_unlock_irqrestore(&dev->drw_lock, irqflags); - return -EINVAL; - } - kfree(info->rects); - kfree(info); - - idr_remove(&dev->drw_idr, draw->handle); - - spin_unlock_irqrestore(&dev->drw_lock, irqflags); - DRM_DEBUG("%d\n", draw->handle); - return 0; -} - -int drm_update_drawable_info(struct drm_device *dev, void *data, struct drm_file *file_priv) -{ - struct drm_update_draw *update = data; - unsigned long irqflags; - struct drm_clip_rect *rects; - struct drm_drawable_info *info; - int err; - - info = idr_find(&dev->drw_idr, update->handle); - if (!info) { - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - if (IS_ERR(idr_replace(&dev->drw_idr, info, update->handle))) { - DRM_ERROR("No such drawable %d\n", update->handle); - kfree(info); - return -EINVAL; - } - } - - switch (update->type) { - case DRM_DRAWABLE_CLIPRECTS: - if (update->num == 0) - rects = NULL; - else if (update->num != info->num_rects) { - rects = kmalloc(update->num * - sizeof(struct drm_clip_rect), - GFP_KERNEL); - } else - rects = info->rects; - - if (update->num && !rects) { - DRM_ERROR("Failed to allocate cliprect memory\n"); - err = -ENOMEM; - goto error; - } - - if (update->num && DRM_COPY_FROM_USER(rects, - (struct drm_clip_rect __user *) - (unsigned long)update->data, - update->num * - sizeof(*rects))) { - DRM_ERROR("Failed to copy cliprects from userspace\n"); - err = -EFAULT; - goto error; - } - - spin_lock_irqsave(&dev->drw_lock, irqflags); - - if (rects != info->rects) { - kfree(info->rects); - } - - info->rects = rects; - info->num_rects = update->num; - - spin_unlock_irqrestore(&dev->drw_lock, irqflags); - - DRM_DEBUG("Updated %d cliprects for drawable %d\n", - info->num_rects, update->handle); - break; - default: - DRM_ERROR("Invalid update type %d\n", update->type); - return -EINVAL; - } - - return 0; - -error: - if (rects != info->rects) - kfree(rects); - - return err; -} - -/** - * Caller must hold the drawable spinlock! - */ -static struct drm_drawable_info *drm_get_drawable_info(struct drm_device *dev, drm_drawable_t id) -{ - return idr_find(&dev->drw_idr, id); -} - -static int drm_drawable_free(int idr, void *p, void *data) -{ - struct drm_drawable_info *info = p; - - if (info) { - kfree(info->rects); - kfree(info); - } - - return 0; -} - -void drm_drawable_free_all(struct drm_device *dev) -{ - idr_for_each(&dev->drw_idr, drm_drawable_free, NULL); - idr_remove_all(&dev->drw_idr); -} diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 84da748555bc..a35a41002c33 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -91,8 +91,8 @@ static struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_NEW_CTX, drm_newctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_RES_CTX, drm_resctx, DRM_AUTH), - DRM_IOCTL_DEF(DRM_IOCTL_ADD_DRAW, drm_adddraw, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_IOCTL_RM_DRAW, drm_rmdraw, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF(DRM_IOCTL_ADD_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF(DRM_IOCTL_RM_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_LOCK, drm_lock, DRM_AUTH), DRM_IOCTL_DEF(DRM_IOCTL_UNLOCK, drm_unlock, DRM_AUTH), @@ -127,7 +127,7 @@ static struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_MODESET_CTL, drm_modeset_ctl, 0), - DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_update_drawable_info, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH|DRM_UNLOCKED), @@ -180,8 +180,6 @@ int drm_lastclose(struct drm_device * dev) mutex_lock(&dev->struct_mutex); - /* Free drawable information memory */ - drm_drawable_free_all(dev); del_timer(&dev->timer); /* Clear AGP information */ diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c index d1ad57450df1..f797ae9da77c 100644 --- a/drivers/gpu/drm/drm_stub.c +++ b/drivers/gpu/drm/drm_stub.c @@ -240,14 +240,11 @@ int drm_fill_in_dev(struct drm_device *dev, INIT_LIST_HEAD(&dev->vblank_event_list); spin_lock_init(&dev->count_lock); - spin_lock_init(&dev->drw_lock); spin_lock_init(&dev->event_lock); init_timer(&dev->timer); mutex_init(&dev->struct_mutex); mutex_init(&dev->ctxlist_mutex); - idr_init(&dev->drw_idr); - if (drm_ht_create(&dev->map_hash, 12)) { return -ENOMEM; } diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 45d09639e9d2..989cefe33c7b 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1037,12 +1037,6 @@ struct drm_device { struct drm_minor *control; /**< Control node for card */ struct drm_minor *primary; /**< render type primary screen head */ - /** \name Drawable information */ - /*@{ */ - spinlock_t drw_lock; - struct idr drw_idr; - /*@} */ - struct drm_mode_config mode_config; /**< Current mode config */ /** \name GEM information */ @@ -1229,15 +1223,6 @@ extern int drm_setsareactx(struct drm_device *dev, void *data, extern int drm_getsareactx(struct drm_device *dev, void *data, struct drm_file *file_priv); - /* Drawable IOCTL support (drm_drawable.h) */ -extern int drm_adddraw(struct drm_device *dev, void *data, - struct drm_file *file_priv); -extern int drm_rmdraw(struct drm_device *dev, void *data, - struct drm_file *file_priv); -extern int drm_update_drawable_info(struct drm_device *dev, void *data, - struct drm_file *file_priv); -extern void drm_drawable_free_all(struct drm_device *dev); - /* Authentication IOCTL support (drm_auth.h) */ extern int drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv); -- cgit v1.2.3 From 89c372647d1d698a96e2189ef4312a977b939839 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:36 +0200 Subject: drm: kill agp indirection mess There's no point in jumping through two indirections. So kill one and call the kernels agp functions directly. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_agpsupport.c | 40 ++++------------------------------------ drivers/gpu/drm/drm_memory.c | 12 +++--------- include/drm/drmP.h | 5 ----- 3 files changed, 7 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_agpsupport.c b/drivers/gpu/drm/drm_agpsupport.c index ba38e0147220..252fdb98b73a 100644 --- a/drivers/gpu/drm/drm_agpsupport.c +++ b/drivers/gpu/drm/drm_agpsupport.c @@ -193,7 +193,7 @@ int drm_agp_enable_ioctl(struct drm_device *dev, void *data, * \return zero on success or a negative number on failure. * * Verifies the AGP device is present and has been acquired, allocates the - * memory via alloc_agp() and creates a drm_agp_mem entry for it. + * memory via agp_allocate_memory() and creates a drm_agp_mem entry for it. */ int drm_agp_alloc(struct drm_device *dev, struct drm_agp_buffer *request) { @@ -211,7 +211,7 @@ int drm_agp_alloc(struct drm_device *dev, struct drm_agp_buffer *request) pages = (request->size + PAGE_SIZE - 1) / PAGE_SIZE; type = (u32) request->type; - if (!(memory = drm_alloc_agp(dev, pages, type))) { + if (!(memory = agp_allocate_memory(dev->agp->bridge, pages, type))) { kfree(entry); return -ENOMEM; } @@ -423,38 +423,6 @@ struct drm_agp_head *drm_agp_init(struct drm_device *dev) return head; } -/** Calls agp_allocate_memory() */ -DRM_AGP_MEM *drm_agp_allocate_memory(struct agp_bridge_data * bridge, - size_t pages, u32 type) -{ - return agp_allocate_memory(bridge, pages, type); -} - -/** Calls agp_free_memory() */ -int drm_agp_free_memory(DRM_AGP_MEM * handle) -{ - if (!handle) - return 0; - agp_free_memory(handle); - return 1; -} - -/** Calls agp_bind_memory() */ -int drm_agp_bind_memory(DRM_AGP_MEM * handle, off_t start) -{ - if (!handle) - return -EINVAL; - return agp_bind_memory(handle, start); -} - -/** Calls agp_unbind_memory() */ -int drm_agp_unbind_memory(DRM_AGP_MEM * handle) -{ - if (!handle) - return -EINVAL; - return agp_unbind_memory(handle); -} - /** * Binds a collection of pages into AGP memory at the given offset, returning * the AGP memory structure containing them. @@ -474,7 +442,7 @@ drm_agp_bind_pages(struct drm_device *dev, DRM_DEBUG("\n"); - mem = drm_agp_allocate_memory(dev->agp->bridge, num_pages, + mem = agp_allocate_memory(dev->agp->bridge, num_pages, type); if (mem == NULL) { DRM_ERROR("Failed to allocate memory for %ld pages\n", @@ -487,7 +455,7 @@ drm_agp_bind_pages(struct drm_device *dev, mem->page_count = num_pages; mem->is_flushed = true; - ret = drm_agp_bind_memory(mem, gtt_offset / PAGE_SIZE); + ret = agp_bind_memory(mem, gtt_offset / PAGE_SIZE); if (ret != 0) { DRM_ERROR("Failed to bind AGP memory: %d\n", ret); agp_free_memory(mem); diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c index 70ca27edc3c9..c9b805000a11 100644 --- a/drivers/gpu/drm/drm_memory.c +++ b/drivers/gpu/drm/drm_memory.c @@ -99,29 +99,23 @@ static void *agp_remap(unsigned long offset, unsigned long size, return addr; } -/** Wrapper around agp_allocate_memory() */ -DRM_AGP_MEM *drm_alloc_agp(struct drm_device * dev, int pages, u32 type) -{ - return drm_agp_allocate_memory(dev->agp->bridge, pages, type); -} - /** Wrapper around agp_free_memory() */ void drm_free_agp(DRM_AGP_MEM * handle, int pages) { - drm_agp_free_memory(handle); + agp_free_memory(handle); } EXPORT_SYMBOL(drm_free_agp); /** Wrapper around agp_bind_memory() */ int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start) { - return drm_agp_bind_memory(handle, start); + return agp_bind_memory(handle, start); } /** Wrapper around agp_unbind_memory() */ int drm_unbind_agp(DRM_AGP_MEM * handle) { - return drm_agp_unbind_memory(handle); + return agp_unbind_memory(handle); } EXPORT_SYMBOL(drm_unbind_agp); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 989cefe33c7b..ffe6035cf471 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1171,7 +1171,6 @@ extern int drm_mem_info(char *buf, char **start, off_t offset, extern void *drm_realloc(void *oldpt, size_t oldsize, size_t size, int area); extern void drm_free_agp(DRM_AGP_MEM * handle, int pages); -extern DRM_AGP_MEM *drm_alloc_agp(struct drm_device *dev, int pages, u32 type); extern int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start); extern DRM_AGP_MEM *drm_agp_bind_pages(struct drm_device *dev, struct page **pages, @@ -1331,10 +1330,6 @@ extern int drm_agp_unbind_ioctl(struct drm_device *dev, void *data, extern int drm_agp_bind(struct drm_device *dev, struct drm_agp_binding *request); extern int drm_agp_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -extern DRM_AGP_MEM *drm_agp_allocate_memory(struct agp_bridge_data *bridge, size_t pages, u32 type); -extern int drm_agp_free_memory(DRM_AGP_MEM * handle); -extern int drm_agp_bind_memory(DRM_AGP_MEM * handle, off_t start); -extern int drm_agp_unbind_memory(DRM_AGP_MEM * handle); extern void drm_agp_chipset_flush(struct drm_device *dev); /* Stub support (drm_stub.h) */ -- cgit v1.2.3 From df8fcb09667c1b2c9dcf65de23f0bfa851e8138e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:32 +0200 Subject: drm: kill dev->timer Totally unused. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_drv.c | 2 -- drivers/gpu/drm/drm_stub.c | 1 - include/drm/drmP.h | 1 - 3 files changed, 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index a35a41002c33..5ff75a3a6b9d 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -180,8 +180,6 @@ int drm_lastclose(struct drm_device * dev) mutex_lock(&dev->struct_mutex); - del_timer(&dev->timer); - /* Clear AGP information */ if (drm_core_has_AGP(dev) && dev->agp && !drm_core_check_feature(dev, DRIVER_MODESET)) { diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c index f797ae9da77c..cdc89ee042cc 100644 --- a/drivers/gpu/drm/drm_stub.c +++ b/drivers/gpu/drm/drm_stub.c @@ -241,7 +241,6 @@ int drm_fill_in_dev(struct drm_device *dev, spin_lock_init(&dev->count_lock); spin_lock_init(&dev->event_lock); - init_timer(&dev->timer); mutex_init(&dev->struct_mutex); mutex_init(&dev->ctxlist_mutex); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index ffe6035cf471..757b63a23b14 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -964,7 +964,6 @@ struct drm_device { __volatile__ long context_flag; /**< Context swapping flag */ __volatile__ long interrupt_flag; /**< Interruption handler flag */ __volatile__ long dma_flag; /**< DMA dispatch flag */ - struct timer_list timer; /**< Timer for delaying ctx switch */ wait_queue_head_t context_wait; /**< Processes waiting on ctx switch */ int last_checked; /**< Last context checked for DMA */ int last_context; /**< Last current context */ -- cgit v1.2.3 From cbc60ca04b342a4e1f2a1086a7277c077f07dbed Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:28 +0200 Subject: drm: kill get_reg_ofs callback Every driver used the default implementation. Fold that one into the only callsite and drop the callback. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_vm.c | 6 ++---- drivers/gpu/drm/i810/i810_drv.c | 1 - drivers/gpu/drm/i830/i830_drv.c | 1 - drivers/gpu/drm/i915/i915_drv.c | 1 - drivers/gpu/drm/mga/mga_drv.c | 1 - drivers/gpu/drm/nouveau/nouveau_drv.c | 1 - drivers/gpu/drm/r128/r128_drv.c | 1 - drivers/gpu/drm/radeon/radeon_drv.c | 2 -- drivers/gpu/drm/savage/savage_drv.c | 1 - drivers/gpu/drm/sis/sis_drv.c | 1 - drivers/gpu/drm/tdfx/tdfx_drv.c | 1 - drivers/gpu/drm/via/via_drv.c | 1 - drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 1 - include/drm/drmP.h | 2 -- 14 files changed, 2 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c index 2fea2e63a313..ee879d6bb522 100644 --- a/drivers/gpu/drm/drm_vm.c +++ b/drivers/gpu/drm/drm_vm.c @@ -515,7 +515,7 @@ static int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma) return 0; } -resource_size_t drm_core_get_reg_ofs(struct drm_device *dev) +static resource_size_t drm_core_get_reg_ofs(struct drm_device *dev) { #ifdef __alpha__ return dev->hose->dense_mem_base - dev->hose->mem_space->start; @@ -524,8 +524,6 @@ resource_size_t drm_core_get_reg_ofs(struct drm_device *dev) #endif } -EXPORT_SYMBOL(drm_core_get_reg_ofs); - /** * mmap DMA memory. * @@ -612,7 +610,7 @@ int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma) #endif case _DRM_FRAME_BUFFER: case _DRM_REGISTERS: - offset = dev->driver->get_reg_ofs(dev); + offset = drm_core_get_reg_ofs(dev); vma->vm_flags |= VM_IO; /* not in core dump */ vma->vm_page_prot = drm_io_prot(map->type, vma); #if !defined(__arm__) diff --git a/drivers/gpu/drm/i810/i810_drv.c b/drivers/gpu/drm/i810/i810_drv.c index 084a85c3d5d5..1c73b0c43c1e 100644 --- a/drivers/gpu/drm/i810/i810_drv.c +++ b/drivers/gpu/drm/i810/i810_drv.c @@ -52,7 +52,6 @@ static struct drm_driver driver = { .device_is_agp = i810_driver_device_is_agp, .reclaim_buffers_locked = i810_driver_reclaim_buffers_locked, .dma_quiescent = i810_driver_dma_quiescent, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = i810_ioctls, .fops = { .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/i830/i830_drv.c b/drivers/gpu/drm/i830/i830_drv.c index 16352954311c..7140ffc12eee 100644 --- a/drivers/gpu/drm/i830/i830_drv.c +++ b/drivers/gpu/drm/i830/i830_drv.c @@ -57,7 +57,6 @@ static struct drm_driver driver = { .device_is_agp = i830_driver_device_is_agp, .reclaim_buffers_locked = i830_driver_reclaim_buffers_locked, .dma_quiescent = i830_driver_dma_quiescent, - .get_reg_ofs = drm_core_get_reg_ofs, #if USE_IRQS .irq_preinstall = i830_driver_irq_preinstall, .irq_postinstall = i830_driver_irq_postinstall, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d079f7b86cca..e6afa68775b0 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -524,7 +524,6 @@ static struct drm_driver driver = { .irq_uninstall = i915_driver_irq_uninstall, .irq_handler = i915_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .master_create = i915_master_create, .master_destroy = i915_master_destroy, #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/mga/mga_drv.c b/drivers/gpu/drm/mga/mga_drv.c index e9c0cbc21fe2..65ea42cf1795 100644 --- a/drivers/gpu/drm/mga/mga_drv.c +++ b/drivers/gpu/drm/mga/mga_drv.c @@ -60,7 +60,6 @@ static struct drm_driver driver = { .irq_uninstall = mga_driver_irq_uninstall, .irq_handler = mga_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = mga_ioctls, .dma_ioctl = mga_dma_buffers, .fops = { diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c index 0d64259b21cc..209912a1b7a5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.c +++ b/drivers/gpu/drm/nouveau/nouveau_drv.c @@ -379,7 +379,6 @@ static struct drm_driver driver = { .irq_uninstall = nouveau_irq_uninstall, .irq_handler = nouveau_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = nouveau_ioctls, .fops = { .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/r128/r128_drv.c b/drivers/gpu/drm/r128/r128_drv.c index 42ec20a10eed..67309f84f16d 100644 --- a/drivers/gpu/drm/r128/r128_drv.c +++ b/drivers/gpu/drm/r128/r128_drv.c @@ -56,7 +56,6 @@ static struct drm_driver driver = { .irq_uninstall = r128_driver_irq_uninstall, .irq_handler = r128_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = r128_ioctls, .dma_ioctl = r128_cce_buffers, .fops = { diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 8fd89bb8e610..663cdc10a5c2 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -203,7 +203,6 @@ static struct drm_driver driver_old = { .irq_uninstall = radeon_driver_irq_uninstall, .irq_handler = radeon_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = radeon_ioctls, .dma_ioctl = radeon_cp_buffers, .fops = { @@ -289,7 +288,6 @@ static struct drm_driver kms_driver = { .irq_uninstall = radeon_driver_irq_uninstall_kms, .irq_handler = radeon_driver_irq_handler_kms, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = radeon_ioctls_kms, .gem_init_object = radeon_gem_object_init, .gem_free_object = radeon_gem_object_free, diff --git a/drivers/gpu/drm/savage/savage_drv.c b/drivers/gpu/drm/savage/savage_drv.c index f539996ba230..c0385633667d 100644 --- a/drivers/gpu/drm/savage/savage_drv.c +++ b/drivers/gpu/drm/savage/savage_drv.c @@ -42,7 +42,6 @@ static struct drm_driver driver = { .lastclose = savage_driver_lastclose, .unload = savage_driver_unload, .reclaim_buffers = savage_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = savage_ioctls, .dma_ioctl = savage_bci_buffers, .fops = { diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c index 72b0a74f265f..4d9f311d249d 100644 --- a/drivers/gpu/drm/sis/sis_drv.c +++ b/drivers/gpu/drm/sis/sis_drv.c @@ -71,7 +71,6 @@ static struct drm_driver driver = { .reclaim_buffers = NULL, .reclaim_buffers_idlelocked = sis_reclaim_buffers_locked, .lastclose = sis_lastclose, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = sis_ioctls, .fops = { .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/tdfx/tdfx_drv.c b/drivers/gpu/drm/tdfx/tdfx_drv.c index 38a562647d6f..e0768adbeccd 100644 --- a/drivers/gpu/drm/tdfx/tdfx_drv.c +++ b/drivers/gpu/drm/tdfx/tdfx_drv.c @@ -42,7 +42,6 @@ static struct pci_device_id pciidlist[] = { static struct drm_driver driver = { .driver_features = DRIVER_USE_MTRR, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .fops = { .owner = THIS_MODULE, .open = drm_open, diff --git a/drivers/gpu/drm/via/via_drv.c b/drivers/gpu/drm/via/via_drv.c index 0b9ad8bbb031..02f733db61c1 100644 --- a/drivers/gpu/drm/via/via_drv.c +++ b/drivers/gpu/drm/via/via_drv.c @@ -51,7 +51,6 @@ static struct drm_driver driver = { .reclaim_buffers_locked = NULL, .reclaim_buffers_idlelocked = via_reclaim_buffers_locked, .lastclose = via_lastclose, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = via_ioctls, .fops = { .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 4a832de43dd5..e645f44e4302 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -723,7 +723,6 @@ static struct drm_driver driver = { .irq_uninstall = vmw_irq_uninstall, .irq_handler = vmw_irq_handler, .reclaim_buffers_locked = NULL, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = vmw_ioctls, .num_ioctls = DRM_ARRAY_SIZE(vmw_ioctls), .dma_quiescent = NULL, /*vmw_dma_quiescent, */ diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 757b63a23b14..30e827aeba02 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -772,7 +772,6 @@ struct drm_driver { struct drm_file *file_priv); void (*reclaim_buffers_idlelocked) (struct drm_device *dev, struct drm_file *file_priv); - resource_size_t (*get_reg_ofs) (struct drm_device *dev); void (*set_version) (struct drm_device *dev, struct drm_set_version *sv); @@ -1159,7 +1158,6 @@ extern int drm_release(struct inode *inode, struct file *filp); extern int drm_mmap(struct file *filp, struct vm_area_struct *vma); extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma); extern void drm_vm_open_locked(struct vm_area_struct *vma); -extern resource_size_t drm_core_get_reg_ofs(struct drm_device *dev); extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait); /* Memory management support (drm_memory.h) */ -- cgit v1.2.3 From 409456b10f87b28303643fec37543103f9ada00c Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 29 Aug 2010 21:57:55 -0700 Subject: net: fix datapath typo Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if.h b/include/linux/if.h index 6ed43c1f07ab..123959927745 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -76,7 +76,7 @@ #define IFF_MACVLAN_PORT 0x4000 /* device used as macvlan port */ #define IFF_BRIDGE_PORT 0x8000 /* device used as bridge port */ #define IFF_OVS_DATAPATH 0x10000 /* device used as Open vSwitch - * dapath port */ + * datapath port */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 -- cgit v1.2.3 From fcaf780b2ad352edaeb1d1c07a6da053266b1eed Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 24 Aug 2010 23:22:57 -0300 Subject: i7300_edac: start a driver for i7300 chipset (Clarksboro) Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/Kconfig | 7 + drivers/edac/Makefile | 1 + drivers/edac/i7300_edac.c | 1373 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 19 +- 4 files changed, 1392 insertions(+), 8 deletions(-) create mode 100644 drivers/edac/i7300_edac.c (limited to 'include') diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 70bb350de996..4573ccc11f93 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -199,6 +199,13 @@ config EDAC_I5100 Support for error detection and correction the Intel San Clemente MCH. +config EDAC_I7300 + tristate "Intel Clarksboro MCH" + depends on EDAC_MM_EDAC && X86 && PCI + help + Support for error detection and correction the Intel + Clarksboro MCH (Intel 7300 chipset). + config EDAC_MPC85XX tristate "Freescale MPC83xx / MPC85xx" depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx) diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index ca6b1bb24ccc..bca4369d6b7a 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o obj-$(CONFIG_EDAC_I5000) += i5000_edac.o obj-$(CONFIG_EDAC_I5100) += i5100_edac.o obj-$(CONFIG_EDAC_I5400) += i5400_edac.o +obj-$(CONFIG_EDAC_I7300) += i7300_edac.o obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c new file mode 100644 index 000000000000..eb3f30e96ee3 --- /dev/null +++ b/drivers/edac/i7300_edac.c @@ -0,0 +1,1373 @@ +/* + * Intel 7300 class Memory Controllers kernel module (Clarksboro) + * + * This file may be distributed under the terms of the + * GNU General Public License version 2 only. + * + * Copyright (c) 2010 by: + * Mauro Carvalho Chehab + * + * Red Hat Inc. http://www.redhat.com + * + * Intel 7300 Chipset Memory Controller Hub (MCH) - Datasheet + * http://www.intel.com/Assets/PDF/datasheet/318082.pdf + * + * TODO: The chipset allow checking for PCI Express errors also. Currently, + * the driver covers only memory error errors + * + * This driver uses "csrows" EDAC attribute to represent DIMM slot# + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "edac_core.h" + +/* + * Alter this version for the I7300 module when modifications are made + */ +#define I7300_REVISION " Ver: 1.0.0 " __DATE__ + +#define EDAC_MOD_STR "i7300_edac" + +#define i7300_printk(level, fmt, arg...) \ + edac_printk(level, "i7300", fmt, ##arg) + +#define i7300_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "i7300", fmt, ##arg) + +/* + * Memory topology is organized as: + * Branch 0 - 2 channels: channels 0 and 1 (FDB0 PCI dev 21.0) + * Branch 1 - 2 channels: channels 2 and 3 (FDB1 PCI dev 22.0) + * Each channel can have to 8 DIMM sets (called as SLOTS) + * Slots should generally be filled in pairs + * Except on Single Channel mode of operation + * just slot 0/channel0 filled on this mode + * On normal operation mode, the two channels on a branch should be + filled together for the same SLOT# + * When in mirrored mode, Branch 1 replicate memory at Branch 0, so, the four + * channels on both branches should be filled + */ + +/* Limits for i7300 */ +#define MAX_SLOTS 8 +#define MAX_BRANCHES 2 +#define MAX_CH_PER_BRANCH 2 +#define MAX_CHANNELS (MAX_CH_PER_BRANCH * MAX_BRANCHES) +#define MAX_MIR 3 + +#define to_channel(ch, branch) ((((branch)) << 1) | (ch)) + +#define to_csrow(slot, ch, branch) \ + (to_channel(ch, branch) | ((slot) << 2)) + + +/* Device 16, + * Function 0: System Address (not documented) + * Function 1: Memory Branch Map, Control, Errors Register + * Function 2: FSB Error Registers + * + * All 3 functions of Device 16 (0,1,2) share the SAME DID and + * uses PCI_DEVICE_ID_INTEL_I7300_MCH_ERR for device 16 (0,1,2), + * PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 and PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 + * for device 21 (0,1). + */ + + /* OFFSETS for Function 0 */ +#define AMBASE 0x48 /* AMB Mem Mapped Reg Region Base */ +#define MAXCH 0x56 /* Max Channel Number */ +#define MAXDIMMPERCH 0x57 /* Max DIMM PER Channel Number */ + + /* OFFSETS for Function 1 */ +#define TOLM 0x6C +#define REDMEMB 0x7C + +#define MIR0 0x80 +#define MIR1 0x84 +#define MIR2 0x88 + +#if 0 +#define AMIR0 0x8c +#define AMIR1 0x90 +#define AMIR2 0x94 + +/*TODO: double check it */ +#define REC_ECC_LOCATOR_ODD(x) ((x) & 0x3fe00) /* bits [17:9] indicate ODD, [8:0] indicate EVEN */ + + /* Fatal error registers */ +#define FERR_FAT_FBD 0x98 + +/*TODO: double check it */ +#define FERR_FAT_FBDCHAN (3<<28) /* channel index where the highest-order error occurred */ + +#define NERR_FAT_FBD 0x9c +#define FERR_NF_FBD 0xa0 + + /* Non-fatal error register */ +#define NERR_NF_FBD 0xa4 + + /* Enable error mask */ +#define EMASK_FBD 0xa8 + +#define ERR0_FBD 0xac +#define ERR1_FBD 0xb0 +#define ERR2_FBD 0xb4 +#define MCERR_FBD 0xb8 + +#endif + +/* TODO: Dev 16 fn1 allows memory error injection - offsets 0x100-0x10b */ + + /* TODO: OFFSETS for Device 16 Function 2 */ + +/* + * Device 21, + * Function 0: Memory Map Branch 0 + * + * Device 22, + * Function 0: Memory Map Branch 1 + */ + + /* OFFSETS for Function 0 */ + +/* + * Note: Other Intel EDAC drivers use AMBPRESENT to identify if the available + * memory. From datasheet item 7.3.1 (FB-DIMM technology & organization), it + * seems that we cannot use this information directly for the same usage. + * Each memory slot may have up to 2 AMB interfaces, one for income and another + * for outcome interface to the next slot. + * For now, the driver just stores the AMB present registers, but rely only at + * the MTR info to detect memory. + * Datasheet is also not clear about how to map each AMBPRESENT registers to + * one of the 4 available channels. + */ +#define AMBPRESENT_0 0x64 +#define AMBPRESENT_1 0x66 + +const static u16 mtr_regs [MAX_SLOTS] = { + 0x80, 0x84, 0x88, 0x8c, + 0x82, 0x86, 0x8a, 0x8e +}; + +/* Defines to extract the vaious fields from the + * MTRx - Memory Technology Registers + */ +#define MTR_DIMMS_PRESENT(mtr) ((mtr) & (1 << 8)) +#define MTR_DIMMS_ETHROTTLE(mtr) ((mtr) & (1 << 7)) +#define MTR_DRAM_WIDTH(mtr) (((mtr) & (1 << 6)) ? 8 : 4) +#define MTR_DRAM_BANKS(mtr) (((mtr) & (1 << 5)) ? 8 : 4) +#define MTR_DIMM_RANKS(mtr) (((mtr) & (1 << 4)) ? 1 : 0) +#define MTR_DIMM_ROWS(mtr) (((mtr) >> 2) & 0x3) +#define MTR_DRAM_BANKS_ADDR_BITS 2 +#define MTR_DIMM_ROWS_ADDR_BITS(mtr) (MTR_DIMM_ROWS(mtr) + 13) +#define MTR_DIMM_COLS(mtr) ((mtr) & 0x3) +#define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10) + +#if 0 + /* OFFSETS for Function 1 */ + +/* TODO */ +#define NRECFGLOG 0x74 +#define RECFGLOG 0x78 +#define NRECMEMA 0xbe +#define NRECMEMB 0xc0 +#define NRECFB_DIMMA 0xc4 +#define NRECFB_DIMMB 0xc8 +#define NRECFB_DIMMC 0xcc +#define NRECFB_DIMMD 0xd0 +#define NRECFB_DIMME 0xd4 +#define NRECFB_DIMMF 0xd8 +#define REDMEMA 0xdC +#define RECMEMA 0xf0 +#define RECMEMB 0xf4 +#define RECFB_DIMMA 0xf8 +#define RECFB_DIMMB 0xec +#define RECFB_DIMMC 0xf0 +#define RECFB_DIMMD 0xf4 +#define RECFB_DIMME 0xf8 +#define RECFB_DIMMF 0xfC + +/* This applies to FERR_NF_FB-DIMM as well as FERR_FAT_FB-DIMM */ +static inline int extract_fbdchan_indx(u32 x) +{ + return (x>>28) & 0x3; +} +#endif + +#ifdef CONFIG_EDAC_DEBUG +/* MTR NUMROW */ +static const char *numrow_toString[] = { + "8,192 - 13 rows", + "16,384 - 14 rows", + "32,768 - 15 rows", + "65,536 - 16 rows" +}; + +/* MTR NUMCOL */ +static const char *numcol_toString[] = { + "1,024 - 10 columns", + "2,048 - 11 columns", + "4,096 - 12 columns", + "reserved" +}; +#endif + +#if 0 + +/* + * Error indicator bits and masks + * Error masks are according with Table 5-17 of i7300 datasheet + */ + +enum error_mask { + EMASK_M1 = 1<<0, /* Memory Write error on non-redundant retry */ + EMASK_M2 = 1<<1, /* Memory or FB-DIMM configuration CRC read error */ + EMASK_M3 = 1<<2, /* Reserved */ + EMASK_M4 = 1<<3, /* Uncorrectable Data ECC on Replay */ + EMASK_M5 = 1<<4, /* Aliased Uncorrectable Non-Mirrored Demand Data ECC */ + EMASK_M6 = 1<<5, /* Unsupported on i7300 */ + EMASK_M7 = 1<<6, /* Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ + EMASK_M8 = 1<<7, /* Aliased Uncorrectable Patrol Data ECC */ + EMASK_M9 = 1<<8, /* Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC */ + EMASK_M10 = 1<<9, /* Unsupported on i7300 */ + EMASK_M11 = 1<<10, /* Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ + EMASK_M12 = 1<<11, /* Non-Aliased Uncorrectable Patrol Data ECC */ + EMASK_M13 = 1<<12, /* Memory Write error on first attempt */ + EMASK_M14 = 1<<13, /* FB-DIMM Configuration Write error on first attempt */ + EMASK_M15 = 1<<14, /* Memory or FB-DIMM configuration CRC read error */ + EMASK_M16 = 1<<15, /* Channel Failed-Over Occurred */ + EMASK_M17 = 1<<16, /* Correctable Non-Mirrored Demand Data ECC */ + EMASK_M18 = 1<<17, /* Unsupported on i7300 */ + EMASK_M19 = 1<<18, /* Correctable Resilver- or Spare-Copy Data ECC */ + EMASK_M20 = 1<<19, /* Correctable Patrol Data ECC */ + EMASK_M21 = 1<<20, /* FB-DIMM Northbound parity error on FB-DIMM Sync Status */ + EMASK_M22 = 1<<21, /* SPD protocol Error */ + EMASK_M23 = 1<<22, /* Non-Redundant Fast Reset Timeout */ + EMASK_M24 = 1<<23, /* Refresh error */ + EMASK_M25 = 1<<24, /* Memory Write error on redundant retry */ + EMASK_M26 = 1<<25, /* Redundant Fast Reset Timeout */ + EMASK_M27 = 1<<26, /* Correctable Counter Threshold Exceeded */ + EMASK_M28 = 1<<27, /* DIMM-Spare Copy Completed */ + EMASK_M29 = 1<<28, /* DIMM-Isolation Completed */ +}; + +/* + * Names to translate bit error into something useful + */ +static const char *error_name[] = { + [0] = "Memory Write error on non-redundant retry", + [1] = "Memory or FB-DIMM configuration CRC read error", + /* Reserved */ + [3] = "Uncorrectable Data ECC on Replay", + [4] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC", + /* M6 Unsupported on i7300 */ + [6] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", + [7] = "Aliased Uncorrectable Patrol Data ECC", + [8] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC", + /* M10 Unsupported on i7300 */ + [10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", + [11] = "Non-Aliased Uncorrectable Patrol Data ECC", + [12] = "Memory Write error on first attempt", + [13] = "FB-DIMM Configuration Write error on first attempt", + [14] = "Memory or FB-DIMM configuration CRC read error", + [15] = "Channel Failed-Over Occurred", + [16] = "Correctable Non-Mirrored Demand Data ECC", + /* M18 Unsupported on i7300 */ + [18] = "Correctable Resilver- or Spare-Copy Data ECC", + [19] = "Correctable Patrol Data ECC", + [20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status", + [21] = "SPD protocol Error", + [22] = "Non-Redundant Fast Reset Timeout", + [23] = "Refresh error", + [24] = "Memory Write error on redundant retry", + [25] = "Redundant Fast Reset Timeout", + [26] = "Correctable Counter Threshold Exceeded", + [27] = "DIMM-Spare Copy Completed", + [28] = "DIMM-Isolation Completed", +}; + +/* Fatal errors */ +#define ERROR_FAT_MASK (EMASK_M1 | \ + EMASK_M2 | \ + EMASK_M23) + +/* Correctable errors */ +#define ERROR_NF_CORRECTABLE (EMASK_M27 | \ + EMASK_M20 | \ + EMASK_M19 | \ + EMASK_M18 | \ + EMASK_M17 | \ + EMASK_M16) +#define ERROR_NF_DIMM_SPARE (EMASK_M29 | \ + EMASK_M28) +#define ERROR_NF_SPD_PROTOCOL (EMASK_M22) +#define ERROR_NF_NORTH_CRC (EMASK_M21) + +/* Recoverable errors */ +#define ERROR_NF_RECOVERABLE (EMASK_M26 | \ + EMASK_M25 | \ + EMASK_M24 | \ + EMASK_M15 | \ + EMASK_M14 | \ + EMASK_M13 | \ + EMASK_M12 | \ + EMASK_M11 | \ + EMASK_M9 | \ + EMASK_M8 | \ + EMASK_M7 | \ + EMASK_M5) + +/* uncorrectable errors */ +#define ERROR_NF_UNCORRECTABLE (EMASK_M4) + +/* mask to all non-fatal errors */ +#define ERROR_NF_MASK (ERROR_NF_CORRECTABLE | \ + ERROR_NF_UNCORRECTABLE | \ + ERROR_NF_RECOVERABLE | \ + ERROR_NF_DIMM_SPARE | \ + ERROR_NF_SPD_PROTOCOL | \ + ERROR_NF_NORTH_CRC) + +/* + * Define error masks for the several registers + */ + +/* Enable all fatal and non fatal errors */ +#define ENABLE_EMASK_ALL (ERROR_FAT_MASK | ERROR_NF_MASK) + +/* mask for fatal error registers */ +#define FERR_FAT_MASK ERROR_FAT_MASK + +/* masks for non-fatal error register */ +static inline int to_nf_mask(unsigned int mask) +{ + return (mask & EMASK_M29) | (mask >> 3); +}; + +static inline int from_nf_ferr(unsigned int mask) +{ + return (mask & EMASK_M29) | /* Bit 28 */ + (mask & ((1 << 28) - 1) << 3); /* Bits 0 to 27 */ +}; + +#define FERR_NF_MASK to_nf_mask(ERROR_NF_MASK) +#define FERR_NF_CORRECTABLE to_nf_mask(ERROR_NF_CORRECTABLE) +#define FERR_NF_DIMM_SPARE to_nf_mask(ERROR_NF_DIMM_SPARE) +#define FERR_NF_SPD_PROTOCOL to_nf_mask(ERROR_NF_SPD_PROTOCOL) +#define FERR_NF_NORTH_CRC to_nf_mask(ERROR_NF_NORTH_CRC) +#define FERR_NF_RECOVERABLE to_nf_mask(ERROR_NF_RECOVERABLE) +#define FERR_NF_UNCORRECTABLE to_nf_mask(ERROR_NF_UNCORRECTABLE) + +#endif + +/* Device name and register DID (Device ID) */ +struct i7300_dev_info { + const char *ctl_name; /* name for this device */ + u16 fsb_mapping_errors; /* DID for the branchmap,control */ +}; + +/* Table of devices attributes supported by this driver */ +static const struct i7300_dev_info i7300_devs[] = { + { + .ctl_name = "I7300", + .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, + }, +}; + +struct i7300_dimm_info { + int megabytes; /* size, 0 means not present */ +}; + +/* driver private data structure */ +struct i7300_pvt { + struct pci_dev *system_address; /* 16.0 */ + struct pci_dev *branchmap_werrors; /* 16.1 */ + struct pci_dev *fsb_error_regs; /* 16.2 */ + struct pci_dev *branch_pci[MAX_BRANCHES]; /* 21.0 and 22.0 */ + + u16 tolm; /* top of low memory */ + u64 ambase; /* AMB BAR */ + + u16 mir[MAX_MIR]; + + u16 mtr[MAX_SLOTS][MAX_BRANCHES]; /* Memory Technlogy Reg */ + u16 ambpresent[MAX_CHANNELS]; /* AMB present regs */ + + /* DIMM information matrix, allocating architecture maximums */ + struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS]; +}; + +#if 0 +/* I7300 MCH error information retrieved from Hardware */ +struct i7300_error_info { + /* These registers are always read from the MC */ + u32 ferr_fat_fbd; /* First Errors Fatal */ + u32 nerr_fat_fbd; /* Next Errors Fatal */ + u32 ferr_nf_fbd; /* First Errors Non-Fatal */ + u32 nerr_nf_fbd; /* Next Errors Non-Fatal */ + + /* These registers are input ONLY if there was a Recoverable Error */ + u32 redmemb; /* Recoverable Mem Data Error log B */ + u16 recmema; /* Recoverable Mem Error log A */ + u32 recmemb; /* Recoverable Mem Error log B */ + + /* These registers are input ONLY if there was a Non-Rec Error */ + u16 nrecmema; /* Non-Recoverable Mem log A */ + u16 nrecmemb; /* Non-Recoverable Mem log B */ + +}; +#endif + +/* FIXME: Why do we need to have this static? */ +static struct edac_pci_ctl_info *i7300_pci; + + +#if 0 +/* note that nrec_rdwr changed from NRECMEMA to NRECMEMB between the 5000 and + 5400 better to use an inline function than a macro in this case */ +static inline int nrec_bank(struct i7300_error_info *info) +{ + return ((info->nrecmema) >> 12) & 0x7; +} +static inline int nrec_rank(struct i7300_error_info *info) +{ + return ((info->nrecmema) >> 8) & 0xf; +} +static inline int nrec_buf_id(struct i7300_error_info *info) +{ + return ((info->nrecmema)) & 0xff; +} +static inline int nrec_rdwr(struct i7300_error_info *info) +{ + return (info->nrecmemb) >> 31; +} +/* This applies to both NREC and REC string so it can be used with nrec_rdwr + and rec_rdwr */ +static inline const char *rdwr_str(int rdwr) +{ + return rdwr ? "Write" : "Read"; +} +static inline int nrec_cas(struct i7300_error_info *info) +{ + return ((info->nrecmemb) >> 16) & 0x1fff; +} +static inline int nrec_ras(struct i7300_error_info *info) +{ + return (info->nrecmemb) & 0xffff; +} +static inline int rec_bank(struct i7300_error_info *info) +{ + return ((info->recmema) >> 12) & 0x7; +} +static inline int rec_rank(struct i7300_error_info *info) +{ + return ((info->recmema) >> 8) & 0xf; +} +static inline int rec_rdwr(struct i7300_error_info *info) +{ + return (info->recmemb) >> 31; +} +static inline int rec_cas(struct i7300_error_info *info) +{ + return ((info->recmemb) >> 16) & 0x1fff; +} +static inline int rec_ras(struct i7300_error_info *info) +{ + return (info->recmemb) & 0xffff; +} + +/* + * i7300_get_error_info Retrieve the hardware error information from + * the hardware and cache it in the 'info' + * structure + */ +static void i7300_get_error_info(struct mem_ctl_info *mci, + struct i7300_error_info *info) +{ + struct i7300_pvt *pvt; + u32 value; + + pvt = mci->pvt_info; + + /* read in the 1st FATAL error register */ + pci_read_config_dword(pvt->branchmap_werrors, FERR_FAT_FBD, &value); + + /* Mask only the bits that the doc says are valid + */ + value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK); + + /* If there is an error, then read in the + NEXT FATAL error register and the Memory Error Log Register A + */ + if (value & FERR_FAT_MASK) { + info->ferr_fat_fbd = value; + + /* harvest the various error data we need */ + pci_read_config_dword(pvt->branchmap_werrors, + NERR_FAT_FBD, &info->nerr_fat_fbd); + pci_read_config_word(pvt->branchmap_werrors, + NRECMEMA, &info->nrecmema); + pci_read_config_word(pvt->branchmap_werrors, + NRECMEMB, &info->nrecmemb); + + /* Clear the error bits, by writing them back */ + pci_write_config_dword(pvt->branchmap_werrors, + FERR_FAT_FBD, value); + } else { + info->ferr_fat_fbd = 0; + info->nerr_fat_fbd = 0; + info->nrecmema = 0; + info->nrecmemb = 0; + } + + /* read in the 1st NON-FATAL error register */ + pci_read_config_dword(pvt->branchmap_werrors, FERR_NF_FBD, &value); + + /* If there is an error, then read in the 1st NON-FATAL error + * register as well */ + if (value & FERR_NF_MASK) { + info->ferr_nf_fbd = value; + + /* harvest the various error data we need */ + pci_read_config_dword(pvt->branchmap_werrors, + NERR_NF_FBD, &info->nerr_nf_fbd); + pci_read_config_word(pvt->branchmap_werrors, + RECMEMA, &info->recmema); + pci_read_config_dword(pvt->branchmap_werrors, + RECMEMB, &info->recmemb); + pci_read_config_dword(pvt->branchmap_werrors, + REDMEMB, &info->redmemb); + + /* Clear the error bits, by writing them back */ + pci_write_config_dword(pvt->branchmap_werrors, + FERR_NF_FBD, value); + } else { + info->ferr_nf_fbd = 0; + info->nerr_nf_fbd = 0; + info->recmema = 0; + info->recmemb = 0; + info->redmemb = 0; + } +} + +/* + * i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci, + * struct i7300_error_info *info, + * int handle_errors); + * + * handle the Intel FATAL and unrecoverable errors, if any + */ +static void i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci, + struct i7300_error_info *info, + unsigned long allErrors) +{ + char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; + int branch; + int channel; + int bank; + int buf_id; + int rank; + int rdwr; + int ras, cas; + int errnum; + char *type = NULL; + + if (!allErrors) + return; /* if no error, return now */ + + if (allErrors & ERROR_FAT_MASK) + type = "FATAL"; + else if (allErrors & FERR_NF_UNCORRECTABLE) + type = "NON-FATAL uncorrected"; + else + type = "NON-FATAL recoverable"; + + /* ONLY ONE of the possible error bits will be set, as per the docs */ + + branch = extract_fbdchan_indx(info->ferr_fat_fbd); + channel = branch; + + /* Use the NON-Recoverable macros to extract data */ + bank = nrec_bank(info); + rank = nrec_rank(info); + buf_id = nrec_buf_id(info); + rdwr = nrec_rdwr(info); + ras = nrec_ras(info); + cas = nrec_cas(info); + + debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d " + "DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, channel + 1, branch >> 1, bank, + buf_id, rdwr_str(rdwr), ras, cas); + + /* Only 1 bit will be on */ + errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); + + /* Form out message */ + snprintf(msg, sizeof(msg), + "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s " + "RAS=%d CAS=%d %s Err=0x%lx (%s))", + type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas, + type, allErrors, error_name[errnum]); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); +} + +/* + * i7300_process_fatal_error_info(struct mem_ctl_info *mci, + * struct i7300_error_info *info, + * int handle_errors); + * + * handle the Intel NON-FATAL errors, if any + */ +static void i7300_process_nonfatal_error_info(struct mem_ctl_info *mci, + struct i7300_error_info *info) +{ + char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; + unsigned long allErrors; + int branch; + int channel; + int bank; + int rank; + int rdwr; + int ras, cas; + int errnum; + + /* mask off the Error bits that are possible */ + allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK); + if (!allErrors) + return; /* if no error, return now */ + + /* ONLY ONE of the possible error bits will be set, as per the docs */ + + if (allErrors & (ERROR_NF_UNCORRECTABLE | ERROR_NF_RECOVERABLE)) { + i7300_proccess_non_recoverable_info(mci, info, allErrors); + return; + } + + /* Correctable errors */ + if (allErrors & ERROR_NF_CORRECTABLE) { + debugf0("\tCorrected bits= 0x%lx\n", allErrors); + + branch = extract_fbdchan_indx(info->ferr_nf_fbd); + + channel = 0; + if (REC_ECC_LOCATOR_ODD(info->redmemb)) + channel = 1; + + /* Convert channel to be based from zero, instead of + * from branch base of 0 */ + channel += branch; + + bank = rec_bank(info); + rank = rec_rank(info); + rdwr = rec_rdwr(info); + ras = rec_ras(info); + cas = rec_cas(info); + + /* Only 1 bit will be on */ + errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); + + debugf0("\t\tCSROW= %d Channel= %d (Branch %d " + "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, branch >> 1, bank, + rdwr_str(rdwr), ras, cas); + + /* Form out message */ + snprintf(msg, sizeof(msg), + "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s " + "RAS=%d CAS=%d, CE Err=0x%lx (%s))", + branch >> 1, bank, rdwr_str(rdwr), ras, cas, + allErrors, error_name[errnum]); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ce(mci, rank, channel, msg); + + return; + } + + /* Miscelaneous errors */ + errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); + + branch = extract_fbdchan_indx(info->ferr_nf_fbd); + + i7300_mc_printk(mci, KERN_EMERG, + "Non-Fatal misc error (Branch=%d Err=%#lx (%s))", + branch >> 1, allErrors, error_name[errnum]); +} + +/* + * i7300_process_error_info Process the error info that is + * in the 'info' structure, previously retrieved from hardware + */ +static void i7300_process_error_info(struct mem_ctl_info *mci, + struct i7300_error_info *info) +{ u32 allErrors; + + /* First handle any fatal errors that occurred */ + allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK); + i7300_proccess_non_recoverable_info(mci, info, allErrors); + + /* now handle any non-fatal errors that occurred */ + i7300_process_nonfatal_error_info(mci, info); +} + +/* + * i7300_clear_error Retrieve any error from the hardware + * but do NOT process that error. + * Used for 'clearing' out of previous errors + * Called by the Core module. + */ +static void i7300_clear_error(struct mem_ctl_info *mci) +{ + struct i7300_error_info info; + + i7300_get_error_info(mci, &info); +} + +/* + * i7300_check_error Retrieve and process errors reported by the + * hardware. Called by the Core module. + */ +static void i7300_check_error(struct mem_ctl_info *mci) +{ + struct i7300_error_info info; + debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__); + i7300_get_error_info(mci, &info); + i7300_process_error_info(mci, &info); +} + +/* + * i7300_enable_error_reporting + * Turn on the memory reporting features of the hardware + */ +static void i7300_enable_error_reporting(struct mem_ctl_info *mci) +{ + struct i7300_pvt *pvt; + u32 fbd_error_mask; + + pvt = mci->pvt_info; + + /* Read the FBD Error Mask Register */ + pci_read_config_dword(pvt->branchmap_werrors, EMASK_FBD, + &fbd_error_mask); + + /* Enable with a '0' */ + fbd_error_mask &= ~(ENABLE_EMASK_ALL); + + pci_write_config_dword(pvt->branchmap_werrors, EMASK_FBD, + fbd_error_mask); +} +#endif + +/* + * determine_mtr(pvt, csrow, channel) + * + * return the proper MTR register as determine by the csrow and desired channel + */ +static int decode_mtr(struct i7300_pvt *pvt, + int slot, int ch, int branch, + struct i7300_dimm_info *dinfo, + struct csrow_info *p_csrow) +{ + int mtr, ans, addrBits, channel; + + channel = to_channel(ch, branch); + + mtr = pvt->mtr[slot][branch]; + ans = MTR_DIMMS_PRESENT(mtr) ? 1 : 0; + + debugf2("\tMTR%d CH%d: DIMMs are %s (mtr)\n", + slot, channel, + ans ? "Present" : "NOT Present"); + + /* Determine if there is a DIMM present in this DIMM slot */ + +#if 0 + if (!amb_present || !ans) + return 0; +#else + if (!ans) + return 0; +#endif + + /* Start with the number of bits for a Bank + * on the DRAM */ + addrBits = MTR_DRAM_BANKS_ADDR_BITS; + /* Add thenumber of ROW bits */ + addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr); + /* add the number of COLUMN bits */ + addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr); + /* add the number of RANK bits */ + addrBits += MTR_DIMM_RANKS(mtr); + + addrBits += 6; /* add 64 bits per DIMM */ + addrBits -= 20; /* divide by 2^^20 */ + addrBits -= 3; /* 8 bits per bytes */ + + dinfo->megabytes = 1 << addrBits; + + debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); + + debugf2("\t\tELECTRICAL THROTTLING is %s\n", + MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); + + debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); + debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANKS(mtr) ? "double" : "single"); + debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]); + debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]); + debugf2("\t\tSIZE: %d MB\n", dinfo->megabytes); + + p_csrow->grain = 8; + p_csrow->nr_pages = dinfo->megabytes << 8; + p_csrow->mtype = MEM_FB_DDR2; + p_csrow->edac_mode = EDAC_S8ECD8ED; + + /* ask what device type on this row */ + if (MTR_DRAM_WIDTH(mtr)) + p_csrow->dtype = DEV_X8; + else + p_csrow->dtype = DEV_X4; + + return mtr; +} + +/* + * print_dimm_size + * + * also will output a DIMM matrix map, if debug is enabled, for viewing + * how the DIMMs are populated + */ +static void print_dimm_size(struct i7300_pvt *pvt) +{ + struct i7300_dimm_info *dinfo; + char *p, *mem_buffer; + int space, n; + int channel, slot; + + space = PAGE_SIZE; + mem_buffer = p = kmalloc(space, GFP_KERNEL); + if (p == NULL) { + i7300_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n", + __FILE__, __func__); + return; + } + + n = snprintf(p, space, " "); + p += n; + space -= n; + for (channel = 0; channel < MAX_CHANNELS; channel++) { + n = snprintf(p, space, "channel %d | ", channel); + p += n; + space -= n; + } + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + n = snprintf(p, space, "-------------------------------" + "------------------------------"); + p += n; + space -= n; + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + + for (slot = 0; slot < MAX_SLOTS; slot++) { + n = snprintf(p, space, "csrow/SLOT %d ", slot); + p += n; + space -= n; + + for (channel = 0; channel < MAX_CHANNELS; channel++) { + dinfo = &pvt->dimm_info[slot][channel]; + n = snprintf(p, space, "%4d MB | ", dinfo->megabytes); + p += n; + space -= n; + } + + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + } + + n = snprintf(p, space, "-------------------------------" + "------------------------------"); + p += n; + space -= n; + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + + kfree(mem_buffer); +} + +/* + * i7300_init_csrows Initialize the 'csrows' table within + * the mci control structure with the + * addressing of memory. + * + * return: + * 0 success + * 1 no actual memory found on this MC + */ +static int i7300_init_csrows(struct mem_ctl_info *mci) +{ + struct i7300_pvt *pvt; + struct i7300_dimm_info *dinfo; + struct csrow_info *p_csrow; + int empty; + int mtr; + int ch, branch, slot, channel; + + pvt = mci->pvt_info; + + empty = 1; /* Assume NO memory */ + + debugf2("Memory Technology Registers:\n"); + + /* Get the AMB present registers for the four channels */ + for (branch = 0; branch < MAX_BRANCHES; branch++) { + /* Read and dump branch 0's MTRs */ + channel = to_channel(0, branch); + pci_read_config_word(pvt->branch_pci[branch], AMBPRESENT_0, + &pvt->ambpresent[channel]); + debugf2("\t\tAMB-present CH%d = 0x%x:\n", + channel, pvt->ambpresent[channel]); + + channel = to_channel(1, branch); + pci_read_config_word(pvt->branch_pci[branch], AMBPRESENT_1, + &pvt->ambpresent[channel]); + debugf2("\t\tAMB-present CH%d = 0x%x:\n", + channel, pvt->ambpresent[channel]); + } + + /* Get the set of MTR[0-7] regs by each branch */ + for (slot = 0; slot < MAX_SLOTS; slot++) { + int where = mtr_regs[slot]; + for (branch = 0; branch < MAX_BRANCHES; branch++) { + pci_read_config_word(pvt->branch_pci[branch], + where, + &pvt->mtr[slot][branch]); + for (ch = 0; ch < MAX_BRANCHES; ch++) { + int channel = to_channel(ch, branch); + + dinfo = &pvt->dimm_info[slot][channel]; + p_csrow = &mci->csrows[slot]; + + mtr = decode_mtr(pvt, slot, ch, branch, + dinfo, p_csrow); + /* if no DIMMS on this row, continue */ + if (!MTR_DIMMS_PRESENT(mtr)) + continue; + + p_csrow->csrow_idx = slot; + + /* FAKE OUT VALUES, FIXME */ + p_csrow->first_page = 0 + slot * 20; + p_csrow->last_page = 9 + slot * 20; + p_csrow->page_mask = 0xfff; + + empty = 0; + } + } + } + + return empty; +} + +static void decode_mir(int mir_no, u16 mir[MAX_MIR]) +{ + if (mir[mir_no] & 3) + debugf2("MIR%d: limit= 0x%x Branch(es) that participate: %s %s\n", + mir_no, + (mir[mir_no] >> 4) & 0xfff, + (mir[mir_no] & 1) ? "B0" : "", + (mir[mir_no] & 2) ? "B1": ""); +} + +/* + * i7300_get_mc_regs read in the necessary registers and + * cache locally + * + * Fills in the private data members + */ +static int i7300_get_mc_regs(struct mem_ctl_info *mci) +{ + struct i7300_pvt *pvt; + u32 actual_tolm; + int i, rc; + + pvt = mci->pvt_info; + + pci_read_config_dword(pvt->system_address, AMBASE, + (u32 *) &pvt->ambase); + + debugf2("AMBASE= 0x%lx\n", (long unsigned int)pvt->ambase); + + /* Get the Branch Map regs */ + pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm); + pvt->tolm >>= 12; + debugf2("TOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm, + pvt->tolm); + + actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28)); + debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n", + actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28); + + pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir[0]); + pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir[1]); + pci_read_config_word(pvt->branchmap_werrors, MIR2, &pvt->mir[2]); + + /* Decode the MIR regs */ + for (i = 0; i < MAX_MIR; i++) + decode_mir(i, pvt->mir); + + rc = i7300_init_csrows(mci); + if (rc < 0) + return rc; + + /* Go and determine the size of each DIMM and place in an + * orderly matrix */ + print_dimm_size(pvt); + + return 0; +} + +/* + * i7300_put_devices 'put' all the devices that we have + * reserved via 'get' + */ +static void i7300_put_devices(struct mem_ctl_info *mci) +{ + struct i7300_pvt *pvt; + int branch; + + pvt = mci->pvt_info; + + /* Decrement usage count for devices */ + for (branch = 0; branch < MAX_CH_PER_BRANCH; branch++) + pci_dev_put(pvt->branch_pci[branch]); + pci_dev_put(pvt->fsb_error_regs); + pci_dev_put(pvt->branchmap_werrors); +} + +/* + * i7300_get_devices Find and perform 'get' operation on the MCH's + * device/functions we want to reference for this driver + * + * Need to 'get' device 16 func 1 and func 2 + */ +static int i7300_get_devices(struct mem_ctl_info *mci, int dev_idx) +{ + struct i7300_pvt *pvt; + struct pci_dev *pdev; + + pvt = mci->pvt_info; + + /* Attempt to 'get' the MCH register we want */ + pdev = NULL; + while (!pvt->branchmap_werrors || !pvt->fsb_error_regs) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, pdev); + if (!pdev) { + /* End of list, leave */ + i7300_printk(KERN_ERR, + "'system address,Process Bus' " + "device not found:" + "vendor 0x%x device 0x%x ERR funcs " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_ERR); + goto error; + } + + /* Store device 16 funcs 1 and 2 */ + switch (PCI_FUNC(pdev->devfn)) { + case 1: + pvt->branchmap_werrors = pdev; + break; + case 2: + pvt->fsb_error_regs = pdev; + break; + } + } + + debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n", + pci_name(pvt->system_address), + pvt->system_address->vendor, pvt->system_address->device); + debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->branchmap_werrors), + pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device); + debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->fsb_error_regs), + pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device); + + pvt->branch_pci[0] = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_FB0, + NULL); + if (!pvt->branch_pci[0]) { + i7300_printk(KERN_ERR, + "MC: 'BRANCH 0' device not found:" + "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_FB0); + goto error; + } + + pvt->branch_pci[1] = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_FB1, + NULL); + if (!pvt->branch_pci[1]) { + i7300_printk(KERN_ERR, + "MC: 'BRANCH 1' device not found:" + "vendor 0x%x device 0x%x Func 0 " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_FB1); + goto error; + } + + return 0; + +error: + i7300_put_devices(mci); + return -ENODEV; +} + +/* + * i7300_probe1 Probe for ONE instance of device to see if it is + * present. + * return: + * 0 for FOUND a device + * < 0 for error code + */ +static int i7300_probe1(struct pci_dev *pdev, int dev_idx) +{ + struct mem_ctl_info *mci; + struct i7300_pvt *pvt; + int num_channels; + int num_dimms_per_channel; + int num_csrows; + + if (dev_idx >= ARRAY_SIZE(i7300_devs)) + return -EINVAL; + + debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n", + __func__, + pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + + /* We only are looking for func 0 of the set */ + if (PCI_FUNC(pdev->devfn) != 0) + return -ENODEV; + + /* As we don't have a motherboard identification routine to determine + * actual number of slots/dimms per channel, we thus utilize the + * resource as specified by the chipset. Thus, we might have + * have more DIMMs per channel than actually on the mobo, but this + * allows the driver to support upto the chipset max, without + * some fancy mobo determination. + */ + num_dimms_per_channel = MAX_SLOTS; + num_channels = MAX_CHANNELS; + num_csrows = MAX_SLOTS * MAX_CHANNELS; + + debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n", + __func__, num_channels, num_dimms_per_channel, num_csrows); + + /* allocate a new MC control structure */ + mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0); + + if (mci == NULL) + return -ENOMEM; + + debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci); + + mci->dev = &pdev->dev; /* record ptr to the generic device */ + + pvt = mci->pvt_info; + pvt->system_address = pdev; /* Record this device in our private */ + + /* 'get' the pci devices we want to reserve for our use */ + if (i7300_get_devices(mci, dev_idx)) + goto fail0; + + mci->mc_idx = 0; + mci->mtype_cap = MEM_FLAG_FB_DDR2; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "i7300_edac.c"; + mci->mod_ver = I7300_REVISION; + mci->ctl_name = i7300_devs[dev_idx].ctl_name; + mci->dev_name = pci_name(pdev); + mci->ctl_page_to_phys = NULL; + +#if 0 + /* Set the function pointer to an actual operation function */ + mci->edac_check = i7300_check_error; +#endif + + /* initialize the MC control structure 'csrows' table + * with the mapping and control information */ + if (i7300_get_mc_regs(mci)) { + debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n" + " because i7300_init_csrows() returned nonzero " + "value\n"); + mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ + } else { +#if 0 + debugf1("MC: Enable error reporting now\n"); + i7300_enable_error_reporting(mci); +#endif + } + + /* add this new MC control structure to EDAC's list of MCs */ + if (edac_mc_add_mc(mci)) { + debugf0("MC: " __FILE__ + ": %s(): failed edac_mc_add_mc()\n", __func__); + /* FIXME: perhaps some code should go here that disables error + * reporting if we just enabled it + */ + goto fail1; + } + +#if 0 + i7300_clear_error(mci); +#endif + + /* allocating generic PCI control info */ + i7300_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!i7300_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); + printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + + return 0; + + /* Error exit unwinding stack */ +fail1: + + i7300_put_devices(mci); + +fail0: + edac_mc_free(mci); + return -ENODEV; +} + +/* + * i7300_init_one constructor for one instance of device + * + * returns: + * negative on error + * count (>= 0) + */ +static int __devinit i7300_init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int rc; + + debugf0("MC: " __FILE__ ": %s()\n", __func__); + + /* wake up device */ + rc = pci_enable_device(pdev); + if (rc == -EIO) + return rc; + + /* now probe and enable the device */ + return i7300_probe1(pdev, id->driver_data); +} + +/* + * i7300_remove_one destructor for one instance of device + * + */ +static void __devexit i7300_remove_one(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci; + + debugf0(__FILE__ ": %s()\n", __func__); + + if (i7300_pci) + edac_pci_release_generic_ctl(i7300_pci); + + mci = edac_mc_del_mc(&pdev->dev); + if (!mci) + return; + + /* retrieve references to resources, and free those resources */ + i7300_put_devices(mci); + + edac_mc_free(mci); +} + +/* + * pci_device_id table for which devices we are looking for + * + * The "E500P" device is the first device supported. + */ +static const struct pci_device_id i7300_pci_tbl[] __devinitdata = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR)}, + {0,} /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, i7300_pci_tbl); + +/* + * i7300_driver pci_driver structure for this module + * + */ +static struct pci_driver i7300_driver = { + .name = "i7300_edac", + .probe = i7300_init_one, + .remove = __devexit_p(i7300_remove_one), + .id_table = i7300_pci_tbl, +}; + +/* + * i7300_init Module entry function + * Try to initialize this module for its devices + */ +static int __init i7300_init(void) +{ + int pci_rc; + + debugf2("MC: " __FILE__ ": %s()\n", __func__); + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + + pci_rc = pci_register_driver(&i7300_driver); + + return (pci_rc < 0) ? pci_rc : 0; +} + +/* + * i7300_exit() Module exit function + * Unregister the driver + */ +static void __exit i7300_exit(void) +{ + debugf2("MC: " __FILE__ ": %s()\n", __func__); + pci_unregister_driver(&i7300_driver); +} + +module_init(i7300_init); +module_exit(i7300_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Mauro Carvalho Chehab "); +MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)"); +MODULE_DESCRIPTION("MC Driver for Intel I7300 memory controllers - " + I7300_REVISION); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d36cad..2eabe311f0d3 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -815,7 +815,7 @@ #define PCI_VENDOR_ID_ANIGMA 0x1051 #define PCI_DEVICE_ID_ANIGMA_MC145575 0x0100 - + #define PCI_VENDOR_ID_EFAR 0x1055 #define PCI_DEVICE_ID_EFAR_SLC90E66_1 0x9130 #define PCI_DEVICE_ID_EFAR_SLC90E66_3 0x9463 @@ -1446,7 +1446,7 @@ #define PCI_VENDOR_ID_ZIATECH 0x1138 #define PCI_DEVICE_ID_ZIATECH_5550_HC 0x5550 - + #define PCI_VENDOR_ID_SYSKONNECT 0x1148 #define PCI_DEVICE_ID_SYSKONNECT_TR 0x4200 @@ -1600,8 +1600,8 @@ #define PCI_DEVICE_ID_RP8OCTA 0x0005 #define PCI_DEVICE_ID_RP8J 0x0006 #define PCI_DEVICE_ID_RP4J 0x0007 -#define PCI_DEVICE_ID_RP8SNI 0x0008 -#define PCI_DEVICE_ID_RP16SNI 0x0009 +#define PCI_DEVICE_ID_RP8SNI 0x0008 +#define PCI_DEVICE_ID_RP16SNI 0x0009 #define PCI_DEVICE_ID_RPP4 0x000A #define PCI_DEVICE_ID_RPP8 0x000B #define PCI_DEVICE_ID_RP4M 0x000D @@ -1611,9 +1611,9 @@ #define PCI_DEVICE_ID_URP8INTF 0x0802 #define PCI_DEVICE_ID_URP16INTF 0x0803 #define PCI_DEVICE_ID_URP8OCTA 0x0805 -#define PCI_DEVICE_ID_UPCI_RM3_8PORT 0x080C +#define PCI_DEVICE_ID_UPCI_RM3_8PORT 0x080C #define PCI_DEVICE_ID_UPCI_RM3_4PORT 0x080D -#define PCI_DEVICE_ID_CRP16INTF 0x0903 +#define PCI_DEVICE_ID_CRP16INTF 0x0903 #define PCI_VENDOR_ID_CYCLADES 0x120e #define PCI_DEVICE_ID_CYCLOM_Y_Lo 0x0100 @@ -2139,7 +2139,7 @@ #define PCI_DEVICE_ID_RASTEL_2PORT 0x2000 #define PCI_VENDOR_ID_ZOLTRIX 0x15b0 -#define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0 +#define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0 #define PCI_VENDOR_ID_MELLANOX 0x15b3 #define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44 @@ -2413,7 +2413,7 @@ #define PCI_DEVICE_ID_INTEL_82815_MC 0x1130 #define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132 #define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 -#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 +#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 #define PCI_DEVICE_ID_INTEL_7205_0 0x255d #define PCI_DEVICE_ID_INTEL_82437 0x122d #define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e @@ -2616,6 +2616,9 @@ #define PCI_DEVICE_ID_INTEL_MCH_PC 0x3599 #define PCI_DEVICE_ID_INTEL_MCH_PC1 0x359a #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e +#define PCI_DEVICE_ID_INTEL_I7300_MCH_ERR 0x360c +#define PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 0x360f +#define PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 0x3610 #define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b #define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c #define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710 -- cgit v1.2.3 From dca43c75e7e545694a9dd6288553f55c53e2a3a3 Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Fri, 27 Aug 2010 19:13:28 +0000 Subject: tcp: Add TCP_USER_TIMEOUT socket option. This patch provides a "user timeout" support as described in RFC793. The socket option is also needed for the the local half of RFC5482 "TCP User Timeout Option". TCP_USER_TIMEOUT is a TCP level socket option that takes an unsigned int, when > 0, to specify the maximum amount of time in ms that transmitted data may remain unacknowledged before TCP will forcefully close the corresponding connection and return ETIMEDOUT to the application. If 0 is given, TCP will continue to use the system default. Increasing the user timeouts allows a TCP connection to survive extended periods without end-to-end connectivity. Decreasing the user timeouts allows applications to "fail fast" if so desired. Otherwise it may take upto 20 minutes with the current system defaults in a normal WAN environment. The socket option can be made during any state of a TCP connection, but is only effective during the synchronized states of a connection (ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, or LAST-ACK). Moreover, when used with the TCP keepalive (SO_KEEPALIVE) option, TCP_USER_TIMEOUT will overtake keepalive to determine when to close a connection due to keepalive failure. The option does not change in anyway when TCP retransmits a packet, nor when a keepalive probe will be sent. This option, like many others, will be inherited by an acceptor from its listener. Signed-off-by: H.K. Jerry Chu Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/net/inet_connection_sock.h | 1 + net/ipv4/tcp.c | 11 ++++++++++- net/ipv4/tcp_timer.c | 40 ++++++++++++++++++++++++-------------- 4 files changed, 37 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a778ee024590..e64f4c67d0ef 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -105,6 +105,7 @@ enum { #define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ #define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ #define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ +#define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ /* for TCP_INFO socket option */ #define TCPI_OPT_TIMESTAMPS 1 diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b6d3b55da19b..e4f494b42e06 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -125,6 +125,7 @@ struct inet_connection_sock { int probe_size; } icsk_mtup; u32 icsk_ca_priv[16]; + u32 icsk_user_timeout; #define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 176e11aaea77..cf3254528753 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2391,7 +2391,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, err = tp->af_specific->md5_parse(sk, optval, optlen); break; #endif - + case TCP_USER_TIMEOUT: + /* Cap the max timeout in ms TCP will retry/retrans + * before giving up and aborting (ETIMEDOUT) a connection. + */ + icsk->icsk_user_timeout = msecs_to_jiffies(val); + break; default: err = -ENOPROTOOPT; break; @@ -2610,6 +2615,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_THIN_DUPACK: val = tp->thin_dupack; break; + + case TCP_USER_TIMEOUT: + val = jiffies_to_msecs(icsk->icsk_user_timeout); + break; default: return -ENOPROTOOPT; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 808bb920c9f5..11569deccbea 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -138,10 +138,10 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) * retransmissions with an initial RTO of TCP_RTO_MIN. */ static bool retransmits_timed_out(struct sock *sk, - unsigned int boundary) + unsigned int boundary, + unsigned int timeout) { - unsigned int timeout, linear_backoff_thresh; - unsigned int start_ts; + unsigned int linear_backoff_thresh, start_ts; if (!inet_csk(sk)->icsk_retransmits) return false; @@ -151,14 +151,15 @@ static bool retransmits_timed_out(struct sock *sk, else start_ts = tcp_sk(sk)->retrans_stamp; - linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); - - if (boundary <= linear_backoff_thresh) - timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; - else - timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + - (boundary - linear_backoff_thresh) * TCP_RTO_MAX; + if (likely(timeout == 0)) { + linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); + if (boundary <= linear_backoff_thresh) + timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; + else + timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + + (boundary - linear_backoff_thresh) * TCP_RTO_MAX; + } return (tcp_time_stamp - start_ts) >= timeout; } @@ -174,7 +175,7 @@ static int tcp_write_timeout(struct sock *sk) dst_negative_advice(sk); retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; } else { - if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { + if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); @@ -187,14 +188,16 @@ static int tcp_write_timeout(struct sock *sk) retry_until = tcp_orphan_retries(sk, alive); do_reset = alive || - !retransmits_timed_out(sk, retry_until); + !retransmits_timed_out(sk, retry_until, 0); if (tcp_out_of_resources(sk, do_reset)) return 1; } } - if (retransmits_timed_out(sk, retry_until)) { + if (retransmits_timed_out(sk, retry_until, + (1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV) ? 0 : + icsk->icsk_user_timeout)) { /* Has it gone just too far? */ tcp_write_err(sk); return 1; @@ -436,7 +439,7 @@ out_reset_timer: icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); - if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) + if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0)) __sk_dst_reset(sk); out:; @@ -556,7 +559,14 @@ static void tcp_keepalive_timer (unsigned long data) elapsed = keepalive_time_elapsed(tp); if (elapsed >= keepalive_time_when(tp)) { - if (icsk->icsk_probes_out >= keepalive_probes(tp)) { + /* If the TCP_USER_TIMEOUT option is enabled, use that + * to determine when to timeout instead. + */ + if ((icsk->icsk_user_timeout != 0 && + elapsed >= icsk->icsk_user_timeout && + icsk->icsk_probes_out > 0) || + (icsk->icsk_user_timeout == 0 && + icsk->icsk_probes_out >= keepalive_probes(tp))) { tcp_send_active_reset(sk, GFP_ATOMIC); tcp_write_err(sk); goto out; -- cgit v1.2.3 From 22b71c8f4f3db8df92f5e7b081c265bc56c0bd2f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 29 Aug 2010 19:23:12 +0000 Subject: tcp/dccp: Consolidate common code for RFC 3390 conversion This patch consolidates initial-window code common to TCP and CCID-2: * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341). Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/net/tcp.h | 15 +++++++++++++++ net/dccp/ccids/ccid2.c | 8 ++------ net/ipv4/tcp_input.c | 17 ++--------------- 3 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index df6a2eb20193..a64022199b62 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -779,6 +779,21 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* Use define here intentionally to get WARN_ON location shown at the caller */ #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) +/* + * Convert RFC 3390 larger initial window into an equivalent number of packets. + * + * John Heffner states: + * + * The RFC specifies a window of no more than 4380 bytes + * unless 2*MSS > 4380. Reading the pseudocode in the RFC + * is a bit misleading because they use a clamp at 4380 bytes + * rather than a multiplier in the relevant range. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 smss) +{ + return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3); +} + extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 8c95813bcc67..b9c942a09c98 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -641,12 +641,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ hc->tx_ssthresh = ~0U; - /* - * RFC 4341, 5: "The cwnd parameter is initialized to at most four - * packets for new connections, following the rules from [RFC3390]". - * We need to convert the bytes of RFC3390 into the packets of RFC 4341. - */ - hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U); + /* Use larger initial windows (RFC 4341, section 5). */ + hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); /* Make sure that Ack Ratio is enabled and within bounds. */ max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e663b78a2ef6..1bc87a05c734 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -805,25 +805,12 @@ void tcp_update_metrics(struct sock *sk) } } -/* Numbers are taken from RFC3390. - * - * John Heffner states: - * - * The RFC specifies a window of no more than 4380 bytes - * unless 2*MSS > 4380. Reading the pseudocode in the RFC - * is a bit misleading because they use a clamp at 4380 bytes - * rather than use a multiplier in the relevant range. - */ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) { __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); - if (!cwnd) { - if (tp->mss_cache > 1460) - cwnd = 2; - else - cwnd = (tp->mss_cache > 1095) ? 3 : 4; - } + if (!cwnd) + cwnd = rfc3390_bytes_to_packets(tp->mss_cache); return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } -- cgit v1.2.3 From 3d5b99ae82f8742e3bb1f8634fd11ac36ea19ee1 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 29 Aug 2010 19:27:34 +0000 Subject: TCP: update initial windows according to RFC 5681 This updates the use of larger initial windows, as originally specified in RFC 3390, to use the newer IW values specified in RFC 5681, section 3.1. The changes made in RFC 5681 are: a) the setting now is more clearly specified in units of segments (as the comments by John Heffner emphasized, this was not very clear in RFC 3390); b) for connections with 1095 < SMSS <= 2190 there is now a change: - RFC 3390 says that IW <= 4380, - RFC 5681 says that IW = 3 * SMSS <= 6570. Since RFC 3390 is older and "only" proposed standard, whereas the newer RFC 5681 is already draft standard, it seems preferable to use the newer IW variant. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/net/tcp.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index a64022199b62..11dbacc886c0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -781,17 +781,11 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* * Convert RFC 3390 larger initial window into an equivalent number of packets. - * - * John Heffner states: - * - * The RFC specifies a window of no more than 4380 bytes - * unless 2*MSS > 4380. Reading the pseudocode in the RFC - * is a bit misleading because they use a clamp at 4380 bytes - * rather than a multiplier in the relevant range. + * This is based on the numbers specified in RFC 5681, 3.1. */ static inline u32 rfc3390_bytes_to_packets(const u32 smss) { - return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3); + return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); } extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); -- cgit v1.2.3 From 4dc89133f49b8cfd77ba7e83f5960aed63aaa99e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Aug 2010 07:40:16 +0000 Subject: net: add a comment on netdev->last_rx As some driver authors seem to reintroduce dev->last_rx use, add a comment to strongly discourage this. Since commit 6cf3f41e6c0 (bonding, net: Move last_rx update into bonding recv logic), network drivers dont need to update last_rx themselves, unless they use this field to implement a timeout. Not updating last_rx helps not dirtying a cache line, improving performance in SMP. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0cf9448a32c4..c82220a9f3d5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -953,7 +953,14 @@ struct net_device { /* * Cache line mostly used on receive path (including eth_type_trans()) */ - unsigned long last_rx; /* Time of last Rx */ + unsigned long last_rx; /* Time of last Rx + * This should not be set in + * drivers, unless really needed, + * because network stack (bonding) + * use it if/when necessary, to + * avoid dirtying this cache line. + */ + /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; /* hw address, (before bcast because most packets are -- cgit v1.2.3 From 86cac58b71227cc34a3d0e78f19585c0eff49ea3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Aug 2010 18:25:32 +0000 Subject: skge: add GRO support - napi_gro_flush() is exported from net/core/dev.c, to avoid an irq_save/irq_restore in the packet receive path. - use napi_gro_receive() instead of netif_receive_skb() - use napi_gro_flush() before calling __napi_complete() - turn on NETIF_F_GRO by default - Tested on a Marvell 88E8001 Gigabit NIC Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/skge.c | 5 +++-- include/linux/netdevice.h | 1 + net/core/dev.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 40e5c46e7571..a8a63581d63d 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -3178,8 +3178,7 @@ static int skge_poll(struct napi_struct *napi, int to_do) skb = skge_rx_get(dev, e, control, rd->status, rd->csum2); if (likely(skb)) { - netif_receive_skb(skb); - + napi_gro_receive(napi, skb); ++work_done; } } @@ -3192,6 +3191,7 @@ static int skge_poll(struct napi_struct *napi, int to_do) if (work_done < to_do) { unsigned long flags; + napi_gro_flush(napi); spin_lock_irqsave(&hw->hw_lock, flags); __napi_complete(napi); hw->intr_mask |= napimask[skge->port]; @@ -3849,6 +3849,7 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port, dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; skge->rx_csum = 1; } + dev->features |= NETIF_F_GRO; /* read the mac address */ memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port*8, ETH_ALEN); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c82220a9f3d5..af05186d5b36 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1702,6 +1702,7 @@ extern gro_result_t dev_gro_receive(struct napi_struct *napi, extern gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb); extern gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); +extern void napi_gro_flush(struct napi_struct *napi); extern void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb); extern struct sk_buff * napi_get_frags(struct napi_struct *napi); diff --git a/net/core/dev.c b/net/core/dev.c index 63bd20a75929..d8c43e73f0b7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3063,7 +3063,7 @@ out: return netif_receive_skb(skb); } -static void napi_gro_flush(struct napi_struct *napi) +inline void napi_gro_flush(struct napi_struct *napi) { struct sk_buff *skb, *next; @@ -3076,6 +3076,7 @@ static void napi_gro_flush(struct napi_struct *napi) napi->gro_count = 0; napi->gro_list = NULL; } +EXPORT_SYMBOL(napi_gro_flush); enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { -- cgit v1.2.3 From c68839963426d42bdb2c915b435f9860d060e645 Mon Sep 17 00:00:00 2001 From: Peter Meerwald Date: Thu, 2 Sep 2010 04:06:24 +0000 Subject: net: Improve comments in include/linux/phy.h Correct state range of PHY bus addresses (i.e. 0-31) in comment, make spelling of PHY consistent in comments. Signed-off-by: Peter Meerwald Signed-off-by: David S. Miller --- include/linux/phy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 6b0a782c6224..a6e047a04f79 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -116,7 +116,7 @@ struct mii_bus { /* list of all PHYs on bus */ struct phy_device *phy_map[PHY_MAX_ADDR]; - /* Phy addresses to be ignored when probing */ + /* PHY addresses to be ignored when probing */ u32 phy_mask; /* @@ -283,7 +283,7 @@ struct phy_device { phy_interface_t interface; - /* Bus address of the PHY (0-32) */ + /* Bus address of the PHY (0-31) */ int addr; /* -- cgit v1.2.3 From bc8acf2c8c3e43fcc192762a9f964b3e9a17748b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Sep 2010 13:07:41 -0700 Subject: drivers/net: avoid some skb->ip_summed initializations fresh skbs have ip_summed set to CHECKSUM_NONE (0) We can avoid setting again skb->ip_summed to CHECKSUM_NONE in drivers. Introduce skb_checksum_none_assert() helper so that we keep this assertion documented in driver sources. Change most occurrences of : skb->ip_summed = CHECKSUM_NONE; by : skb_checksum_none_assert(skb); Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/8139cp.c | 2 +- drivers/net/acenic.c | 2 +- drivers/net/atl1c/atl1c_main.c | 2 +- drivers/net/atl1e/atl1e_main.c | 2 +- drivers/net/atlx/atl1.c | 2 +- drivers/net/b44.c | 2 +- drivers/net/benet/be_main.c | 2 +- drivers/net/bna/bnad.c | 2 +- drivers/net/bnx2.c | 2 +- drivers/net/bnx2x/bnx2x_cmn.c | 2 +- drivers/net/cassini.c | 2 +- drivers/net/chelsio/sge.c | 2 +- drivers/net/cpmac.c | 2 +- drivers/net/cxgb3/sge.c | 2 +- drivers/net/cxgb4/sge.c | 2 +- drivers/net/cxgb4vf/sge.c | 2 +- drivers/net/dm9000.c | 2 +- drivers/net/e1000/e1000_main.c | 3 ++- drivers/net/e1000e/netdev.c | 3 ++- drivers/net/gianfar.c | 2 +- drivers/net/greth.c | 2 +- drivers/net/ibmlana.c | 2 +- drivers/net/igb/igb_main.c | 2 +- drivers/net/igbvf/netdev.c | 2 +- drivers/net/ipg.c | 6 +++--- drivers/net/iseries_veth.c | 2 +- drivers/net/ixgb/ixgb_main.c | 4 ++-- drivers/net/ixgbe/ixgbe_fcoe.c | 5 +++-- drivers/net/ixgbe/ixgbe_main.c | 2 +- drivers/net/ixgbevf/ixgbevf_main.c | 2 +- drivers/net/jme.c | 2 +- drivers/net/ll_temac_main.c | 2 +- drivers/net/macb.c | 2 +- drivers/net/niu.c | 2 +- drivers/net/ns83820.c | 2 +- drivers/net/pasemi_mac.c | 2 +- drivers/net/ps3_gelic_net.c | 4 ++-- drivers/net/qla3xxx.c | 4 ++-- drivers/net/qlcnic/qlcnic_init.c | 2 +- drivers/net/qlge/qlge_main.c | 6 +++--- drivers/net/r8169.c | 2 +- drivers/net/s2io.c | 4 ++-- drivers/net/sb1250-mac.c | 2 +- drivers/net/sfc/rx.c | 2 +- drivers/net/sh_eth.c | 2 +- drivers/net/smsc911x.c | 2 +- drivers/net/spider_net.c | 4 ++-- drivers/net/stmmac/stmmac_main.c | 2 +- drivers/net/tehuti.c | 5 +++-- drivers/net/tg3.c | 2 +- drivers/net/typhoon.c | 2 +- drivers/net/via-velocity.c | 2 +- drivers/net/vmxnet3/vmxnet3_drv.c | 4 ++-- drivers/net/vxge/vxge-main.c | 2 +- drivers/net/xilinx_emaclite.c | 2 +- include/linux/skbuff.h | 15 +++++++++++++++ 56 files changed, 86 insertions(+), 67 deletions(-) (limited to 'include') diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c index 4a4f6b81e32d..237d4ea5a416 100644 --- a/drivers/net/8139cp.c +++ b/drivers/net/8139cp.c @@ -561,7 +561,7 @@ rx_status_loop: if (cp_rx_csum_ok(status)) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb_put(skb, len); diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c index b9a591604e5b..41d9911202d0 100644 --- a/drivers/net/acenic.c +++ b/drivers/net/acenic.c @@ -2033,7 +2033,7 @@ static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm) skb->csum = htons(csum); skb->ip_summed = CHECKSUM_COMPLETE; } else { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } /* send it up */ diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c index 61f1634ab3f8..553230eb365c 100644 --- a/drivers/net/atl1c/atl1c_main.c +++ b/drivers/net/atl1c/atl1c_main.c @@ -1719,7 +1719,7 @@ static inline void atl1c_rx_checksum(struct atl1c_adapter *adapter, * cannot figure out if the packet is fragmented or not, * so we tell the KERNEL CHECKSUM_NONE */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, const int ringid) diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c index 1ae44bb26317..56ace3fbe40d 100644 --- a/drivers/net/atl1e/atl1e_main.c +++ b/drivers/net/atl1e/atl1e_main.c @@ -1331,7 +1331,7 @@ static inline void atl1e_rx_checksum(struct atl1e_adapter *adapter, u16 pkt_flags; u16 err_flags; - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); pkt_flags = prrs->pkt_flag; err_flags = prrs->err_flag; if (((pkt_flags & RRS_IS_IPV4) || (pkt_flags & RRS_IS_IPV6)) && diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 5837b0184d4b..e1e0171d6e62 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -1805,7 +1805,7 @@ static void atl1_rx_checksum(struct atl1_adapter *adapter, * the higher layers and let it be sorted out there. */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (unlikely(rrd->pkt_flg & PACKET_FLAG_ERR)) { if (rrd->err_flg & (ERR_FLAG_CRC | ERR_FLAG_TRUNC | diff --git a/drivers/net/b44.c b/drivers/net/b44.c index 37617abc1647..7342308718a0 100644 --- a/drivers/net/b44.c +++ b/drivers/net/b44.c @@ -818,7 +818,7 @@ static int b44_rx(struct b44 *bp, int budget) copy_skb->data, len); skb = copy_skb; } - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, bp->dev); netif_receive_skb(skb); received++; diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c index 9db10fec8235..dcee7a20c0b9 100644 --- a/drivers/net/benet/be_main.c +++ b/drivers/net/benet/be_main.c @@ -1016,7 +1016,7 @@ static void be_rx_compl_process(struct be_adapter *adapter, skb_fill_rx_data(adapter, skb, rxcp, num_rcvd); if (do_pkt_csum(rxcp, adapter->rx_csum)) - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); else skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/drivers/net/bna/bnad.c b/drivers/net/bna/bnad.c index 79c4c2441449..44adc7aefddc 100644 --- a/drivers/net/bna/bnad.c +++ b/drivers/net/bna/bnad.c @@ -510,7 +510,7 @@ bnad_poll_cq(struct bnad *bnad, struct bna_ccb *ccb, int budget) (flags & BNA_CQ_EF_L4_CKSUM_OK))) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); rcb->rxq->rx_packets++; rcb->rxq->rx_bytes += skb->len; diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index a0e02aa42214..4ff76e38e788 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -3218,7 +3218,7 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget) } - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (bp->rx_csum && (status & (L2_FHDR_STATUS_TCP_SEGMENT | L2_FHDR_STATUS_UDP_DATAGRAM))) { diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c index da96d1a18c20..0e4caf411905 100644 --- a/drivers/net/bnx2x/bnx2x_cmn.c +++ b/drivers/net/bnx2x/bnx2x_cmn.c @@ -623,7 +623,7 @@ reuse_rx: /* Set Toeplitz hash for a none-LRO skb */ bnx2x_set_skb_rxhash(bp, cqe, skb); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (bp->rx_csum) { if (likely(BNX2X_RX_CSUM_OK(cqe))) skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c index 28c88eeec757..32aaadc4734f 100644 --- a/drivers/net/cassini.c +++ b/drivers/net/cassini.c @@ -2149,7 +2149,7 @@ end_copy_pkt: skb->csum = csum_unfold(~csum); skb->ip_summed = CHECKSUM_COMPLETE; } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); return len; } diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index f01cfdb995de..1950b9a20ecd 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -1388,7 +1388,7 @@ static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) ++st->rx_cso_good; skb->ip_summed = CHECKSUM_UNNECESSARY; } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (unlikely(adapter->vlan_grp && p->vlan_valid)) { st->vlan_xtract++; diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c index 5a5af1ca7541..fec939f8f65f 100644 --- a/drivers/net/cpmac.c +++ b/drivers/net/cpmac.c @@ -391,7 +391,7 @@ static struct sk_buff *cpmac_rx_one(struct cpmac_priv *priv, if (likely(skb)) { skb_put(desc->skb, desc->datalen); desc->skb->protocol = eth_type_trans(desc->skb, priv->dev); - desc->skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(desc->skb); priv->dev->stats.rx_packets++; priv->dev->stats.rx_bytes += desc->datalen; result = desc->skb; diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 8ff96c6f6de5..c5a142bea5e9 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -2022,7 +2022,7 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq, qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; skb->ip_summed = CHECKSUM_UNNECESSARY; } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb_record_rx_queue(skb, qs - &adap->sge.qs[0]); if (unlikely(p->vlan_valid)) { diff --git a/drivers/net/cxgb4/sge.c b/drivers/net/cxgb4/sge.c index 6ddb3bb0ce67..9967f3debce7 100644 --- a/drivers/net/cxgb4/sge.c +++ b/drivers/net/cxgb4/sge.c @@ -1605,7 +1605,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, rxq->stats.rx_cso++; } } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (unlikely(pkt->vlan_ex)) { struct vlan_group *grp = pi->vlan_grp; diff --git a/drivers/net/cxgb4vf/sge.c b/drivers/net/cxgb4vf/sge.c index e00fd9c36e8b..f10864ddafbe 100644 --- a/drivers/net/cxgb4vf/sge.c +++ b/drivers/net/cxgb4vf/sge.c @@ -1534,7 +1534,7 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp, } rxq->stats.rx_cso++; } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (unlikely(pkt->vlan_ex)) { struct vlan_group *grp = pi->vlan_grp; diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c index 4fd6b2b4554b..9f6aeefa06bf 100644 --- a/drivers/net/dm9000.c +++ b/drivers/net/dm9000.c @@ -1056,7 +1056,7 @@ dm9000_rx(struct net_device *dev) if ((((rxbyte & 0x1c) << 3) & rxbyte) == 0) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } netif_rx(skb); dev->stats.rx_packets++; diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 3e8ac4baae1b..17f5867b5d9b 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -3552,7 +3552,8 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, struct e1000_hw *hw = &adapter->hw; u16 status = (u16)status_err; u8 errors = (u8)(status_err >> 24); - skb->ip_summed = CHECKSUM_NONE; + + skb_checksum_none_assert(skb); /* 82543 or newer only */ if (unlikely(hw->mac_type < e1000_82543)) return; diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 5f3eac6432cb..c9b66f4727e4 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -475,7 +475,8 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, { u16 status = (u16)status_err; u8 errors = (u8)(status_err >> 24); - skb->ip_summed = CHECKSUM_NONE; + + skb_checksum_none_assert(skb); /* Ignore Checksum bit is set */ if (status & E1000_RXD_STAT_IXSM) diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index e6048d6ab0ea..f30adbf86bb2 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -2654,7 +2654,7 @@ static inline void gfar_rx_checksum(struct sk_buff *skb, struct rxfcb *fcb) if ((fcb->flags & RXFCB_CSUM_MASK) == (RXFCB_CIP | RXFCB_CTU)) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } diff --git a/drivers/net/greth.c b/drivers/net/greth.c index fbeaf70d1727..27d6960ce09e 100644 --- a/drivers/net/greth.c +++ b/drivers/net/greth.c @@ -893,7 +893,7 @@ static int greth_rx_gbit(struct net_device *dev, int limit) if (greth->flags & GRETH_FLAG_RX_CSUM && hw_checksummed(status)) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, dev); dev->stats.rx_packets++; diff --git a/drivers/net/ibmlana.c b/drivers/net/ibmlana.c index 294ccfb427cf..0037a696cd0a 100644 --- a/drivers/net/ibmlana.c +++ b/drivers/net/ibmlana.c @@ -602,7 +602,7 @@ static void irqrx_handler(struct net_device *dev) /* set up skb fields */ skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* bookkeeping */ dev->stats.rx_packets++; diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index d35cc38bf8b2..c4d861b557ca 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -5455,7 +5455,7 @@ static void igb_receive_skb(struct igb_q_vector *q_vector, static inline void igb_rx_checksum_adv(struct igb_ring *ring, u32 status_err, struct sk_buff *skb) { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Ignore Checksum bit is set or checksum is disabled through ethtool */ if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) || diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c index c539f7c9c3e0..c7fab80d2490 100644 --- a/drivers/net/igbvf/netdev.c +++ b/drivers/net/igbvf/netdev.c @@ -103,7 +103,7 @@ static void igbvf_receive_skb(struct igbvf_adapter *adapter, static inline void igbvf_rx_checksum_adv(struct igbvf_adapter *adapter, u32 status_err, struct sk_buff *skb) { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Ignore Checksum bit is set or checksum is disabled through ethtool */ if ((status_err & E1000_RXD_STAT_IXSM) || diff --git a/drivers/net/ipg.c b/drivers/net/ipg.c index 72e3d2da9e9f..dc0198092343 100644 --- a/drivers/net/ipg.c +++ b/drivers/net/ipg.c @@ -1213,7 +1213,7 @@ static void ipg_nic_rx_with_start_and_end(struct net_device *dev, skb_put(skb, framelen); skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); netif_rx(skb); sp->rx_buff[entry] = NULL; } @@ -1278,7 +1278,7 @@ static void ipg_nic_rx_with_end(struct net_device *dev, jumbo->skb->protocol = eth_type_trans(jumbo->skb, dev); - jumbo->skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(jumbo->skb); netif_rx(jumbo->skb); } } @@ -1476,7 +1476,7 @@ static int ipg_nic_rx(struct net_device *dev) * IP/TCP/UDP frame was received. Let the * upper layer decide. */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Hand off frame for higher layer processing. * The function netif_rx() releases the sk_buff diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index ba1de5973fb2..8df645e78f2e 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -1524,7 +1524,7 @@ static void veth_receive(struct veth_lpar_connection *cnx, skb_put(skb, length); skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); netif_rx(skb); /* send it up */ dev->stats.rx_packets++; dev->stats.rx_bytes += length; diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index 33c4ffe6e103..c2f6e71e1181 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -1905,7 +1905,7 @@ ixgb_rx_checksum(struct ixgb_adapter *adapter, */ if ((rx_desc->status & IXGB_RX_DESC_STATUS_IXSM) || (!(rx_desc->status & IXGB_RX_DESC_STATUS_TCPCS))) { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); return; } @@ -1913,7 +1913,7 @@ ixgb_rx_checksum(struct ixgb_adapter *adapter, /* now look at the TCP checksum error bit */ if (rx_desc->errors & IXGB_RX_DESC_ERRORS_TCPE) { /* let the stack verify checksum errors */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); adapter->hw_csum_rx_error++; } else { /* TCP checksum is good */ diff --git a/drivers/net/ixgbe/ixgbe_fcoe.c b/drivers/net/ixgbe/ixgbe_fcoe.c index 86fa07cb061d..2f1de8b90f9e 100644 --- a/drivers/net/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ixgbe/ixgbe_fcoe.c @@ -304,12 +304,13 @@ int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter, if (!ixgbe_rx_is_fcoe(rx_desc)) goto ddp_out; - skb->ip_summed = CHECKSUM_UNNECESSARY; sterr = le32_to_cpu(rx_desc->wb.upper.status_error); fcerr = (sterr & IXGBE_RXDADV_ERR_FCERR); fceofe = (sterr & IXGBE_RXDADV_ERR_FCEOFE); if (fcerr == IXGBE_FCERR_BADCRC) - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); + else + skb->ip_summed = CHECKSUM_UNNECESSARY; if (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q)) fh = (struct fc_frame_header *)(skb->data + diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 5e4dc1b0a1bd..3aafe94741ba 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -980,7 +980,7 @@ static inline void ixgbe_rx_checksum(struct ixgbe_adapter *adapter, { u32 status_err = le32_to_cpu(rx_desc->wb.upper.status_error); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Rx csum disabled */ if (!(adapter->flags & IXGBE_FLAG_RX_CSUM_ENABLED)) diff --git a/drivers/net/ixgbevf/ixgbevf_main.c b/drivers/net/ixgbevf/ixgbevf_main.c index 5d3c869283a5..bdbd26c60ae6 100644 --- a/drivers/net/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ixgbevf/ixgbevf_main.c @@ -356,7 +356,7 @@ static void ixgbevf_receive_skb(struct ixgbevf_q_vector *q_vector, static inline void ixgbevf_rx_checksum(struct ixgbevf_adapter *adapter, u32 status_err, struct sk_buff *skb) { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Rx csum disabled */ if (!(adapter->flags & IXGBE_FLAG_RX_CSUM_ENABLED)) diff --git a/drivers/net/jme.c b/drivers/net/jme.c index 99f24f5cac53..1fcd533b1a61 100644 --- a/drivers/net/jme.c +++ b/drivers/net/jme.c @@ -936,7 +936,7 @@ jme_alloc_and_feed_skb(struct jme_adapter *jme, int idx) if (jme_rxsum_ok(jme, le16_to_cpu(rxdesc->descwb.flags))) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (rxdesc->descwb.flags & cpu_to_le16(RXWBFLAG_TAGON)) { if (jme->vlgrp) { diff --git a/drivers/net/ll_temac_main.c b/drivers/net/ll_temac_main.c index bdf2149e5296..874ee01e8d9d 100644 --- a/drivers/net/ll_temac_main.c +++ b/drivers/net/ll_temac_main.c @@ -760,7 +760,7 @@ static void ll_temac_recv(struct net_device *ndev) skb_put(skb, length); skb->dev = ndev; skb->protocol = eth_type_trans(skb, ndev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* if we're doing rx csum offload, set it up */ if (((lp->temac_features & TEMAC_FEATURE_RX_CSUM) != 0) && diff --git a/drivers/net/macb.c b/drivers/net/macb.c index ff2f158ab0b9..4297f6e8c4bc 100644 --- a/drivers/net/macb.c +++ b/drivers/net/macb.c @@ -407,7 +407,7 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag, } skb_reserve(skb, RX_OFFSET); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb_put(skb, len); for (frag = first_frag; ; frag = NEXT_RX(frag)) { diff --git a/drivers/net/niu.c b/drivers/net/niu.c index b4cc61f1fc59..1f89f472cbfb 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -3484,7 +3484,7 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np, RCR_ENTRY_ERROR))) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } else if (!(val & RCR_ENTRY_MULTI)) append_size = len - skb->len; diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index 447c2c43769a..4475ca97f031 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -923,7 +923,7 @@ static void rx_irq(struct net_device *ndev) if ((extsts & 0x002a0000) && !(extsts & 0x00540000)) { skb->ip_summed = CHECKSUM_UNNECESSARY; } else { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } skb->protocol = eth_type_trans(skb, ndev); #ifdef NS83820_VLAN_ACCEL_SUPPORT diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c index 8ab6ae0a6107..828e97cacdbf 100644 --- a/drivers/net/pasemi_mac.c +++ b/drivers/net/pasemi_mac.c @@ -808,7 +808,7 @@ static int pasemi_mac_clean_rx(struct pasemi_mac_rxring *rx, skb->csum = (macrx & XCT_MACRX_CSUM_M) >> XCT_MACRX_CSUM_S; } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); packets++; tot_bytes += len; diff --git a/drivers/net/ps3_gelic_net.c b/drivers/net/ps3_gelic_net.c index 87d6b8f36304..5526ab4895e6 100644 --- a/drivers/net/ps3_gelic_net.c +++ b/drivers/net/ps3_gelic_net.c @@ -956,9 +956,9 @@ static void gelic_net_pass_skb_up(struct gelic_descr *descr, (!(data_error & GELIC_DESCR_DATA_ERROR_CHK_MASK))) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* update netdevice statistics */ netdev->stats.rx_packets++; diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c index 6168a130f33f..7496ed2c34ab 100644 --- a/drivers/net/qla3xxx.c +++ b/drivers/net/qla3xxx.c @@ -2029,7 +2029,7 @@ static void ql_process_mac_rx_intr(struct ql3_adapter *qdev, dma_unmap_len(lrg_buf_cb2, maplen), PCI_DMA_FROMDEVICE); prefetch(skb->data); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, qdev->ndev); netif_receive_skb(skb); @@ -2076,7 +2076,7 @@ static void ql_process_macip_rx_intr(struct ql3_adapter *qdev, PCI_DMA_FROMDEVICE); prefetch(skb2->data); - skb2->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb2); if (qdev->device_id == QL3022_DEVICE_ID) { /* * Copy the ethhdr from first buffer to second. This diff --git a/drivers/net/qlcnic/qlcnic_init.c b/drivers/net/qlcnic/qlcnic_init.c index 8e47d7aea562..26a7d6bca5c7 100644 --- a/drivers/net/qlcnic/qlcnic_init.c +++ b/drivers/net/qlcnic/qlcnic_init.c @@ -1369,7 +1369,7 @@ static struct sk_buff *qlcnic_process_rxbuf(struct qlcnic_adapter *adapter, adapter->stats.csummed++; skb->ip_summed = CHECKSUM_UNNECESSARY; } else { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } skb->dev = adapter->netdev; diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c index 5a245211fc53..e2d0e108b9aa 100644 --- a/drivers/net/qlge/qlge_main.c +++ b/drivers/net/qlge/qlge_main.c @@ -1566,7 +1566,7 @@ static void ql_process_mac_rx_page(struct ql_adapter *qdev, rx_ring->rx_packets++; rx_ring->rx_bytes += skb->len; skb->protocol = eth_type_trans(skb, ndev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (qdev->rx_csum && !(ib_mac_rsp->flags1 & IB_MAC_CSUM_ERR_MASK)) { @@ -1676,7 +1676,7 @@ static void ql_process_mac_rx_skb(struct ql_adapter *qdev, rx_ring->rx_packets++; rx_ring->rx_bytes += skb->len; skb->protocol = eth_type_trans(skb, ndev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* If rx checksum is on, and there are no * csum or frame errors. @@ -1996,7 +1996,7 @@ static void ql_process_mac_split_rx_intr(struct ql_adapter *qdev, } skb->protocol = eth_type_trans(skb, ndev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* If rx checksum is on, and there are no * csum or frame errors. diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 078bbf4e6f19..07b3fb5175e5 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -4460,7 +4460,7 @@ static inline void rtl8169_rx_csum(struct sk_buff *skb, struct RxDesc *desc) ((status == RxProtoIP) && !(opts1 & IPFail))) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } static inline bool rtl8169_try_rx_copy(struct sk_buff **sk_buff, diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 7061fc8e99c7..c70ad515383a 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -7603,10 +7603,10 @@ static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp) * Packet with erroneous checksum, let the * upper layers deal with it. */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); swstats->mem_freed += skb->truesize; send_up: diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c index 8e6bd45b9f31..d8249d7653c6 100644 --- a/drivers/net/sb1250-mac.c +++ b/drivers/net/sb1250-mac.c @@ -1170,7 +1170,7 @@ again: sb->ip_summed = CHECKSUM_UNNECESSARY; /* don't need to set sb->csum */ } else { - sb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(sb); } } prefetch(sb->data); diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c index 799c461ce7b8..acb372e841b2 100644 --- a/drivers/net/sfc/rx.c +++ b/drivers/net/sfc/rx.c @@ -615,7 +615,7 @@ void __efx_rx_packet(struct efx_channel *channel, EFX_BUG_ON_PARANOID(!skb); /* Set the SKB flags */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Pass the packet up */ netif_receive_skb(skb); diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c index a812efc3632e..50259dfec583 100644 --- a/drivers/net/sh_eth.c +++ b/drivers/net/sh_eth.c @@ -798,7 +798,7 @@ static int sh_eth_rx(struct net_device *ndev) skb->dev = ndev; sh_eth_set_receive_align(skb); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); rxdesc->addr = virt_to_phys(PTR_ALIGN(skb->data, 4)); } if (entry >= RX_RING_SIZE - 1) diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c index 0909ae934ad0..13ddcd487200 100644 --- a/drivers/net/smsc911x.c +++ b/drivers/net/smsc911x.c @@ -1048,7 +1048,7 @@ static int smsc911x_poll(struct napi_struct *napi, int budget) smsc911x_rx_readfifo(pdata, (unsigned int *)skb->head, pktwords); skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); netif_receive_skb(skb); /* Update counters */ diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c index 1636a34d95dd..cb6bcca9d541 100644 --- a/drivers/net/spider_net.c +++ b/drivers/net/spider_net.c @@ -1000,9 +1000,9 @@ spider_net_pass_skb_up(struct spider_net_descr *descr, !(data_error & SPIDER_NET_DATA_ERR_CKSUM_MASK)) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (data_status & SPIDER_NET_VLAN_PACKET) { /* further enhancements: HW-accel VLAN diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c index e3f002eba89a..1ccea76d89ae 100644 --- a/drivers/net/stmmac/stmmac_main.c +++ b/drivers/net/stmmac/stmmac_main.c @@ -1256,7 +1256,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) if (unlikely(status == csum_none)) { /* always for the old mac 10/100 */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); netif_receive_skb(skb); } else { skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c index 3128d6a8e9ce..8b3dc1eb4015 100644 --- a/drivers/net/tehuti.c +++ b/drivers/net/tehuti.c @@ -1297,12 +1297,13 @@ static int bdx_rx_receive(struct bdx_priv *priv, struct rxd_fifo *f, int budget) ndev->stats.rx_bytes += len; skb_put(skb, len); - skb->ip_summed = CHECKSUM_UNNECESSARY; skb->protocol = eth_type_trans(skb, ndev); /* Non-IP packets aren't checksum-offloaded */ if (GET_RXD_PKT_ID(rxd_val1) == 0) - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); + else + skb->ip_summed = CHECKSUM_UNNECESSARY; NETIF_RX_MUX(priv, rxd_val1, rxd_vlan, skb); diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index bc3af78a869f..9f6ffffc8376 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -4719,7 +4719,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) >> RXD_TCPCSUM_SHIFT) == 0xffff)) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, tp->dev); diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c index 3f4681f78262..5dfb39539b3e 100644 --- a/drivers/net/typhoon.c +++ b/drivers/net/typhoon.c @@ -1760,7 +1760,7 @@ typhoon_rx(struct typhoon *tp, struct basic_ring *rxRing, volatile __le32 * read (TYPHOON_RX_IP_CHK_GOOD | TYPHOON_RX_UDP_CHK_GOOD)) { new_skb->ip_summed = CHECKSUM_UNNECESSARY; } else - new_skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(new_skb); spin_lock(&tp->state_lock); if(tp->vlgrp != NULL && rx->rxStatus & TYPHOON_RX_VLAN) diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index fd69095ef6e3..ed7f4f5c4062 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -1954,7 +1954,7 @@ static int velocity_tx_srv(struct velocity_info *vptr) */ static inline void velocity_rx_csum(struct rx_desc *rd, struct sk_buff *skb) { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (rd->rdesc1.CSM & CSM_IPKT) { if (rd->rdesc1.CSM & CSM_IPOK) { diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index abe0ff53daf3..198ce92af0c3 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1042,11 +1042,11 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, skb->csum = htons(gdesc->rcd.csum); skb->ip_summed = CHECKSUM_PARTIAL; } else { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } } } else { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } } diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c index 01cdec712b64..5378b849f54f 100644 --- a/drivers/net/vxge/vxge-main.c +++ b/drivers/net/vxge/vxge-main.c @@ -501,7 +501,7 @@ vxge_rx_1b_compl(struct __vxge_hw_ring *ringh, void *dtr, ext_info.l4_cksum == VXGE_HW_L4_CKSUM_OK) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); vxge_rx_complete(ring, skb, ext_info.vlan, pkt_length, &ext_info); diff --git a/drivers/net/xilinx_emaclite.c b/drivers/net/xilinx_emaclite.c index 71122ee4e830..f3f8be5a35fa 100644 --- a/drivers/net/xilinx_emaclite.c +++ b/drivers/net/xilinx_emaclite.c @@ -641,7 +641,7 @@ static void xemaclite_rx_handler(struct net_device *dev) skb_put(skb, len); /* Tell the skb how much data we got */ skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); dev->stats.rx_packets++; dev->stats.rx_bytes += len; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f900ffcd847e..9e8085a89589 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2206,6 +2206,21 @@ static inline void skb_forward_csum(struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; } +/** + * skb_checksum_none_assert - make sure skb ip_summed is CHECKSUM_NONE + * @skb: skb to check + * + * fresh skbs have their ip_summed set to CHECKSUM_NONE. + * Instead of forcing ip_summed to CHECKSUM_NONE, we can + * use this helper, to document places where we make this assertion. + */ +static inline void skb_checksum_none_assert(struct sk_buff *skb) +{ +#ifdef DEBUG + BUG_ON(skb->ip_summed != CHECKSUM_NONE); +#endif +} + bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit v1.2.3 From 57a2ce5f54f3120467be760662c6ef3bea3f9579 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 3 Sep 2010 19:09:46 +0800 Subject: padata: add missing __percpu markup in include/linux/padata.h parallel_data->queue is a percpu pointer but was missing __percpu markup. Add it. Signed-off-by: Namhyung Kim Acked-by: Steffen Klassert Signed-off-by: Herbert Xu --- include/linux/padata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/padata.h b/include/linux/padata.h index bdcd1e9eacea..4633b2f726b6 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -127,8 +127,8 @@ struct padata_cpumask { */ struct parallel_data { struct padata_instance *pinst; - struct padata_parallel_queue *pqueue; - struct padata_serial_queue *squeue; + struct padata_parallel_queue __percpu *pqueue; + struct padata_serial_queue __percpu *squeue; atomic_t seq_nr; atomic_t reorder_objects; atomic_t refcnt; -- cgit v1.2.3 From b8ef3204f460912a46659cdc74d237adbe705053 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 6 Aug 2010 03:02:37 -0500 Subject: [SCSI] fc class: add fc host default default dev loss setting This patch adds a fc_host setting to store the default dev_loss_tmo. It is used if the driver has a callack to get the value from the LLD. If the callback is not set, then we use the fc class module default value. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_fc.c | 6 +++++- include/scsi/scsi_transport_fc.h | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index d7e470a06180..9f0f7d9c7422 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -2525,7 +2525,11 @@ fc_rport_create(struct Scsi_Host *shost, int channel, rport->maxframe_size = -1; rport->supported_classes = FC_COS_UNSPECIFIED; - rport->dev_loss_tmo = fc_dev_loss_tmo; + if (fci->f->get_host_def_dev_loss_tmo) { + fci->f->get_host_def_dev_loss_tmo(shost); + rport->dev_loss_tmo = fc_host_def_dev_loss_tmo(shost); + } else + rport->dev_loss_tmo = fc_dev_loss_tmo; memcpy(&rport->node_name, &ids->node_name, sizeof(rport->node_name)); memcpy(&rport->port_name, &ids->port_name, sizeof(rport->port_name)); rport->port_id = ids->port_id; diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index 87d81b3ce564..9f98fca9b763 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -496,6 +496,7 @@ struct fc_host_attrs { u64 fabric_name; char symbolic_name[FC_SYMBOLIC_NAME_SIZE]; char system_hostname[FC_SYMBOLIC_NAME_SIZE]; + u32 def_dev_loss_tmo; /* Private (Transport-managed) Attributes */ enum fc_tgtid_binding_type tgtid_bind_type; @@ -580,6 +581,8 @@ struct fc_host_attrs { (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q_name) #define fc_host_devloss_work_q(x) \ (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q) +#define fc_host_def_dev_loss_tmo(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->def_dev_loss_tmo) struct fc_bsg_buffer { @@ -640,6 +643,7 @@ struct fc_function_template { void (*get_host_fabric_name)(struct Scsi_Host *); void (*get_host_symbolic_name)(struct Scsi_Host *); void (*set_host_system_hostname)(struct Scsi_Host *); + void (*get_host_def_dev_loss_tmo)(struct Scsi_Host *); struct fc_host_statistics * (*get_fc_host_stats)(struct Scsi_Host *); void (*reset_fc_host_stats)(struct Scsi_Host *); -- cgit v1.2.3 From 144c0f8833d0458e4369a27a53aea8856c665c41 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 3 Sep 2010 10:31:05 -0700 Subject: Input: fix a few typos Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 6 +++--- include/linux/input.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/input/input.c b/drivers/input/input.c index ab6982056518..acb3c8095c65 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -171,7 +171,7 @@ static int input_handle_abs_event(struct input_dev *dev, if (code == ABS_MT_SLOT) { /* * "Stage" the event; we'll flush it later, when we - * get actiual touch data. + * get actual touch data. */ if (*pval >= 0 && *pval < dev->mtsize) dev->slot = *pval; @@ -188,7 +188,7 @@ static int input_handle_abs_event(struct input_dev *dev, pold = &mtslot->abs[code - ABS_MT_FIRST]; } else { /* - * Bypass filtering for multitouch events when + * Bypass filtering for multi-touch events when * not employing slots. */ pold = NULL; @@ -1601,7 +1601,7 @@ EXPORT_SYMBOL(input_free_device); * * This function allocates all necessary memory for MT slot handling in the * input device, and adds ABS_MT_SLOT to the device capabilities. All slots - * are initially marked as unused iby setting ABS_MT_TRACKING_ID to -1. + * are initially marked as unused by setting ABS_MT_TRACKING_ID to -1. */ int input_mt_create_slots(struct input_dev *dev, unsigned int num_slots) { diff --git a/include/linux/input.h b/include/linux/input.h index 896a92227bc4..789265123531 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -67,7 +67,7 @@ struct input_absinfo { #define EVIOCGPHYS(len) _IOC(_IOC_READ, 'E', 0x07, len) /* get physical location */ #define EVIOCGUNIQ(len) _IOC(_IOC_READ, 'E', 0x08, len) /* get unique identifier */ -#define EVIOCGKEY(len) _IOC(_IOC_READ, 'E', 0x18, len) /* get global keystate */ +#define EVIOCGKEY(len) _IOC(_IOC_READ, 'E', 0x18, len) /* get global key state */ #define EVIOCGLED(len) _IOC(_IOC_READ, 'E', 0x19, len) /* get all LEDs */ #define EVIOCGSND(len) _IOC(_IOC_READ, 'E', 0x1a, len) /* get all sounds status */ #define EVIOCGSW(len) _IOC(_IOC_READ, 'E', 0x1b, len) /* get all switch states */ -- cgit v1.2.3 From fe8e0c25cad28e8858ecfa5863333c70685a6811 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Mon, 6 Sep 2010 20:53:42 +0200 Subject: x86, 32-bit: Align percpu area and irq stacks to THREAD_SIZE The irq stacks, located in the percpu-area, need to be THREAD_SIZE aligned. Add the infrastucture to align percpu variables to larger-than-pagesize amounts within the percpu area, and use it to specify the alignment for the irq stacks. Also align the percpu area itself to THREAD_SIZE. This should make irq stacks work with 8K THREAD_SIZE. Signed-off-by: Alexander van Heukelum Cc: Tejun Heo Cc: hch@lst.de LKML-Reference: <1283799222.15941.1393621887@webmail.messagingengine.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_32.c | 4 ++-- arch/x86/kernel/vmlinux.lds.S | 2 +- include/linux/percpu-defs.h | 12 ++++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 3b5609f54c4b..50fbbe60e507 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -60,8 +60,8 @@ union irq_ctx { static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); -static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, hardirq_stack); -static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, softirq_stack); +static DEFINE_PER_CPU_MULTIPAGE_ALIGNED(union irq_ctx, hardirq_stack, THREAD_SIZE); +static DEFINE_PER_CPU_MULTIPAGE_ALIGNED(union irq_ctx, softirq_stack, THREAD_SIZE); static void call_on_stack(void *func, void *stack) { diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index d0bb52296fa3..bb899475355d 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -273,7 +273,7 @@ SECTIONS } #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) - PERCPU(PAGE_SIZE) + PERCPU(THREAD_SIZE) #endif . = ALIGN(PAGE_SIZE); diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index ce2dc655cd1d..ab20d119a85d 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -138,6 +138,18 @@ DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ __aligned(PAGE_SIZE) +/* + * Declaration/definition used for large per-CPU variables that must be + * aligned to something larger than the pagesize. + */ +#define DECLARE_PER_CPU_MULTIPAGE_ALIGNED(type, name, size) \ + DECLARE_PER_CPU_SECTION(type, name, "..page_aligned") \ + __aligned(size) + +#define DEFINE_PER_CPU_MULTIPAGE_ALIGNED(type, name, size) \ + DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ + __aligned(size) + /* * Intermodule exports for per-CPU variables. sparse forgets about * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to -- cgit v1.2.3 From 2bf2160d8805de64308e2e7c3cd97813cb58ed2f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 23 Aug 2010 18:42:48 +0900 Subject: irq: Add tracepoint to softirq_raise Add a tracepoint for tracing when softirq action is raised. This and the existing tracepoints complete softirq's tracepoints: softirq_raise, softirq_entry and softirq_exit. And when this tracepoint is used in combination with the softirq_entry tracepoint we can determine the softirq raise latency. Signed-off-by: Lai Jiangshan Acked-by: Mathieu Desnoyers Acked-by: Neil Horman Cc: David Miller Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C724298.4050509@jp.fujitsu.com> [ factorize softirq events with DECLARE_EVENT_CLASS ] Signed-off-by: Koki Sanagi Signed-off-by: Frederic Weisbecker --- include/linux/interrupt.h | 8 +++++++- include/trace/events/irq.h | 26 ++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a0384a4d1e6f..531495db1708 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -18,6 +18,7 @@ #include #include #include +#include /* * These correspond to the IORESOURCE_IRQ_* defines in @@ -407,7 +408,12 @@ asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); -#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) +static inline void __raise_softirq_irqoff(unsigned int nr) +{ + trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL); + or_softirq_pending(1UL << nr); +} + extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); extern void wakeup_softirqd(void); diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 0e4cfb694fe7..6fa7cbab7d93 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -5,7 +5,9 @@ #define _TRACE_IRQ_H #include -#include + +struct irqaction; +struct softirq_action; #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } #define show_softirq_name(val) \ @@ -93,7 +95,10 @@ DECLARE_EVENT_CLASS(softirq, ), TP_fast_assign( - __entry->vec = (int)(h - vec); + if (vec) + __entry->vec = (int)(h - vec); + else + __entry->vec = (int)(long)h; ), TP_printk("vec=%d [action=%s]", __entry->vec, @@ -136,6 +141,23 @@ DEFINE_EVENT(softirq, softirq_exit, TP_ARGS(h, vec) ); +/** + * softirq_raise - called immediately when a softirq is raised + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter contains a pointer to the softirq vector number which is + * raised. @vec is NULL and it means @h includes vector number not + * softirq_action. When used in combination with the softirq_entry tracepoint + * we can determine the softirq raise latency. + */ +DEFINE_EVENT(softirq, softirq_raise, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec) +); + #endif /* _TRACE_IRQ_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 3e4b10d7a4d2a78af64f8096dc7cdb3bebd65adb Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 23 Aug 2010 18:43:51 +0900 Subject: napi: Convert trace_napi_poll to TRACE_EVENT This patch converts trace_napi_poll from DECLARE_EVENT to TRACE_EVENT to improve the usability of napi_poll tracepoint. -0 [001] 241302.750777: napi_poll: napi poll on napi struct f6acc480 for device eth3 -0 [000] 241302.852389: napi_poll: napi poll on napi struct f5d0d70c for device eth1 The original patch is below: http://marc.info/?l=linux-kernel&m=126021713809450&w=2 [ sanagi.koki@jp.fujitsu.com: And add a fix by Steven Rostedt: http://marc.info/?l=linux-kernel&m=126150506519173&w=2 ] Signed-off-by: Neil Horman Acked-by: David S. Miller Acked-by: Neil Horman Cc: Mathieu Desnoyers Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C7242D7.4050009@jp.fujitsu.com> Signed-off-by: Koki Sanagi Signed-off-by: Frederic Weisbecker --- include/trace/events/napi.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index 188deca2f3c7..8fe1e93f531d 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -6,10 +6,31 @@ #include #include +#include + +#define NO_DEV "(no_device)" + +TRACE_EVENT(napi_poll, -DECLARE_TRACE(napi_poll, TP_PROTO(struct napi_struct *napi), - TP_ARGS(napi)); + + TP_ARGS(napi), + + TP_STRUCT__entry( + __field( struct napi_struct *, napi) + __string( dev_name, napi->dev ? napi->dev->name : NO_DEV) + ), + + TP_fast_assign( + __entry->napi = napi; + __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV); + ), + + TP_printk("napi poll on napi struct %p for device %s", + __entry->napi, __get_str(dev_name)) +); + +#undef NO_DEV #endif /* _TRACE_NAPI_H_ */ -- cgit v1.2.3 From cf66ba58b5cb8b1526e9dd2fb96ff8db048d4d44 Mon Sep 17 00:00:00 2001 From: Koki Sanagi Date: Mon, 23 Aug 2010 18:45:02 +0900 Subject: netdev: Add tracepoints to netdev layer This patch adds tracepoint to dev_queue_xmit, dev_hard_start_xmit, netif_rx and netif_receive_skb. These tracepoints help you to monitor network driver's input/output. -0 [001] 112447.902030: netif_rx: dev=eth1 skbaddr=f3ef0900 len=84 -0 [001] 112447.902039: netif_receive_skb: dev=eth1 skbaddr=f3ef0900 len=84 sshd-6828 [000] 112447.903257: net_dev_queue: dev=eth4 skbaddr=f3fca538 len=226 sshd-6828 [000] 112447.903260: net_dev_xmit: dev=eth4 skbaddr=f3fca538 len=226 rc=0 Signed-off-by: Koki Sanagi Acked-by: David S. Miller Acked-by: Neil Horman Cc: Mathieu Desnoyers Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C72431E.3000901@jp.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/trace/events/net.h | 82 ++++++++++++++++++++++++++++++++++++++++++++++ net/core/dev.c | 6 ++++ net/core/net-traces.c | 1 + 3 files changed, 89 insertions(+) create mode 100644 include/trace/events/net.h (limited to 'include') diff --git a/include/trace/events/net.h b/include/trace/events/net.h new file mode 100644 index 000000000000..5f247f5ffc56 --- /dev/null +++ b/include/trace/events/net.h @@ -0,0 +1,82 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM net + +#if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NET_H + +#include +#include +#include +#include + +TRACE_EVENT(net_dev_xmit, + + TP_PROTO(struct sk_buff *skb, + int rc), + + TP_ARGS(skb, rc), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned int, len ) + __field( int, rc ) + __string( name, skb->dev->name ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->len = skb->len; + __entry->rc = rc; + __assign_str(name, skb->dev->name); + ), + + TP_printk("dev=%s skbaddr=%p len=%u rc=%d", + __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) +); + +DECLARE_EVENT_CLASS(net_dev_template, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned int, len ) + __string( name, skb->dev->name ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->len = skb->len; + __assign_str(name, skb->dev->name); + ), + + TP_printk("dev=%s skbaddr=%p len=%u", + __get_str(name), __entry->skbaddr, __entry->len) +) + +DEFINE_EVENT(net_dev_template, net_dev_queue, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_template, netif_receive_skb, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_template, netif_rx, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); +#endif /* _TRACE_NET_H */ + +/* This part must be outside protection */ +#include diff --git a/net/core/dev.c b/net/core/dev.c index 3721fbb9a83c..5a4fbc7405e2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -128,6 +128,7 @@ #include #include #include +#include #include #include "net-sysfs.h" @@ -1978,6 +1979,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, } rc = ops->ndo_start_xmit(skb, dev); + trace_net_dev_xmit(skb, rc); if (rc == NETDEV_TX_OK) txq_trans_update(txq); return rc; @@ -1998,6 +2000,7 @@ gso: skb_dst_drop(nskb); rc = ops->ndo_start_xmit(nskb, dev); + trace_net_dev_xmit(nskb, rc); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) goto out_kfree_gso_skb; @@ -2186,6 +2189,7 @@ int dev_queue_xmit(struct sk_buff *skb) #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); #endif + trace_net_dev_queue(skb); if (q->enqueue) { rc = __dev_xmit_skb(skb, q, dev, txq); goto out; @@ -2512,6 +2516,7 @@ int netif_rx(struct sk_buff *skb) if (netdev_tstamp_prequeue) net_timestamp_check(skb); + trace_netif_rx(skb); #ifdef CONFIG_RPS { struct rps_dev_flow voidflow, *rflow = &voidflow; @@ -2828,6 +2833,7 @@ static int __netif_receive_skb(struct sk_buff *skb) if (!netdev_tstamp_prequeue) net_timestamp_check(skb); + trace_netif_receive_skb(skb); if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) return NET_RX_SUCCESS; diff --git a/net/core/net-traces.c b/net/core/net-traces.c index afa6380ed88a..7f1bb2aba03b 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -26,6 +26,7 @@ #define CREATE_TRACE_POINTS #include +#include #include EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); -- cgit v1.2.3 From 07dc22e7295f25526f110d704655ff0ea7687420 Mon Sep 17 00:00:00 2001 From: Koki Sanagi Date: Mon, 23 Aug 2010 18:46:12 +0900 Subject: skb: Add tracepoints to freeing skb This patch adds tracepoint to consume_skb and add trace_kfree_skb before __kfree_skb in skb_free_datagram_locked and net_tx_action. Combinating with tracepoint on dev_hard_start_xmit, we can check how long it takes to free transmitted packets. And using it, we can calculate how many packets driver had at that time. It is useful when a drop of transmitted packet is a problem. sshd-6828 [000] 112689.258154: consume_skb: skbaddr=f2d99bb8 Signed-off-by: Koki Sanagi Acked-by: David S. Miller Acked-by: Neil Horman Cc: Mathieu Desnoyers Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C724364.50903@jp.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/trace/events/skb.h | 17 +++++++++++++++++ net/core/datagram.c | 1 + net/core/dev.c | 2 ++ net/core/skbuff.c | 1 + 4 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 4b2be6dc76f0..75ce9d500d8e 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -35,6 +35,23 @@ TRACE_EVENT(kfree_skb, __entry->skbaddr, __entry->protocol, __entry->location) ); +TRACE_EVENT(consume_skb, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + ), + + TP_printk("skbaddr=%p", __entry->skbaddr) +); + TRACE_EVENT(skb_copy_datagram_iovec, TP_PROTO(const struct sk_buff *skb, int len), diff --git a/net/core/datagram.c b/net/core/datagram.c index 251997a95483..282806ba7a57 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -243,6 +243,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) unlock_sock_fast(sk, slow); /* skb is now orphaned, can be freed outside of locked section */ + trace_kfree_skb(skb, skb_free_datagram_locked); __kfree_skb(skb); } EXPORT_SYMBOL(skb_free_datagram_locked); diff --git a/net/core/dev.c b/net/core/dev.c index 5a4fbc7405e2..2308cce48048 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -129,6 +129,7 @@ #include #include #include +#include #include #include "net-sysfs.h" @@ -2576,6 +2577,7 @@ static void net_tx_action(struct softirq_action *h) clist = clist->next; WARN_ON(atomic_read(&skb->users)); + trace_kfree_skb(skb, net_tx_action); __kfree_skb(skb); } } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3a2513f0d0c3..12e61e351d0e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -466,6 +466,7 @@ void consume_skb(struct sk_buff *skb) smp_rmb(); else if (likely(!atomic_dec_and_test(&skb->users))) return; + trace_consume_skb(skb); __kfree_skb(skb); } EXPORT_SYMBOL(consume_skb); -- cgit v1.2.3 From 269cddd44e3588d1c50a7ec055b78de4d6c72cb6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 7 Sep 2010 14:17:10 -0500 Subject: dlm: Fix dlm lock status block comment in dlm.h There is only one place in the dlm where the sb_status is set and that is queue_cast(). Tracing back the callers of that function shows that the listed set of return values is out of date, so here are an updated set. Signed-off-by: Steven Whitehouse Signed-off-by: David Teigland --- include/linux/dlm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dlm.h b/include/linux/dlm.h index 0b3518c42356..d4e02f5353a0 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -48,10 +48,10 @@ typedef void dlm_lockspace_t; * * 0 if lock request was successful * -EAGAIN if request would block and is flagged DLM_LKF_NOQUEUE - * -ENOMEM if there is no memory to process request - * -EINVAL if there are invalid parameters * -DLM_EUNLOCK if unlock request was successful * -DLM_ECANCEL if a cancel completed successfully + * -EDEADLK if a deadlock was detected + * -ETIMEDOUT if the lock request was canceled due to a timeout */ #define DLM_SBF_DEMOTED 0x01 -- cgit v1.2.3 From 17cebf658e088935d4bdebfc7ad9800e9fc4a0b2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 16:55:22 +1000 Subject: sunrpc: extract some common sunrpc_cache code from nfsd Rather can duplicating this idiom twice, put it in an inline function. This reduces the usage of 'expiry_time' out side the sunrpc/cache.c code and thus the impact of a change that is about to be made to that field. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 9 +++------ include/linux/sunrpc/cache.h | 6 ++++++ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c2a4f71d87dd..e56827b88fd2 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -935,10 +935,9 @@ static void exp_fsid_unhash(struct svc_export *exp) ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); if (!IS_ERR(ek)) { - ek->h.expiry_time = get_seconds()-1; + sunrpc_invalidate(&ek->h, &svc_expkey_cache); cache_put(&ek->h, &svc_expkey_cache); } - svc_expkey_cache.nextcheck = get_seconds(); } static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) @@ -973,10 +972,9 @@ static void exp_unhash(struct svc_export *exp) ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); if (!IS_ERR(ek)) { - ek->h.expiry_time = get_seconds()-1; + sunrpc_invalidate(&ek->h, &svc_expkey_cache); cache_put(&ek->h, &svc_expkey_cache); } - svc_expkey_cache.nextcheck = get_seconds(); } /* @@ -1097,8 +1095,7 @@ out: static void exp_do_unexport(svc_export *unexp) { - unexp->h.expiry_time = get_seconds()-1; - svc_export_cache.nextcheck = get_seconds(); + sunrpc_invalidate(&unexp->h, &svc_export_cache); exp_unhash(unexp); exp_fsid_unhash(unexp); } diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 7bf3e84b92f4..0e1febf4e5bc 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -228,4 +228,10 @@ static inline time_t get_expiry(char **bpp) return rv; } +static inline void sunrpc_invalidate(struct cache_head *h, + struct cache_detail *detail) +{ + h->expiry_time = get_seconds() - 1; + detail->nextcheck = get_seconds(); +} #endif /* _LINUX_SUNRPC_CACHE_H_ */ -- cgit v1.2.3 From c5b29f885afe890f953f7f23424045cdad31d3e4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 16:55:22 +1000 Subject: sunrpc: use seconds since boot in expiry cache This protects us from confusion when the wallclock time changes. We convert to and from wallclock when setting or reading expiry times. Also use seconds since boot for last_clost time. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfs/dns_resolve.c | 6 +++--- fs/nfsd/nfs4idmap.c | 2 +- include/linux/sunrpc/cache.h | 28 +++++++++++++++++++++++++--- net/sunrpc/cache.c | 36 +++++++++++++++++++----------------- 4 files changed, 48 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index dba50a5625db..a6e711ad130f 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -167,7 +167,7 @@ static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd, return 0; } item = container_of(h, struct nfs_dns_ent, h); - ttl = (long)item->h.expiry_time - (long)get_seconds(); + ttl = item->h.expiry_time - seconds_since_boot(); if (ttl < 0) ttl = 0; @@ -239,7 +239,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) ttl = get_expiry(&buf); if (ttl == 0) goto out; - key.h.expiry_time = ttl + get_seconds(); + key.h.expiry_time = ttl + seconds_since_boot(); ret = -ENOMEM; item = nfs_dns_lookup(cd, &key); @@ -301,7 +301,7 @@ static int do_cache_lookup_nowait(struct cache_detail *cd, goto out_err; ret = -ETIMEDOUT; if (!test_bit(CACHE_VALID, &(*item)->h.flags) - || (*item)->h.expiry_time < get_seconds() + || (*item)->h.expiry_time < seconds_since_boot() || cd->flush_time > (*item)->h.last_refresh) goto out_put; ret = -ENOENT; diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index c78dbf493424..808b33a4a090 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -550,7 +550,7 @@ do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *), goto out_err; ret = -ETIMEDOUT; if (!test_bit(CACHE_VALID, &(*item)->h.flags) - || (*item)->h.expiry_time < get_seconds() + || (*item)->h.expiry_time < seconds_since_boot() || detail->flush_time > (*item)->h.last_refresh) goto out_put; ret = -ENOENT; diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 0e1febf4e5bc..ece432b7f87f 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -218,20 +218,42 @@ static inline int get_int(char **bpp, int *anint) return 0; } +/* + * timestamps kept in the cache are expressed in seconds + * since boot. This is the best for measuring differences in + * real time. + */ +static inline time_t seconds_since_boot(void) +{ + struct timespec boot; + getboottime(&boot); + return get_seconds() - boot.tv_sec; +} + +static inline time_t convert_to_wallclock(time_t sinceboot) +{ + struct timespec boot; + getboottime(&boot); + return boot.tv_sec + sinceboot; +} + static inline time_t get_expiry(char **bpp) { int rv; + struct timespec boot; + if (get_int(bpp, &rv)) return 0; if (rv < 0) return 0; - return rv; + getboottime(&boot); + return rv - boot.tv_sec; } static inline void sunrpc_invalidate(struct cache_head *h, struct cache_detail *detail) { - h->expiry_time = get_seconds() - 1; - detail->nextcheck = get_seconds(); + h->expiry_time = seconds_since_boot() - 1; + detail->nextcheck = seconds_since_boot(); } #endif /* _LINUX_SUNRPC_CACHE_H_ */ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 2b06410e584e..8dc121955fdc 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -42,7 +42,7 @@ static void cache_revisit_request(struct cache_head *item); static void cache_init(struct cache_head *h) { - time_t now = get_seconds(); + time_t now = seconds_since_boot(); h->next = NULL; h->flags = 0; kref_init(&h->ref); @@ -52,7 +52,7 @@ static void cache_init(struct cache_head *h) static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) { - return (h->expiry_time < get_seconds()) || + return (h->expiry_time < seconds_since_boot()) || (detail->flush_time > h->last_refresh); } @@ -127,7 +127,7 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); static void cache_fresh_locked(struct cache_head *head, time_t expiry) { head->expiry_time = expiry; - head->last_refresh = get_seconds(); + head->last_refresh = seconds_since_boot(); set_bit(CACHE_VALID, &head->flags); } @@ -238,7 +238,7 @@ int cache_check(struct cache_detail *detail, /* now see if we want to start an upcall */ refresh_age = (h->expiry_time - h->last_refresh); - age = get_seconds() - h->last_refresh; + age = seconds_since_boot() - h->last_refresh; if (rqstp == NULL) { if (rv == -EAGAIN) @@ -253,7 +253,7 @@ int cache_check(struct cache_detail *detail, cache_revisit_request(h); if (rv == -EAGAIN) { set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY); + cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); cache_fresh_unlocked(h, detail); rv = -ENOENT; } @@ -388,11 +388,11 @@ static int cache_clean(void) return -1; } current_detail = list_entry(next, struct cache_detail, others); - if (current_detail->nextcheck > get_seconds()) + if (current_detail->nextcheck > seconds_since_boot()) current_index = current_detail->hash_size; else { current_index = 0; - current_detail->nextcheck = get_seconds()+30*60; + current_detail->nextcheck = seconds_since_boot()+30*60; } } @@ -477,7 +477,7 @@ EXPORT_SYMBOL_GPL(cache_flush); void cache_purge(struct cache_detail *detail) { detail->flush_time = LONG_MAX; - detail->nextcheck = get_seconds(); + detail->nextcheck = seconds_since_boot(); cache_flush(); detail->flush_time = 1; } @@ -902,7 +902,7 @@ static int cache_release(struct inode *inode, struct file *filp, filp->private_data = NULL; kfree(rp); - cd->last_close = get_seconds(); + cd->last_close = seconds_since_boot(); atomic_dec(&cd->readers); } module_put(cd->owner); @@ -1034,7 +1034,7 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, int len; if (atomic_read(&detail->readers) == 0 && - detail->last_close < get_seconds() - 30) { + detail->last_close < seconds_since_boot() - 30) { warn_no_listener(detail); return -EINVAL; } @@ -1219,7 +1219,8 @@ static int c_show(struct seq_file *m, void *p) ifdebug(CACHE) seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n", - cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags); + convert_to_wallclock(cp->expiry_time), + atomic_read(&cp->ref.refcount), cp->flags); cache_get(cp); if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ @@ -1285,7 +1286,7 @@ static ssize_t read_flush(struct file *file, char __user *buf, unsigned long p = *ppos; size_t len; - sprintf(tbuf, "%lu\n", cd->flush_time); + sprintf(tbuf, "%lu\n", convert_to_wallclock(cd->flush_time)); len = strlen(tbuf); if (p >= len) return 0; @@ -1303,19 +1304,20 @@ static ssize_t write_flush(struct file *file, const char __user *buf, struct cache_detail *cd) { char tbuf[20]; - char *ep; - long flushtime; + char *bp, *ep; + if (*ppos || count > sizeof(tbuf)-1) return -EINVAL; if (copy_from_user(tbuf, buf, count)) return -EFAULT; tbuf[count] = 0; - flushtime = simple_strtoul(tbuf, &ep, 0); + simple_strtoul(tbuf, &ep, 0); if (*ep && *ep != '\n') return -EINVAL; - cd->flush_time = flushtime; - cd->nextcheck = get_seconds(); + bp = tbuf; + cd->flush_time = get_expiry(&bp); + cd->nextcheck = seconds_since_boot(); cache_flush(); *ppos += count; -- cgit v1.2.3 From f16b6e8d838b2e2bb4561201311c66ac02ad67df Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 17:04:06 +1000 Subject: sunrpc/cache: allow threads to block while waiting for cache update. The current practice of waiting for cache updates by queueing the whole request to be retried has (at least) two problems. 1/ With NFSv4, requests can be quite complex and re-trying a whole request when a later part fails should only be a last-resort, not a normal practice. 2/ Large requests, and in particular any 'write' request, will not be queued by the current code and doing so would be undesirable. In many cases only a very sort wait is needed before the cache gets valid data. So, providing the underlying transport permits it by setting ->thread_wait, arrange to wait briefly for an upcall to be completed (as reflected in the clearing of CACHE_PENDING). If the short wait was not long enough and CACHE_PENDING is still set, fall back on the old approach. The 'thread_wait' value is set to 5 seconds when there are spare threads, and 1 second when there are no spare threads. These values are probably much higher than needed, but will ensure some forward progress. Note that as we only request an update for a non-valid item, and as non-valid items are updated in place it is extremely unlikely that cache_check will return -ETIMEDOUT. Normally cache_defer_req will sleep for a short while and then find that the item is_valid. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 3 +++ net/sunrpc/cache.c | 59 +++++++++++++++++++++++++++++++++++++++++++- net/sunrpc/svc_xprt.c | 11 +++++++++ 3 files changed, 72 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index ece432b7f87f..52a7d7224e90 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -125,6 +125,9 @@ struct cache_detail { */ struct cache_req { struct cache_deferred_req *(*defer)(struct cache_req *req); + int thread_wait; /* How long (jiffies) we can block the + * current thread to wait for updates. + */ }; /* this must be embedded in a deferred_request that is being * delayed awaiting cache-fill diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 8dc121955fdc..2c5297f245b4 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -509,10 +509,22 @@ static LIST_HEAD(cache_defer_list); static struct list_head cache_defer_hash[DFR_HASHSIZE]; static int cache_defer_cnt; +struct thread_deferred_req { + struct cache_deferred_req handle; + struct completion completion; +}; +static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many) +{ + struct thread_deferred_req *dr = + container_of(dreq, struct thread_deferred_req, handle); + complete(&dr->completion); +} + static int cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq, *discard; int hash = DFR_HASH(item); + struct thread_deferred_req sleeper; if (cache_defer_cnt >= DFR_MAX) { /* too much in the cache, randomly drop this one, @@ -521,7 +533,15 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) if (net_random()&1) return -ENOMEM; } - dreq = req->defer(req); + if (req->thread_wait) { + dreq = &sleeper.handle; + sleeper.completion = + COMPLETION_INITIALIZER_ONSTACK(sleeper.completion); + dreq->revisit = cache_restart_thread; + } else + dreq = req->defer(req); + + retry: if (dreq == NULL) return -ENOMEM; @@ -555,6 +575,43 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) cache_revisit_request(item); return -EAGAIN; } + + if (dreq == &sleeper.handle) { + if (wait_for_completion_interruptible_timeout( + &sleeper.completion, req->thread_wait) <= 0) { + /* The completion wasn't completed, so we need + * to clean up + */ + spin_lock(&cache_defer_lock); + if (!list_empty(&sleeper.handle.hash)) { + list_del_init(&sleeper.handle.recent); + list_del_init(&sleeper.handle.hash); + cache_defer_cnt--; + spin_unlock(&cache_defer_lock); + } else { + /* cache_revisit_request already removed + * this from the hash table, but hasn't + * called ->revisit yet. It will very soon + * and we need to wait for it. + */ + spin_unlock(&cache_defer_lock); + wait_for_completion(&sleeper.completion); + } + } + if (test_bit(CACHE_PENDING, &item->flags)) { + /* item is still pending, try request + * deferral + */ + dreq = req->defer(req); + goto retry; + } + /* only return success if we actually deferred the + * request. In this case we waited until it was + * answered so no deferral has happened - rather + * an answer already exists. + */ + return -EEXIST; + } return 0; } diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index cbc084939dd8..8ff6840866fa 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -651,6 +651,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (signalled() || kthread_should_stop()) return -EINTR; + /* Normally we will wait up to 5 seconds for any required + * cache information to be provided. + */ + rqstp->rq_chandle.thread_wait = 5*HZ; + spin_lock_bh(&pool->sp_lock); xprt = svc_xprt_dequeue(pool); if (xprt) { @@ -658,6 +663,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) svc_xprt_get(xprt); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); + + /* As there is a shortage of threads and this request + * had to be queue, don't allow the thread to wait so + * long for cache updates. + */ + rqstp->rq_chandle.thread_wait = 1*HZ; } else { /* No data pending. Go to sleep */ svc_thread_enqueue(pool, rqstp); -- cgit v1.2.3 From 4f8b02b4e5c6896e073bed736136d420bd44b627 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:47 +0200 Subject: vmalloc: pcpu_get/free_vm_areas() aren't needed on UP These functions are used only by percpu memory allocator on SMP. Don't build them on UP. Signed-off-by: Tejun Heo Cc: Nick Piggin Reviewed-by: Chrsitoph Lameter --- include/linux/vmalloc.h | 2 ++ mm/vmalloc.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 01c2145118dc..63a4fe6d51bd 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -117,10 +117,12 @@ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); +#ifdef CONFIG_SMP struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align, gfp_t gfp_mask); void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); +#endif #endif /* _LINUX_VMALLOC_H */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6b8889da69a6..c623e0ce3f00 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2056,6 +2056,7 @@ void free_vm_area(struct vm_struct *area) } EXPORT_SYMBOL_GPL(free_vm_area); +#ifdef CONFIG_SMP static struct vmap_area *node_to_va(struct rb_node *n) { return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; @@ -2336,6 +2337,7 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) free_vm_area(vms[i]); kfree(vms); } +#endif /* CONFIG_SMP */ #ifdef CONFIG_PROC_FS static void *s_start(struct seq_file *m, loff_t *pos) -- cgit v1.2.3 From 6abad5acac09921f4944af77d3860f82d49f528d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:47 +0200 Subject: percpu: reduce PCPU_MIN_UNIT_SIZE to 32k In preparation of enabling percpu allocator for UP, reduce PCPU_MIN_UNIT_SIZE to 32k. On UP, the first chunk doesn't have to include static percpu variables and chunk size can be smaller which is important as UP percpu allocator will use contiguous kernel memory to populate chunks. PCPU_MIN_UNIT_SIZE also determines the maximum supported allocation size but 32k should still be enough. Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 49466b13c5c6..fc8130a7cac0 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -42,7 +42,7 @@ #ifdef CONFIG_SMP /* minimum unit size, also is the maximum supported allocation size */ -#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) +#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) /* * Percpu allocator can serve percpu allocations before slab is -- cgit v1.2.3 From bbddff0545878a8649c091a9dd7c43ce91516734 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:48 +0200 Subject: percpu: use percpu allocator on UP too On UP, percpu allocations were redirected to kmalloc. This has the following problems. * For certain amount of allocations (determined by PERCPU_DYNAMIC_EARLY_SLOTS and PERCPU_DYNAMIC_EARLY_SIZE), percpu allocator can be used before the usual kernel memory allocator is brought online. On SMP, this is used to initialize the kernel memory allocator. * percpu allocator honors alignment upto PAGE_SIZE but kmalloc() doesn't. For example, workqueue makes use of larger alignments for cpu_workqueues. Currently, users of percpu allocators need to handle UP differently, which is somewhat fragile and ugly. Other than small amount of memory, there isn't much to lose by enabling percpu allocator on UP. It can simply use kernel memory based chunk allocation which was added for SMP archs w/o MMUs. This patch removes mm/percpu_up.c, builds mm/percpu.c on UP too and makes UP build use percpu-km. As percpu addresses and kernel addresses are always identity mapped and static percpu variables don't need any special treatment, nothing is arch dependent and mm/percpu.c implements generic setup_per_cpu_areas() for UP. Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter Acked-by: Pekka Enberg --- include/linux/percpu.h | 29 +++++------------------- mm/Kconfig | 8 +++++++ mm/Makefile | 7 +----- mm/percpu-km.c | 2 +- mm/percpu.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++---- mm/percpu_up.c | 30 ------------------------- 6 files changed, 71 insertions(+), 65 deletions(-) delete mode 100644 mm/percpu_up.c (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index fc8130a7cac0..aeeeef1093cd 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -39,8 +39,6 @@ preempt_enable(); \ } while (0) -#ifdef CONFIG_SMP - /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) @@ -137,37 +135,20 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, * dynamically allocated. Non-atomic access to the current CPU's * version should probably be combined with get_cpu()/put_cpu(). */ +#ifdef CONFIG_SMP #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +#else +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) +#endif extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); extern bool is_kernel_percpu_address(unsigned long addr); -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +#if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) extern void __init setup_per_cpu_areas(void); #endif extern void __init percpu_init_late(void); -#else /* CONFIG_SMP */ - -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) - -/* can't distinguish from other static vars, always false */ -static inline bool is_kernel_percpu_address(unsigned long addr) -{ - return false; -} - -static inline void __init setup_per_cpu_areas(void) { } - -static inline void __init percpu_init_late(void) { } - -static inline void *pcpu_lpage_remapped(void *kaddr) -{ - return NULL; -} - -#endif /* CONFIG_SMP */ - extern void __percpu *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void __percpu *__pdata); extern phys_addr_t per_cpu_ptr_to_phys(void *addr); diff --git a/mm/Kconfig b/mm/Kconfig index f4e516e9c37c..01a57447a410 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -301,3 +301,11 @@ config NOMMU_INITIAL_TRIM_EXCESS of 1 says that all excess pages should be trimmed. See Documentation/nommu-mmap.txt for more information. + +# +# UP and nommu archs use km based percpu allocator +# +config NEED_PER_CPU_KM + depends on !SMP + bool + default y diff --git a/mm/Makefile b/mm/Makefile index 34b2546a9e37..f73f75a29f82 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page_alloc.o page-writeback.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ - page_isolation.o mm_init.o mmu_context.o \ + page_isolation.o mm_init.o mmu_context.o percpu.o \ $(mmu-y) obj-y += init-mm.o @@ -36,11 +36,6 @@ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o -ifdef CONFIG_SMP -obj-y += percpu.o -else -obj-y += percpu_up.o -endif obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o diff --git a/mm/percpu-km.c b/mm/percpu-km.c index df680855540a..7037bc73bfa4 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c @@ -27,7 +27,7 @@ * chunk size is not aligned. percpu-km code will whine about it. */ -#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK +#if defined(CONFIG_SMP) && defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) #error "contiguous percpu allocation is incompatible with paged first chunk" #endif diff --git a/mm/percpu.c b/mm/percpu.c index 58c572b18b07..fa70122dfdd0 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -76,6 +76,7 @@ #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ +#ifdef CONFIG_SMP /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ #ifndef __addr_to_pcpu_ptr #define __addr_to_pcpu_ptr(addr) \ @@ -89,6 +90,11 @@ (unsigned long)pcpu_base_addr - \ (unsigned long)__per_cpu_start) #endif +#else /* CONFIG_SMP */ +/* on UP, it's always identity mapped */ +#define __addr_to_pcpu_ptr(addr) (void __percpu *)(addr) +#define __pcpu_ptr_to_addr(ptr) (void __force *)(ptr) +#endif /* CONFIG_SMP */ struct pcpu_chunk { struct list_head list; /* linked to pcpu_slot lists */ @@ -949,6 +955,7 @@ EXPORT_SYMBOL_GPL(free_percpu); */ bool is_kernel_percpu_address(unsigned long addr) { +#ifdef CONFIG_SMP const size_t static_size = __per_cpu_end - __per_cpu_start; void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); unsigned int cpu; @@ -959,6 +966,8 @@ bool is_kernel_percpu_address(unsigned long addr) if ((void *)addr >= start && (void *)addr < start + static_size) return true; } +#endif + /* on UP, can't distinguish from other static vars, always false */ return false; } @@ -1066,6 +1075,8 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) free_bootmem(__pa(ai), ai->__ai_size); } +#if defined(CONFIG_SMP) && (defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ + defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)) /** * pcpu_build_alloc_info - build alloc_info considering distances between CPUs * @reserved_size: the size of reserved percpu area in bytes @@ -1220,6 +1231,8 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( return ai; } +#endif /* CONFIG_SMP && (CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || + CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) */ /** * pcpu_dump_alloc_info - print out information about pcpu_alloc_info @@ -1363,7 +1376,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, /* sanity checks */ PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); +#ifdef CONFIG_SMP PCPU_SETUP_BUG_ON(!ai->static_size); +#endif PCPU_SETUP_BUG_ON(!base_addr); PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); @@ -1488,6 +1503,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, return 0; } +#ifdef CONFIG_SMP + const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { [PCPU_FC_AUTO] = "auto", [PCPU_FC_EMBED] = "embed", @@ -1758,8 +1775,9 @@ out_free_ar: } #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA /* - * Generic percpu area setup. + * Generic SMP percpu area setup. * * The embedding helper is used because its behavior closely resembles * the original non-dynamic generic percpu area setup. This is @@ -1770,7 +1788,6 @@ out_free_ar: * on the physical linear memory mapping which uses large page * mappings on applicable archs. */ -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); @@ -1799,13 +1816,48 @@ void __init setup_per_cpu_areas(void) PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, pcpu_dfl_fc_alloc, pcpu_dfl_fc_free); if (rc < 0) - panic("Failed to initialized percpu areas."); + panic("Failed to initialize percpu areas."); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; } -#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ +#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ + +#else /* CONFIG_SMP */ + +/* + * UP percpu area setup. + * + * UP always uses km-based percpu allocator with identity mapping. + * Static percpu variables are indistinguishable from the usual static + * variables and don't require any special preparation. + */ +void __init setup_per_cpu_areas(void) +{ + const size_t unit_size = + roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE, + PERCPU_DYNAMIC_RESERVE)); + struct pcpu_alloc_info *ai; + void *fc; + + ai = pcpu_alloc_alloc_info(1, 1); + fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + if (!ai || !fc) + panic("Failed to allocate memory for percpu areas."); + + ai->dyn_size = unit_size; + ai->unit_size = unit_size; + ai->atom_size = unit_size; + ai->alloc_size = unit_size; + ai->groups[0].nr_units = 1; + ai->groups[0].cpu_map[0] = 0; + + if (pcpu_setup_first_chunk(ai, fc) < 0) + panic("Failed to initialize percpu areas."); +} + +#endif /* CONFIG_SMP */ /* * First and reserved chunks are initialized with temporary allocation diff --git a/mm/percpu_up.c b/mm/percpu_up.c deleted file mode 100644 index db884fae5721..000000000000 --- a/mm/percpu_up.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * mm/percpu_up.c - dummy percpu memory allocator implementation for UP - */ - -#include -#include -#include - -void __percpu *__alloc_percpu(size_t size, size_t align) -{ - /* - * Can't easily make larger alignment work with kmalloc. WARN - * on it. Larger alignment should only be used for module - * percpu sections on SMP for which this path isn't used. - */ - WARN_ON_ONCE(align > SMP_CACHE_BYTES); - return (void __percpu __force *)kzalloc(size, GFP_KERNEL); -} -EXPORT_SYMBOL_GPL(__alloc_percpu); - -void free_percpu(void __percpu *p) -{ - kfree(this_cpu_ptr(p)); -} -EXPORT_SYMBOL_GPL(free_percpu); - -phys_addr_t per_cpu_ptr_to_phys(void *addr) -{ - return __pa(addr); -} -- cgit v1.2.3 From 0ade638655f0ef4d807295c14a4c97544bd6b9ca Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 24 Aug 2010 22:18:41 +0200 Subject: intel-gtt: introduce drm/intel-gtt.h Add a few definitions to it that are already shared and that will be shared in the future (like the number of stolen entries). No functional changes in here. Signed-off-by: Daniel Vetter Signed-off-by: Chris Wilson --- drivers/char/agp/intel-gtt.c | 42 +++++++++++++++++++---------------------- drivers/gpu/drm/i915/i915_dma.c | 2 -- drivers/gpu/drm/i915/i915_drv.h | 1 + include/drm/intel-gtt.h | 18 ++++++++++++++++++ 4 files changed, 38 insertions(+), 25 deletions(-) create mode 100644 include/drm/intel-gtt.h (limited to 'include') diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 72f937615056..0a3e91ba0f2b 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -25,6 +25,7 @@ #include "agp.h" #include "intel-agp.h" #include +#include /* * If we have Intel graphics, we're not going to have anything other than @@ -81,17 +82,11 @@ static struct gatt_mask intel_gen6_masks[] = }; static struct _intel_private { + struct intel_gtt base; struct pci_dev *pcidev; /* device one */ u8 __iomem *registers; u32 __iomem *gtt; /* I915G */ int num_dcache_entries; - /* gtt_entries is the number of gtt entries that are already mapped - * to stolen memory. Stolen memory is larger than the memory mapped - * through gtt_entries, as it includes some reserved space for the BIOS - * popup and for the GTT. - */ - int gtt_entries; /* i830+ */ - int gtt_total_size; union { void __iomem *i9xx_flush_page; void *i8xx_flush_page; @@ -772,7 +767,7 @@ static void intel_i830_init_gtt_entries(void) gtt_entries = 0; } - intel_private.gtt_entries = gtt_entries; + intel_private.base.gtt_stolen_entries = gtt_entries; } static void intel_i830_fini_flush(void) @@ -849,7 +844,7 @@ static int intel_i830_create_gatt_table(struct agp_bridge_data *bridge) /* we have to call this as early as possible after the MMIO base address is known */ intel_i830_init_gtt_entries(); - if (intel_private.gtt_entries == 0) { + if (intel_private.base.gtt_stolen_entries == 0) { iounmap(intel_private.registers); return -ENOMEM; } @@ -919,7 +914,7 @@ static int intel_i830_configure(void) readl(intel_private.registers+I810_PGETBL_CTL); /* PCI Posting. */ if (agp_bridge->driver->needs_scratch_page) { - for (i = intel_private.gtt_entries; i < current_size->num_entries; i++) { + for (i = intel_private.base.gtt_stolen_entries; i < current_size->num_entries; i++) { writel(agp_bridge->scratch_page, intel_private.registers+I810_PTE_BASE+(i*4)); } readl(intel_private.registers+I810_PTE_BASE+((i-1)*4)); /* PCI Posting. */ @@ -950,10 +945,10 @@ static int intel_i830_insert_entries(struct agp_memory *mem, off_t pg_start, temp = agp_bridge->current_size; num_entries = A_SIZE_FIX(temp)->num_entries; - if (pg_start < intel_private.gtt_entries) { + if (pg_start < intel_private.base.gtt_stolen_entries) { dev_printk(KERN_DEBUG, &intel_private.pcidev->dev, - "pg_start == 0x%.8lx, intel_private.gtt_entries == 0x%.8x\n", - pg_start, intel_private.gtt_entries); + "pg_start == 0x%.8lx, gtt_stolen_entries == 0x%.8x\n", + pg_start, intel_private.base.gtt_stolen_entries); dev_info(&intel_private.pcidev->dev, "trying to insert into local/stolen memory\n"); @@ -1001,7 +996,7 @@ static int intel_i830_remove_entries(struct agp_memory *mem, off_t pg_start, if (mem->page_count == 0) return 0; - if (pg_start < intel_private.gtt_entries) { + if (pg_start < intel_private.base.gtt_stolen_entries) { dev_info(&intel_private.pcidev->dev, "trying to disable local/stolen memory\n"); return -EINVAL; @@ -1136,7 +1131,8 @@ static int intel_i9xx_configure(void) readl(intel_private.registers+I810_PGETBL_CTL); /* PCI Posting. */ if (agp_bridge->driver->needs_scratch_page) { - for (i = intel_private.gtt_entries; i < intel_private.gtt_total_size; i++) { + for (i = intel_private.base.gtt_stolen_entries; i < + intel_private.base.gtt_total_entries; i++) { writel(agp_bridge->scratch_page, intel_private.gtt+i); } readl(intel_private.gtt+i-1); /* PCI Posting. */ @@ -1181,10 +1177,10 @@ static int intel_i915_insert_entries(struct agp_memory *mem, off_t pg_start, temp = agp_bridge->current_size; num_entries = A_SIZE_FIX(temp)->num_entries; - if (pg_start < intel_private.gtt_entries) { + if (pg_start < intel_private.base.gtt_stolen_entries) { dev_printk(KERN_DEBUG, &intel_private.pcidev->dev, - "pg_start == 0x%.8lx, intel_private.gtt_entries == 0x%.8x\n", - pg_start, intel_private.gtt_entries); + "pg_start == 0x%.8lx, gtt_stolen_entries == 0x%.8x\n", + pg_start, intel_private.base.gtt_stolen_entries); dev_info(&intel_private.pcidev->dev, "trying to insert into local/stolen memory\n"); @@ -1227,7 +1223,7 @@ static int intel_i915_remove_entries(struct agp_memory *mem, off_t pg_start, if (mem->page_count == 0) return 0; - if (pg_start < intel_private.gtt_entries) { + if (pg_start < intel_private.base.gtt_stolen_entries) { dev_info(&intel_private.pcidev->dev, "trying to disable local/stolen memory\n"); return -EINVAL; @@ -1323,7 +1319,7 @@ static int intel_i915_create_gatt_table(struct agp_bridge_data *bridge) if (!intel_private.gtt) return -ENOMEM; - intel_private.gtt_total_size = gtt_map_size / 4; + intel_private.base.gtt_total_entries = gtt_map_size / 4; temp &= 0xfff80000; @@ -1338,7 +1334,7 @@ static int intel_i915_create_gatt_table(struct agp_bridge_data *bridge) /* we have to call this as early as possible after the MMIO base address is known */ intel_i830_init_gtt_entries(); - if (intel_private.gtt_entries == 0) { + if (intel_private.base.gtt_stolen_entries == 0) { iounmap(intel_private.gtt); iounmap(intel_private.registers); return -ENOMEM; @@ -1449,7 +1445,7 @@ static int intel_i965_create_gatt_table(struct agp_bridge_data *bridge) if (!intel_private.gtt) return -ENOMEM; - intel_private.gtt_total_size = gtt_size / 4; + intel_private.base.gtt_total_entries = gtt_size / 4; intel_private.registers = ioremap(temp, 128 * 4096); if (!intel_private.registers) { @@ -1462,7 +1458,7 @@ static int intel_i965_create_gatt_table(struct agp_bridge_data *bridge) /* we have to call this as early as possible after the MMIO base address is known */ intel_i830_init_gtt_entries(); - if (intel_private.gtt_entries == 0) { + if (intel_private.base.gtt_stolen_entries == 0) { iounmap(intel_private.gtt); iounmap(intel_private.registers); return -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 8d52f01a6d90..47228cb16901 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -42,8 +42,6 @@ #include #include -extern int intel_max_stolen; /* from AGP driver */ - /** * Sets up the hardware status page for devices that need a physical address * in the register. diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cfc8bfd0fd7e..d825ef207b2c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -34,6 +34,7 @@ #include "intel_bios.h" #include "intel_ringbuffer.h" #include +#include /* General customization: */ diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h new file mode 100644 index 000000000000..6769cb704e9b --- /dev/null +++ b/include/drm/intel-gtt.h @@ -0,0 +1,18 @@ +/* Common header for intel-gtt.ko and i915.ko */ + +#ifndef _DRM_INTEL_GTT_H +#define _DRM_INTEL_GTT_H +extern int intel_max_stolen; /* from AGP driver */ + +struct intel_gtt { + /* Number of stolen gtt entries at the beginning. */ + unsigned int gtt_stolen_entries; + /* Total number of gtt entries. */ + unsigned int gtt_total_entries; + /* Part of the gtt that is mappable by the cpu, for those chips where + * this is not the full gtt. */ + unsigned int gtt_mappable_entries; +}; + +#endif + -- cgit v1.2.3 From 19966754328d99ee003ddfc7a8c31ceb115483ac Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 6 Sep 2010 20:08:44 +0200 Subject: drm/i915: die, i915_probe_agp, die Use the detection from intel-gtt.ko instead. Hooray! Also move the stolen mem allocator to the other gtt stuff in dev_prv->mem. v2: Chris Wilson noted that my error handling was crap. Fix it. He also said that this fixes a problem on his i845. Indeed, i915_probe_agp misses a special case for i830/i845 stolen mem detection. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=25476 Cc: stable@kernel.org Signed-off-by: Daniel Vetter Signed-off-by: Chris Wilson --- drivers/char/agp/intel-gtt.c | 6 ++ drivers/gpu/drm/i915/i915_dma.c | 190 +++------------------------------------- drivers/gpu/drm/i915/i915_drv.h | 7 +- include/drm/intel-gtt.h | 2 + 4 files changed, 25 insertions(+), 180 deletions(-) (limited to 'include') diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 6eb64c19af0e..9cb7c98afb9c 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -1714,6 +1714,12 @@ int intel_gmch_probe(struct pci_dev *pdev, } EXPORT_SYMBOL(intel_gmch_probe); +struct intel_gtt *intel_gtt_get(void) +{ + return &intel_private.base; +} +EXPORT_SYMBOL(intel_gtt_get); + void intel_gmch_remove(struct pci_dev *pdev) { if (intel_private.pcidev) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index a693b27f3df4..428c75b466aa 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -989,172 +989,6 @@ intel_teardown_mchbar(struct drm_device *dev) release_resource(&dev_priv->mch_res); } -/** - * i915_probe_agp - get AGP bootup configuration - * @pdev: PCI device - * @aperture_size: returns AGP aperture configured size - * @preallocated_size: returns size of BIOS preallocated AGP space - * - * Since Intel integrated graphics are UMA, the BIOS has to set aside - * some RAM for the framebuffer at early boot. This code figures out - * how much was set aside so we can use it for our own purposes. - */ -static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size, - uint32_t *preallocated_size) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - u16 tmp = 0; - unsigned long overhead; - unsigned long stolen; - - /* Get the fb aperture size and "stolen" memory amount. */ - pci_read_config_word(dev_priv->bridge_dev, INTEL_GMCH_CTRL, &tmp); - - *aperture_size = 1024 * 1024; - *preallocated_size = 1024 * 1024; - - switch (dev->pdev->device) { - case PCI_DEVICE_ID_INTEL_82830_CGC: - case PCI_DEVICE_ID_INTEL_82845G_IG: - case PCI_DEVICE_ID_INTEL_82855GM_IG: - case PCI_DEVICE_ID_INTEL_82865_IG: - if ((tmp & INTEL_GMCH_MEM_MASK) == INTEL_GMCH_MEM_64M) - *aperture_size *= 64; - else - *aperture_size *= 128; - break; - default: - /* 9xx supports large sizes, just look at the length */ - *aperture_size = pci_resource_len(dev->pdev, 2); - break; - } - - /* - * Some of the preallocated space is taken by the GTT - * and popup. GTT is 1K per MB of aperture size, and popup is 4K. - */ - if (IS_G4X(dev) || IS_PINEVIEW(dev) || IS_IRONLAKE(dev) || IS_GEN6(dev)) - overhead = 4096; - else - overhead = (*aperture_size / 1024) + 4096; - - if (IS_GEN6(dev)) { - /* SNB has memory control reg at 0x50.w */ - pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &tmp); - - switch (tmp & SNB_GMCH_GMS_STOLEN_MASK) { - case INTEL_855_GMCH_GMS_DISABLED: - DRM_ERROR("video memory is disabled\n"); - return -1; - case SNB_GMCH_GMS_STOLEN_32M: - stolen = 32 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_64M: - stolen = 64 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_96M: - stolen = 96 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_128M: - stolen = 128 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_160M: - stolen = 160 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_192M: - stolen = 192 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_224M: - stolen = 224 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_256M: - stolen = 256 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_288M: - stolen = 288 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_320M: - stolen = 320 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_352M: - stolen = 352 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_384M: - stolen = 384 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_416M: - stolen = 416 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_448M: - stolen = 448 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_480M: - stolen = 480 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_512M: - stolen = 512 * 1024 * 1024; - break; - default: - DRM_ERROR("unexpected GMCH_GMS value: 0x%02x\n", - tmp & SNB_GMCH_GMS_STOLEN_MASK); - return -1; - } - } else { - switch (tmp & INTEL_GMCH_GMS_MASK) { - case INTEL_855_GMCH_GMS_DISABLED: - DRM_ERROR("video memory is disabled\n"); - return -1; - case INTEL_855_GMCH_GMS_STOLEN_1M: - stolen = 1 * 1024 * 1024; - break; - case INTEL_855_GMCH_GMS_STOLEN_4M: - stolen = 4 * 1024 * 1024; - break; - case INTEL_855_GMCH_GMS_STOLEN_8M: - stolen = 8 * 1024 * 1024; - break; - case INTEL_855_GMCH_GMS_STOLEN_16M: - stolen = 16 * 1024 * 1024; - break; - case INTEL_855_GMCH_GMS_STOLEN_32M: - stolen = 32 * 1024 * 1024; - break; - case INTEL_915G_GMCH_GMS_STOLEN_48M: - stolen = 48 * 1024 * 1024; - break; - case INTEL_915G_GMCH_GMS_STOLEN_64M: - stolen = 64 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_128M: - stolen = 128 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_256M: - stolen = 256 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_96M: - stolen = 96 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_160M: - stolen = 160 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_224M: - stolen = 224 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_352M: - stolen = 352 * 1024 * 1024; - break; - default: - DRM_ERROR("unexpected GMCH_GMS value: 0x%02x\n", - tmp & INTEL_GMCH_GMS_MASK); - return -1; - } - } - - *preallocated_size = stolen - overhead; - - return 0; -} - #define PTE_ADDRESS_MASK 0xfffff000 #define PTE_ADDRESS_MASK_HIGH 0x000000f0 /* i915+ */ #define PTE_MAPPING_TYPE_UNCACHED (0 << 1) @@ -1249,7 +1083,7 @@ static void i915_setup_compression(struct drm_device *dev, int size) unsigned long ll_base = 0; /* Leave 1M for line length buffer & misc. */ - compressed_fb = drm_mm_search_free(&dev_priv->vram, size, 4096, 0); + compressed_fb = drm_mm_search_free(&dev_priv->mm.vram, size, 4096, 0); if (!compressed_fb) { dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL; i915_warn_stolen(dev); @@ -1270,7 +1104,7 @@ static void i915_setup_compression(struct drm_device *dev, int size) } if (!(IS_GM45(dev) || IS_IRONLAKE_M(dev))) { - compressed_llb = drm_mm_search_free(&dev_priv->vram, 4096, + compressed_llb = drm_mm_search_free(&dev_priv->mm.vram, 4096, 4096, 0); if (!compressed_llb) { i915_warn_stolen(dev); @@ -1366,8 +1200,8 @@ static int i915_load_modeset_init(struct drm_device *dev, struct drm_i915_private *dev_priv = dev->dev_private; int ret = 0; - /* Basic memrange allocator for stolen space (aka vram) */ - drm_mm_init(&dev_priv->vram, 0, prealloc_size); + /* Basic memrange allocator for stolen space (aka mm.vram) */ + drm_mm_init(&dev_priv->mm.vram, 0, prealloc_size); DRM_INFO("set up %ldM of stolen space\n", prealloc_size / (1024*1024)); /* We're off and running w/KMS */ @@ -2107,16 +1941,16 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) "performance may suffer.\n"); } - ret = i915_probe_agp(dev, &agp_size, &prealloc_size); - if (ret) + dev_priv->mm.gtt = intel_gtt_get(); + if (!dev_priv->mm.gtt) { + DRM_ERROR("Failed to initialize GTT\n"); + ret = -ENODEV; goto out_iomapfree; - - if (prealloc_size > intel_max_stolen) { - DRM_INFO("detected %dM stolen memory, trimming to %dM\n", - prealloc_size >> 20, intel_max_stolen >> 20); - prealloc_size = intel_max_stolen; } + prealloc_size = dev_priv->mm.gtt->gtt_stolen_entries << PAGE_SHIFT; + agp_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; + dev_priv->wq = create_singlethread_workqueue("i915"); if (dev_priv->wq == NULL) { DRM_ERROR("Failed to create our workqueue.\n"); @@ -2301,7 +2135,7 @@ int i915_driver_unload(struct drm_device *dev) mutex_unlock(&dev->struct_mutex); if (I915_HAS_FBC(dev) && i915_powersave) i915_cleanup_compression(dev); - drm_mm_takedown(&dev_priv->vram); + drm_mm_takedown(&dev_priv->mm.vram); intel_cleanup_overlay(dev); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d825ef207b2c..c8b22005ec18 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -305,8 +305,6 @@ typedef struct drm_i915_private { uint32_t last_instdone; uint32_t last_instdone1; - struct drm_mm vram; - unsigned long cfb_size; unsigned long cfb_pitch; int cfb_fence; @@ -511,6 +509,11 @@ typedef struct drm_i915_private { u32 saveMCHBAR_RENDER_STANDBY; struct { + /** Bridge to intel-gtt-ko */ + struct intel_gtt *gtt; + /** Memory allocator for GTT stolen memory */ + struct drm_mm vram; + /** Memory allocator for GTT */ struct drm_mm gtt_space; struct io_mapping *gtt_mapping; diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index 6769cb704e9b..b3aa7ab72d09 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -14,5 +14,7 @@ struct intel_gtt { unsigned int gtt_mappable_entries; }; +struct intel_gtt *intel_gtt_get(void); + #endif -- cgit v1.2.3 From e3634169bcc0cce33c815865d62ab378739f7389 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 7 Sep 2010 05:55:38 +0000 Subject: include/net/raw.h: Convert raw_seq_private macro to inline Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/raw.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/raw.h b/include/net/raw.h index 43c57502659b..42ce6fe7a2d5 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -45,7 +45,10 @@ struct raw_iter_state { struct raw_hashinfo *h; }; -#define raw_seq_private(seq) ((struct raw_iter_state *)(seq)->private) +static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq) +{ + return seq->private; +} void *raw_seq_start(struct seq_file *seq, loff_t *pos); void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos); void raw_seq_stop(struct seq_file *seq, void *v); -- cgit v1.2.3 From a6e0fc8514d41dfdd98b1d15cacc432cf040f8af Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Sep 2010 14:15:32 -0700 Subject: net: introduce rcu_dereference_rtnl We use rcu_dereference_check(p, rcu_read_lock_held() || lockdep_rtnl_is_held()) several times in network stack. More usages to come too, so its time to create a helper. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 58d44491880f..263690d991a8 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -749,6 +749,17 @@ extern int rtnl_is_locked(void); extern int lockdep_rtnl_is_held(void); #endif /* #ifdef CONFIG_PROVE_LOCKING */ +/** + * rcu_dereference_rtnl - rcu_dereference with debug checking + * @p: The pointer to read, prior to dereferencing + * + * Do an rcu_dereference(p), but check caller either holds rcu_read_lock() + * or RTNL + */ +#define rcu_dereference_rtnl(p) \ + rcu_dereference_check(p, rcu_read_lock_held() || \ + lockdep_rtnl_is_held()) + extern void rtnetlink_init(void); extern void __rtnl_unlock(void); -- cgit v1.2.3 From 15133f6e67d8d646d0744336b4daa3135452cb0d Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 12 Jan 2010 14:33:38 -0800 Subject: RDS: Implement atomic operations Implement a CMSG-based interface to do FADD and CSWP ops. Alter send routines to handle atomic ops. Add atomic counters to stats. Add xmit_atomic() to struct rds_transport Inline rds_ib_send_unmap_rdma into unmap_rm Signed-off-by: Andy Grover --- include/linux/rds.h | 19 +++++++ net/rds/ib.c | 1 + net/rds/ib.h | 1 + net/rds/ib_rdma.c | 4 +- net/rds/ib_send.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++------ net/rds/rdma.c | 73 +++++++++++++++++++++++++++ net/rds/rds.h | 33 +++++++++++-- net/rds/send.c | 71 ++++++++++++++++++++++++-- net/rds/stats.c | 2 + 9 files changed, 321 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 7f3971d9fc5c..9239152abf7a 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -73,6 +73,8 @@ #define RDS_CMSG_RDMA_MAP 3 #define RDS_CMSG_RDMA_STATUS 4 #define RDS_CMSG_CONG_UPDATE 5 +#define RDS_CMSG_ATOMIC_FADD 6 +#define RDS_CMSG_ATOMIC_CSWP 7 #define RDS_INFO_FIRST 10000 #define RDS_INFO_COUNTERS 10000 @@ -237,6 +239,23 @@ struct rds_rdma_args { u_int64_t user_token; }; +struct rds_atomic_args { + rds_rdma_cookie_t cookie; + uint64_t local_addr; + uint64_t remote_addr; + union { + struct { + uint64_t compare; + uint64_t swap; + } cswp; + struct { + uint64_t add; + } fadd; + }; + uint64_t flags; + uint64_t user_token; +}; + struct rds_rdma_notify { u_int64_t user_token; int32_t status; diff --git a/net/rds/ib.c b/net/rds/ib.c index 8f2d6dd7700a..f0d29656baff 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -264,6 +264,7 @@ struct rds_transport rds_ib_transport = { .xmit = rds_ib_xmit, .xmit_cong_map = NULL, .xmit_rdma = rds_ib_xmit_rdma, + .xmit_atomic = rds_ib_xmit_atomic, .recv = rds_ib_recv, .conn_alloc = rds_ib_conn_alloc, .conn_free = rds_ib_conn_free, diff --git a/net/rds/ib.h b/net/rds/ib.h index 64df4e79b29f..d2fd0aa4fde7 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -336,6 +336,7 @@ void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits); void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted); int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, u32 *adv_credits, int need_posted, int max_posted); +int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op); /* ib_stats.c */ DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 0f3b5a2f3fe0..242231f09464 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -298,7 +298,9 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd, (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE), + IB_ACCESS_REMOTE_WRITE| + IB_ACCESS_REMOTE_ATOMIC), + &pool->fmr_attr); if (IS_ERR(ibmr->fmr)) { err = PTR_ERR(ibmr->fmr); diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index f0edfdb2866c..b2bd164434ad 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -62,15 +62,17 @@ static void rds_ib_send_rdma_complete(struct rds_message *rm, rds_rdma_send_complete(rm, notify_status); } -static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic, - struct rds_rdma_op *op) +static void rds_ib_send_atomic_complete(struct rds_message *rm, + int wc_status) { - if (op->r_mapped) { - ib_dma_unmap_sg(ic->i_cm_id->device, - op->r_sg, op->r_nents, - op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - op->r_mapped = 0; - } + int notify_status; + + if (wc_status != IB_WC_SUCCESS) + notify_status = RDS_RDMA_OTHER_ERROR; + else + notify_status = RDS_RDMA_SUCCESS; + + rds_atomic_send_complete(rm, notify_status); } static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, @@ -86,7 +88,14 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, DMA_TO_DEVICE); if (rm->rdma.m_rdma_op.r_active) { - rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op); + struct rds_rdma_op *op = &rm->rdma.m_rdma_op; + + if (op->r_mapped) { + ib_dma_unmap_sg(ic->i_cm_id->device, + op->r_sg, op->r_nents, + op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); + op->r_mapped = 0; + } /* If the user asked for a completion notification on this * message, we can implement three different semantics: @@ -116,6 +125,24 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op.r_bytes); } + if (rm->atomic.op_active) { + struct rm_atomic_op *op = &rm->atomic; + + /* unmap atomic recvbuf */ + if (op->op_mapped) { + ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1, + DMA_FROM_DEVICE); + op->op_mapped = 0; + } + + rds_ib_send_atomic_complete(rm, wc_status); + + if (rm->atomic.op_type == RDS_ATOMIC_TYPE_CSWP) + rds_stats_inc(s_atomic_cswp); + else + rds_stats_inc(s_atomic_fadd); + } + /* If anyone waited for this message to get flushed out, wake * them up now */ rds_message_unmapped(rm); @@ -158,12 +185,9 @@ void rds_ib_send_clear_ring(struct rds_ib_connection *ic) u32 i; for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { - if (send->s_wr.opcode == 0xdead) + if (!send->s_rm || send->s_wr.opcode == 0xdead) continue; - if (send->s_rm) - rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); - if (send->s_op) - rds_ib_send_unmap_rdma(ic, send->s_op); + rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); } } @@ -218,6 +242,8 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_READ: + case IB_WR_ATOMIC_FETCH_AND_ADD: + case IB_WR_ATOMIC_CMP_AND_SWP: /* Nothing to be done - the SG list will be unmapped * when the SEND completes. */ break; @@ -243,8 +269,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) rm = rds_send_get_message(conn, send->s_op); if (rm) { - if (rm->rdma.m_rdma_op.r_active) - rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op); + rds_ib_send_unmap_rm(ic, send, wc.status); rds_ib_send_rdma_complete(rm, wc.status); rds_message_put(rm); } @@ -736,6 +761,89 @@ out: return ret; } +/* + * Issue atomic operation. + * A simplified version of the rdma case, we always map 1 SG, and + * only 8 bytes, for the return value from the atomic operation. + */ +int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + struct rds_ib_send_work *send = NULL; + struct ib_send_wr *failed_wr; + struct rds_ib_device *rds_ibdev; + u32 pos; + u32 work_alloc; + int ret; + + rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); + + work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos); + if (work_alloc != 1) { + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + rds_ib_stats_inc(s_ib_tx_ring_full); + ret = -ENOMEM; + goto out; + } + + /* address of send request in ring */ + send = &ic->i_sends[pos]; + send->s_queued = jiffies; + + if (op->op_type == RDS_ATOMIC_TYPE_CSWP) { + send->s_wr.opcode = IB_WR_ATOMIC_CMP_AND_SWP; + send->s_wr.wr.atomic.compare_add = op->op_compare; + send->s_wr.wr.atomic.swap = op->op_swap_add; + } else { /* FADD */ + send->s_wr.opcode = IB_WR_ATOMIC_FETCH_AND_ADD; + send->s_wr.wr.atomic.compare_add = op->op_swap_add; + send->s_wr.wr.atomic.swap = 0; + } + send->s_wr.send_flags = IB_SEND_SIGNALED; + send->s_wr.num_sge = 1; + send->s_wr.next = NULL; + send->s_wr.wr.atomic.remote_addr = op->op_remote_addr; + send->s_wr.wr.atomic.rkey = op->op_rkey; + + /* map 8 byte retval buffer to the device */ + ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE); + rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret); + if (ret != 1) { + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); + ret = -ENOMEM; /* XXX ? */ + goto out; + } + + /* Convert our struct scatterlist to struct ib_sge */ + send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg); + send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg); + send->s_sge[0].lkey = ic->i_mr->lkey; + + rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr, + send->s_sge[0].addr, send->s_sge[0].length); + + failed_wr = &send->s_wr; + ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr); + rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic, + send, &send->s_wr, ret, failed_wr); + BUG_ON(failed_wr != &send->s_wr); + if (ret) { + printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 " + "returned %d\n", &conn->c_faddr, ret); + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + goto out; + } + + if (unlikely(failed_wr != &send->s_wr)) { + printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret); + BUG_ON(failed_wr != &send->s_wr); + } + +out: + return ret; +} + int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) { struct rds_ib_connection *ic = conn->c_transport_data; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 4fda33045598..a7019df38c70 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -719,3 +719,76 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.m_rdma_mr); } + +/* + * Fill in rds_message for an atomic request. + */ +int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg) +{ + struct page *page = NULL; + struct rds_atomic_args *args; + int ret = 0; + + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_atomic_args)) + || rm->atomic.op_active) + return -EINVAL; + + args = CMSG_DATA(cmsg); + + if (cmsg->cmsg_type == RDS_CMSG_ATOMIC_CSWP) { + rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP; + rm->atomic.op_swap_add = args->cswp.swap; + rm->atomic.op_compare = args->cswp.compare; + } else { + rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD; + rm->atomic.op_swap_add = args->fadd.add; + } + + rm->m_rdma_cookie = args->cookie; + rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); + rm->atomic.op_recverr = rs->rs_recverr; + rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); + + /* verify 8 byte-aligned */ + if (args->local_addr & 0x7) { + ret = -EFAULT; + goto err; + } + + ret = rds_pin_pages(args->local_addr, 1, &page, 1); + if (ret != 1) + goto err; + ret = 0; + + sg_set_page(rm->atomic.op_sg, page, 8, offset_in_page(args->local_addr)); + + if (rm->atomic.op_notify || rm->atomic.op_recverr) { + /* We allocate an uninitialized notifier here, because + * we don't want to do that in the completion handler. We + * would have to use GFP_ATOMIC there, and don't want to deal + * with failed allocations. + */ + rm->atomic.op_notifier = kmalloc(sizeof(*rm->atomic.op_notifier), GFP_KERNEL); + if (!rm->atomic.op_notifier) { + ret = -ENOMEM; + goto err; + } + + rm->atomic.op_notifier->n_user_token = args->user_token; + rm->atomic.op_notifier->n_status = RDS_RDMA_SUCCESS; + } + + rm->atomic.op_rkey = rds_rdma_cookie_key(rm->m_rdma_cookie); + rm->atomic.op_remote_addr = args->remote_addr + rds_rdma_cookie_offset(args->cookie); + + rm->atomic.op_active = 1; + + return ret; +err: + if (page) + put_page(page); + kfree(rm->atomic.op_notifier); + + return ret; +} diff --git a/net/rds/rds.h b/net/rds/rds.h index 0bb4957e0cfc..830e2bbb3332 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -97,6 +97,7 @@ struct rds_connection { unsigned int c_xmit_hdr_off; unsigned int c_xmit_data_off; unsigned int c_xmit_rdma_sent; + unsigned int c_xmit_atomic_sent; spinlock_t c_lock; /* protect msg queues */ u64 c_next_tx_seq; @@ -260,6 +261,10 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie) return cookie >> 32; } +/* atomic operation types */ +#define RDS_ATOMIC_TYPE_CSWP 0 +#define RDS_ATOMIC_TYPE_FADD 1 + /* * m_sock_item and m_conn_item are on lists that are serialized under * conn->c_lock. m_sock_item has additional meaning in that once it is empty @@ -315,11 +320,27 @@ struct rds_message { struct rds_sock *m_rs; rds_rdma_cookie_t m_rdma_cookie; struct { - struct { + struct rm_atomic_op { + int op_type; + uint64_t op_swap_add; + uint64_t op_compare; + + u32 op_rkey; + u64 op_remote_addr; + unsigned int op_notify:1; + unsigned int op_recverr:1; + unsigned int op_mapped:1; + unsigned int op_active:1; + struct rds_notifier *op_notifier; + struct scatterlist *op_sg; + + struct rds_mr *op_rdma_mr; + } atomic; + struct rm_rdma_op { struct rds_rdma_op m_rdma_op; struct rds_mr *m_rdma_mr; } rdma; - struct { + struct rm_data_op { unsigned int m_nents; unsigned int m_count; struct scatterlist *m_sg; @@ -397,6 +418,7 @@ struct rds_transport { int (*xmit_cong_map)(struct rds_connection *conn, struct rds_cong_map *map, unsigned long offset); int (*xmit_rdma)(struct rds_connection *conn, struct rds_rdma_op *op); + int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op); int (*recv)(struct rds_connection *conn); int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov, size_t size); @@ -546,6 +568,8 @@ struct rds_statistics { uint64_t s_cong_update_received; uint64_t s_cong_send_error; uint64_t s_cong_send_blocked; + uint64_t s_atomic_cswp; + uint64_t s_atomic_fadd; }; /* af_rds.c */ @@ -722,7 +746,10 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg); void rds_rdma_free_op(struct rds_rdma_op *ro); -void rds_rdma_send_complete(struct rds_message *rm, int); +void rds_rdma_send_complete(struct rds_message *rm, int wc_status); +void rds_atomic_send_complete(struct rds_message *rm, int wc_status); +int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg); extern void __rds_put_mr_final(struct rds_mr *mr); static inline void rds_mr_put(struct rds_mr *mr) diff --git a/net/rds/send.c b/net/rds/send.c index b751a8e77c41..f3f4e79274bf 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -73,6 +73,7 @@ void rds_send_reset(struct rds_connection *conn) conn->c_xmit_hdr_off = 0; conn->c_xmit_data_off = 0; conn->c_xmit_rdma_sent = 0; + conn->c_xmit_atomic_sent = 0; conn->c_map_queued = 0; @@ -171,6 +172,7 @@ int rds_send_xmit(struct rds_connection *conn) conn->c_xmit_hdr_off = 0; conn->c_xmit_data_off = 0; conn->c_xmit_rdma_sent = 0; + conn->c_xmit_atomic_sent = 0; /* Release the reference to the previous message. */ rds_message_put(rm); @@ -262,6 +264,17 @@ int rds_send_xmit(struct rds_connection *conn) conn->c_xmit_rm = rm; } + + if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) { + ret = conn->c_trans->xmit_atomic(conn, &rm->atomic); + if (ret) + break; + conn->c_xmit_atomic_sent = 1; + /* The transport owns the mapped memory for now. + * You can't unmap it while it's on the send queue */ + set_bit(RDS_MSG_MAPPED, &rm->m_flags); + } + /* * Try and send an rdma message. Let's see if we can * keep this simple and require that the transport either @@ -442,6 +455,41 @@ void rds_rdma_send_complete(struct rds_message *rm, int status) } EXPORT_SYMBOL_GPL(rds_rdma_send_complete); +/* + * Just like above, except looks at atomic op + */ +void rds_atomic_send_complete(struct rds_message *rm, int status) +{ + struct rds_sock *rs = NULL; + struct rm_atomic_op *ao; + struct rds_notifier *notifier; + + spin_lock(&rm->m_rs_lock); + + ao = &rm->atomic; + if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) + && ao->op_active && ao->op_notify && ao->op_notifier) { + notifier = ao->op_notifier; + rs = rm->m_rs; + sock_hold(rds_rs_to_sk(rs)); + + notifier->n_status = status; + spin_lock(&rs->rs_lock); + list_add_tail(¬ifier->n_list, &rs->rs_notify_queue); + spin_unlock(&rs->rs_lock); + + ao->op_notifier = NULL; + } + + spin_unlock(&rm->m_rs_lock); + + if (rs) { + rds_wake_sk_sleep(rs); + sock_put(rds_rs_to_sk(rs)); + } +} +EXPORT_SYMBOL_GPL(rds_atomic_send_complete); + /* * This is the same as rds_rdma_send_complete except we * don't do any locking - we have all the ingredients (message, @@ -788,6 +836,11 @@ static int rds_rm_size(struct msghdr *msg, int data_len) /* these are valid but do no add any size */ break; + case RDS_CMSG_ATOMIC_CSWP: + case RDS_CMSG_ATOMIC_FADD: + size += sizeof(struct scatterlist); + break; + default: return -EINVAL; } @@ -813,7 +866,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, continue; /* As a side effect, RDMA_DEST and RDMA_MAP will set - * rm->m_rdma_cookie and rm->m_rdma_mr. + * rm->rdma.m_rdma_cookie and rm->rdma.m_rdma_mr. */ switch (cmsg->cmsg_type) { case RDS_CMSG_RDMA_ARGS: @@ -829,6 +882,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, if (!ret) *allocated_mr = 1; break; + case RDS_CMSG_ATOMIC_CSWP: + case RDS_CMSG_ATOMIC_FADD: + ret = rds_cmsg_atomic(rs, rm, cmsg); + break; default: return -EINVAL; @@ -926,10 +983,18 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, goto out; if ((rm->m_rdma_cookie || rm->rdma.m_rdma_op.r_active) && - !conn->c_trans->xmit_rdma) { + !conn->c_trans->xmit_rdma) { if (printk_ratelimit()) printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", - &rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma); + &rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma); + ret = -EOPNOTSUPP; + goto out; + } + + if (rm->atomic.op_active && !conn->c_trans->xmit_atomic) { + if (printk_ratelimit()) + printk(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n", + &rm->atomic, conn->c_trans->xmit_atomic); ret = -EOPNOTSUPP; goto out; } diff --git a/net/rds/stats.c b/net/rds/stats.c index 7598eb07cfb1..c66d95d9c262 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -75,6 +75,8 @@ static const char *const rds_stat_names[] = { "cong_update_received", "cong_send_error", "cong_send_blocked", + "s_atomic_cswp", + "s_atomic_fadd", }; void rds_stats_info_copy(struct rds_info_iterator *iter, -- cgit v1.2.3 From 2c3a5f9abb1dc5efdab8ba9a568b1661c65fd1e3 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Mon, 1 Mar 2010 16:10:40 -0800 Subject: RDS: Add flag for silent ops. Do atomic op before RDMA Add a flag to the API so users can indicate they want silent operations. This is needed because silent ops cannot be used with USE_ONCE MRs, so we can't just assume silent. Also, change send_xmit to do atomic op before rdma op if both are present, and centralize the hairy logic to determine if we want to attempt silent, or not. Signed-off-by: Andy Grover --- include/linux/rds.h | 1 + net/rds/rdma.c | 2 ++ net/rds/rds.h | 2 ++ net/rds/send.c | 55 ++++++++++++++++++++++++++++++----------------------- 4 files changed, 36 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 9239152abf7a..109f1d343318 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -276,5 +276,6 @@ struct rds_rdma_notify { #define RDS_RDMA_USE_ONCE 0x0008 /* free MR after use */ #define RDS_RDMA_DONTWAIT 0x0010 /* Don't wait in SET_BARRIER */ #define RDS_RDMA_NOTIFY_ME 0x0020 /* Notify when operation completes */ +#define RDS_RDMA_SILENT 0x0040 /* Do not interrupt remote */ #endif /* IB_RDS_H */ diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 5ba514684431..48781fe4431c 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -559,6 +559,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, op->op_write = !!(args->flags & RDS_RDMA_READWRITE); op->op_fence = !!(args->flags & RDS_RDMA_FENCE); op->op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); + op->op_silent = !!(args->flags & RDS_RDMA_SILENT); op->op_active = 1; op->op_recverr = rs->rs_recverr; WARN_ON(!nr_pages); @@ -747,6 +748,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, } rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); + rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT); rm->atomic.op_active = 1; rm->atomic.op_recverr = rs->rs_recverr; rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); diff --git a/net/rds/rds.h b/net/rds/rds.h index 46d190d08549..23b921000e74 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -319,6 +319,7 @@ struct rds_message { unsigned int op_notify:1; unsigned int op_recverr:1; unsigned int op_mapped:1; + unsigned int op_silent:1; unsigned int op_active:1; struct scatterlist *op_sg; struct rds_notifier *op_notifier; @@ -333,6 +334,7 @@ struct rds_message { unsigned int op_notify:1; unsigned int op_recverr:1; unsigned int op_mapped:1; + unsigned int op_silent:1; unsigned int op_active:1; unsigned int op_bytes; unsigned int op_nents; diff --git a/net/rds/send.c b/net/rds/send.c index cdca9747fcbc..38567f3ee7e8 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -250,42 +250,50 @@ int rds_send_xmit(struct rds_connection *conn) conn->c_xmit_rm = rm; } - if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) { - ret = conn->c_trans->xmit_atomic(conn, rm); + /* The transport either sends the whole rdma or none of it */ + if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) { + ret = conn->c_trans->xmit_rdma(conn, &rm->rdma); if (ret) break; - conn->c_xmit_atomic_sent = 1; + conn->c_xmit_rdma_sent = 1; + /* The transport owns the mapped memory for now. * You can't unmap it while it's on the send queue */ set_bit(RDS_MSG_MAPPED, &rm->m_flags); - - /* - * This is evil, muahaha. - * We permit 0-byte sends. (rds-ping depends on this.) - * BUT if there is an atomic op and no sent data, - * we turn off sending the header, to achieve - * "silent" atomics. - * But see below; RDMA op might toggle this back on! - */ - if (rm->data.op_nents == 0) - rm->data.op_active = 0; } - /* The transport either sends the whole rdma or none of it */ - if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) { - ret = conn->c_trans->xmit_rdma(conn, &rm->rdma); + if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) { + ret = conn->c_trans->xmit_atomic(conn, rm); if (ret) break; - conn->c_xmit_rdma_sent = 1; - - /* rdmas need data sent, even if just the header */ - rm->data.op_active = 1; - + conn->c_xmit_atomic_sent = 1; /* The transport owns the mapped memory for now. * You can't unmap it while it's on the send queue */ set_bit(RDS_MSG_MAPPED, &rm->m_flags); } + /* + * A number of cases require an RDS header to be sent + * even if there is no data. + * We permit 0-byte sends; rds-ping depends on this. + * However, if there are exclusively attached silent ops, + * we skip the hdr/data send, to enable silent operation. + */ + if (rm->data.op_nents == 0) { + int ops_present; + int all_ops_are_silent = 1; + + ops_present = (rm->atomic.op_active || rm->rdma.op_active); + if (rm->atomic.op_active && !rm->atomic.op_silent) + all_ops_are_silent = 0; + if (rm->rdma.op_active && !rm->rdma.op_silent) + all_ops_are_silent = 0; + + if (ops_present && all_ops_are_silent + && !rm->m_rdma_cookie) + rm->data.op_active = 0; + } + if (rm->data.op_active && !conn->c_xmit_data_sent) { ret = conn->c_trans->xmit(conn, rm, conn->c_xmit_hdr_off, @@ -1009,8 +1017,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, if (ret) goto out; - if ((rm->m_rdma_cookie || rm->rdma.op_active) && - !conn->c_trans->xmit_rdma) { + if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) { if (printk_ratelimit()) printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", &rm->rdma, conn->c_trans->xmit_rdma); -- cgit v1.2.3 From 20c72bd5f5f902e5a8745d51573699605bf8d21c Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 25 Aug 2010 05:51:28 -0700 Subject: RDS: Implement masked atomic operations Add two CMSGs for masked versions of cswp and fadd. args struct modified to use a union for different atomic op type's arguments. Change IB to do masked atomic ops. Atomic op type in rds_message similarly unionized. Signed-off-by: Andy Grover --- include/linux/rds.h | 12 ++++++++++++ net/rds/ib_send.c | 14 +++++++++----- net/rds/rdma.c | 33 +++++++++++++++++++++++++++------ net/rds/rds.h | 14 ++++++++++++-- net/rds/send.c | 4 ++++ 5 files changed, 64 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 109f1d343318..a2a5edb4a276 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -75,6 +75,8 @@ #define RDS_CMSG_CONG_UPDATE 5 #define RDS_CMSG_ATOMIC_FADD 6 #define RDS_CMSG_ATOMIC_CSWP 7 +#define RDS_CMSG_MASKED_ATOMIC_FADD 8 +#define RDS_CMSG_MASKED_ATOMIC_CSWP 9 #define RDS_INFO_FIRST 10000 #define RDS_INFO_COUNTERS 10000 @@ -251,6 +253,16 @@ struct rds_atomic_args { struct { uint64_t add; } fadd; + struct { + uint64_t compare; + uint64_t swap; + uint64_t compare_mask; + uint64_t swap_mask; + } m_cswp; + struct { + uint64_t add; + uint64_t nocarry_mask; + } m_fadd; }; uint64_t flags; uint64_t user_token; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 808544aebb70..71f373c421bc 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -807,13 +807,17 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) send->s_queued = jiffies; if (op->op_type == RDS_ATOMIC_TYPE_CSWP) { - send->s_wr.opcode = IB_WR_ATOMIC_CMP_AND_SWP; - send->s_wr.wr.atomic.compare_add = op->op_compare; - send->s_wr.wr.atomic.swap = op->op_swap_add; + send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP; + send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare; + send->s_wr.wr.atomic.swap = op->op_m_cswp.swap; + send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask; + send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask; } else { /* FADD */ - send->s_wr.opcode = IB_WR_ATOMIC_FETCH_AND_ADD; - send->s_wr.wr.atomic.compare_add = op->op_swap_add; + send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD; + send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add; send->s_wr.wr.atomic.swap = 0; + send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask; + send->s_wr.wr.atomic.swap_mask = 0; } nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify); send->s_wr.num_sge = 1; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 48781fe4431c..48064673fc76 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -738,13 +738,34 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, args = CMSG_DATA(cmsg); - if (cmsg->cmsg_type == RDS_CMSG_ATOMIC_CSWP) { - rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP; - rm->atomic.op_swap_add = args->cswp.swap; - rm->atomic.op_compare = args->cswp.compare; - } else { + /* Nonmasked & masked cmsg ops converted to masked hw ops */ + switch (cmsg->cmsg_type) { + case RDS_CMSG_ATOMIC_FADD: + rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD; + rm->atomic.op_m_fadd.add = args->fadd.add; + rm->atomic.op_m_fadd.nocarry_mask = 0; + break; + case RDS_CMSG_MASKED_ATOMIC_FADD: rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD; - rm->atomic.op_swap_add = args->fadd.add; + rm->atomic.op_m_fadd.add = args->m_fadd.add; + rm->atomic.op_m_fadd.nocarry_mask = args->m_fadd.nocarry_mask; + break; + case RDS_CMSG_ATOMIC_CSWP: + rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP; + rm->atomic.op_m_cswp.compare = args->cswp.compare; + rm->atomic.op_m_cswp.swap = args->cswp.swap; + rm->atomic.op_m_cswp.compare_mask = ~0; + rm->atomic.op_m_cswp.swap_mask = ~0; + break; + case RDS_CMSG_MASKED_ATOMIC_CSWP: + rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP; + rm->atomic.op_m_cswp.compare = args->m_cswp.compare; + rm->atomic.op_m_cswp.swap = args->m_cswp.swap; + rm->atomic.op_m_cswp.compare_mask = args->m_cswp.compare_mask; + rm->atomic.op_m_cswp.swap_mask = args->m_cswp.swap_mask; + break; + default: + BUG(); /* should never happen */ } rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); diff --git a/net/rds/rds.h b/net/rds/rds.h index aadaddba88a7..8103dcf8b976 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -316,8 +316,18 @@ struct rds_message { struct { struct rm_atomic_op { int op_type; - uint64_t op_swap_add; - uint64_t op_compare; + union { + struct { + uint64_t compare; + uint64_t swap; + uint64_t compare_mask; + uint64_t swap_mask; + } op_m_cswp; + struct { + uint64_t add; + uint64_t nocarry_mask; + } op_m_fadd; + }; u32 op_rkey; u64 op_remote_addr; diff --git a/net/rds/send.c b/net/rds/send.c index 81471b25373b..9b951a0ab6b7 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -843,6 +843,8 @@ static int rds_rm_size(struct msghdr *msg, int data_len) case RDS_CMSG_ATOMIC_CSWP: case RDS_CMSG_ATOMIC_FADD: + case RDS_CMSG_MASKED_ATOMIC_CSWP: + case RDS_CMSG_MASKED_ATOMIC_FADD: cmsg_groups |= 1; size += sizeof(struct scatterlist); break; @@ -894,6 +896,8 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, break; case RDS_CMSG_ATOMIC_CSWP: case RDS_CMSG_ATOMIC_FADD: + case RDS_CMSG_MASKED_ATOMIC_CSWP: + case RDS_CMSG_MASKED_ATOMIC_FADD: ret = rds_cmsg_atomic(rs, rm, cmsg); break; -- cgit v1.2.3 From fd128dfa50cfc4f2959dc4aa5d7468d33b988332 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 25 Aug 2010 09:32:17 -0700 Subject: RDS: Add rds.h to exported headers list Also, a number of changes were made based on the assumption that rds.h wasn't exported, so roll these back. Signed-off-by: Andy Grover --- include/linux/Kbuild | 1 + include/linux/rds.h | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 626b629429ff..c7fbf298ad68 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -302,6 +302,7 @@ header-y += quota.h header-y += radeonfb.h header-y += random.h header-y += raw.h +header-y += rds.h header-y += reboot.h header-y += reiserfs_fs.h header-y += reiserfs_xattr.h diff --git a/include/linux/rds.h b/include/linux/rds.h index a2a5edb4a276..f371f885a352 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -95,7 +95,7 @@ struct rds_info_counter { u_int8_t name[32]; u_int64_t value; -} __packed; +} __attribute__((packed)); #define RDS_INFO_CONNECTION_FLAG_SENDING 0x01 #define RDS_INFO_CONNECTION_FLAG_CONNECTING 0x02 @@ -110,7 +110,7 @@ struct rds_info_connection { __be32 faddr; u_int8_t transport[TRANSNAMSIZ]; /* null term ascii */ u_int8_t flags; -} __packed; +} __attribute__((packed)); struct rds_info_flow { __be32 laddr; @@ -118,7 +118,7 @@ struct rds_info_flow { u_int32_t bytes; __be16 lport; __be16 fport; -} __packed; +} __attribute__((packed)); #define RDS_INFO_MESSAGE_FLAG_ACK 0x01 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 @@ -131,7 +131,7 @@ struct rds_info_message { __be16 lport; __be16 fport; u_int8_t flags; -} __packed; +} __attribute__((packed)); struct rds_info_socket { u_int32_t sndbuf; @@ -141,7 +141,7 @@ struct rds_info_socket { __be16 connected_port; u_int32_t rcvbuf; u_int64_t inum; -} __packed; +} __attribute__((packed)); struct rds_info_tcp_socket { __be32 local_addr; @@ -153,7 +153,7 @@ struct rds_info_tcp_socket { u_int32_t last_sent_nxt; u_int32_t last_expected_una; u_int32_t last_seen_una; -} __packed; +} __attribute__((packed)); #define RDS_IB_GID_LEN 16 struct rds_info_rdma_connection { -- cgit v1.2.3 From a46f561b774d90d8616473d56696e7d44fa1c9f1 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 25 Aug 2010 09:34:10 -0700 Subject: RDS: rds.h: Replace u_int[size]_t with uint[size]_t Replace e.g. u_int32_t types with the more common uint32_t. Reported-by: Matthew Wilcox Signed-off-by: Andy Grover --- include/linux/rds.h | 58 ++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index f371f885a352..3576b31b6b7b 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -93,8 +93,8 @@ #define RDS_INFO_LAST 10010 struct rds_info_counter { - u_int8_t name[32]; - u_int64_t value; + uint8_t name[32]; + uint64_t value; } __attribute__((packed)); #define RDS_INFO_CONNECTION_FLAG_SENDING 0x01 @@ -104,18 +104,18 @@ struct rds_info_counter { #define TRANSNAMSIZ 16 struct rds_info_connection { - u_int64_t next_tx_seq; - u_int64_t next_rx_seq; + uint64_t next_tx_seq; + uint64_t next_rx_seq; __be32 laddr; __be32 faddr; - u_int8_t transport[TRANSNAMSIZ]; /* null term ascii */ - u_int8_t flags; + uint8_t transport[TRANSNAMSIZ]; /* null term ascii */ + uint8_t flags; } __attribute__((packed)); struct rds_info_flow { __be32 laddr; __be32 faddr; - u_int32_t bytes; + uint32_t bytes; __be16 lport; __be16 fport; } __attribute__((packed)); @@ -124,23 +124,23 @@ struct rds_info_flow { #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 struct rds_info_message { - u_int64_t seq; - u_int32_t len; + uint64_t seq; + uint32_t len; __be32 laddr; __be32 faddr; __be16 lport; __be16 fport; - u_int8_t flags; + uint8_t flags; } __attribute__((packed)); struct rds_info_socket { - u_int32_t sndbuf; + uint32_t sndbuf; __be32 bound_addr; __be32 connected_addr; __be16 bound_port; __be16 connected_port; - u_int32_t rcvbuf; - u_int64_t inum; + uint32_t rcvbuf; + uint64_t inum; } __attribute__((packed)); struct rds_info_tcp_socket { @@ -148,11 +148,11 @@ struct rds_info_tcp_socket { __be16 local_port; __be32 peer_addr; __be16 peer_port; - u_int64_t hdr_rem; - u_int64_t data_rem; - u_int32_t last_sent_nxt; - u_int32_t last_expected_una; - u_int32_t last_seen_una; + uint64_t hdr_rem; + uint64_t data_rem; + uint32_t last_sent_nxt; + uint32_t last_expected_una; + uint32_t last_seen_una; } __attribute__((packed)); #define RDS_IB_GID_LEN 16 @@ -207,38 +207,38 @@ struct rds_info_rdma_connection { * (so that the application does not have to worry about * alignment). */ -typedef u_int64_t rds_rdma_cookie_t; +typedef uint64_t rds_rdma_cookie_t; struct rds_iovec { - u_int64_t addr; - u_int64_t bytes; + uint64_t addr; + uint64_t bytes; }; struct rds_get_mr_args { struct rds_iovec vec; - u_int64_t cookie_addr; + uint64_t cookie_addr; uint64_t flags; }; struct rds_get_mr_for_dest_args { struct sockaddr_storage dest_addr; struct rds_iovec vec; - u_int64_t cookie_addr; + uint64_t cookie_addr; uint64_t flags; }; struct rds_free_mr_args { rds_rdma_cookie_t cookie; - u_int64_t flags; + uint64_t flags; }; struct rds_rdma_args { rds_rdma_cookie_t cookie; struct rds_iovec remote_vec; - u_int64_t local_vec_addr; - u_int64_t nr_local; - u_int64_t flags; - u_int64_t user_token; + uint64_t local_vec_addr; + uint64_t nr_local; + uint64_t flags; + uint64_t user_token; }; struct rds_atomic_args { @@ -269,7 +269,7 @@ struct rds_atomic_args { }; struct rds_rdma_notify { - u_int64_t user_token; + uint64_t user_token; int32_t status; }; -- cgit v1.2.3 From 905d64c89e2a9d71d0606904b7c3908633db6072 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 8 Sep 2010 18:03:54 -0700 Subject: RDS: Remove dead struct from rds.h flows are an obsolete date type. Signed-off-by: Andy Grover --- include/linux/rds.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 3576b31b6b7b..91950950aa59 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -112,14 +112,6 @@ struct rds_info_connection { uint8_t flags; } __attribute__((packed)); -struct rds_info_flow { - __be32 laddr; - __be32 faddr; - uint32_t bytes; - __be16 lport; - __be16 fport; -} __attribute__((packed)); - #define RDS_INFO_MESSAGE_FLAG_ACK 0x01 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 -- cgit v1.2.3 From 01a08546af311c065f34727787dd0cc8dc0c216f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 31 Aug 2010 10:28:16 +0200 Subject: sched: Add book scheduling domain On top of the SMT and MC scheduling domains this adds the BOOK scheduling domain. This is useful for NUMA like machines which do not have an interface which tells which piece of memory is attached to which node or where the hardware performs striping. Signed-off-by: Heiko Carstens Signed-off-by: Peter Zijlstra LKML-Reference: <20100831082844.253053798@de.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + include/linux/topology.h | 6 ++++ kernel/sched.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 82 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7dd..b51c53c285b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -875,6 +875,7 @@ enum sched_domain_level { SD_LV_NONE = 0, SD_LV_SIBLING, SD_LV_MC, + SD_LV_BOOK, SD_LV_CPU, SD_LV_NODE, SD_LV_ALLNODES, diff --git a/include/linux/topology.h b/include/linux/topology.h index 64e084ff5e5c..b91a40e847d2 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -201,6 +201,12 @@ int arch_update_cpu_topology(void); .balance_interval = 64, \ } +#ifdef CONFIG_SCHED_BOOK +#ifndef SD_BOOK_INIT +#error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!! +#endif +#endif /* CONFIG_SCHED_BOOK */ + #ifdef CONFIG_NUMA #ifndef SD_NODE_INIT #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!! diff --git a/kernel/sched.c b/kernel/sched.c index 1a0c084b1cf9..26f83e2f1534 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6506,6 +6506,7 @@ struct s_data { cpumask_var_t nodemask; cpumask_var_t this_sibling_map; cpumask_var_t this_core_map; + cpumask_var_t this_book_map; cpumask_var_t send_covered; cpumask_var_t tmpmask; struct sched_group **sched_group_nodes; @@ -6517,6 +6518,7 @@ enum s_alloc { sa_rootdomain, sa_tmpmask, sa_send_covered, + sa_this_book_map, sa_this_core_map, sa_this_sibling_map, sa_nodemask, @@ -6570,6 +6572,31 @@ cpu_to_core_group(int cpu, const struct cpumask *cpu_map, } #endif /* CONFIG_SCHED_MC */ +/* + * book sched-domains: + */ +#ifdef CONFIG_SCHED_BOOK +static DEFINE_PER_CPU(struct static_sched_domain, book_domains); +static DEFINE_PER_CPU(struct static_sched_group, sched_group_book); + +static int +cpu_to_book_group(int cpu, const struct cpumask *cpu_map, + struct sched_group **sg, struct cpumask *mask) +{ + int group = cpu; +#ifdef CONFIG_SCHED_MC + cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); + group = cpumask_first(mask); +#elif defined(CONFIG_SCHED_SMT) + cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); + group = cpumask_first(mask); +#endif + if (sg) + *sg = &per_cpu(sched_group_book, group).sg; + return group; +} +#endif /* CONFIG_SCHED_BOOK */ + static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); @@ -6578,7 +6605,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, struct sched_group **sg, struct cpumask *mask) { int group; -#ifdef CONFIG_SCHED_MC +#ifdef CONFIG_SCHED_BOOK + cpumask_and(mask, cpu_book_mask(cpu), cpu_map); + group = cpumask_first(mask); +#elif defined(CONFIG_SCHED_MC) cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); group = cpumask_first(mask); #elif defined(CONFIG_SCHED_SMT) @@ -6839,6 +6869,9 @@ SD_INIT_FUNC(CPU) #ifdef CONFIG_SCHED_MC SD_INIT_FUNC(MC) #endif +#ifdef CONFIG_SCHED_BOOK + SD_INIT_FUNC(BOOK) +#endif static int default_relax_domain_level = -1; @@ -6888,6 +6921,8 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what, free_cpumask_var(d->tmpmask); /* fall through */ case sa_send_covered: free_cpumask_var(d->send_covered); /* fall through */ + case sa_this_book_map: + free_cpumask_var(d->this_book_map); /* fall through */ case sa_this_core_map: free_cpumask_var(d->this_core_map); /* fall through */ case sa_this_sibling_map: @@ -6934,8 +6969,10 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, return sa_nodemask; if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL)) return sa_this_sibling_map; - if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) + if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL)) return sa_this_core_map; + if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) + return sa_this_book_map; if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL)) return sa_send_covered; d->rd = alloc_rootdomain(); @@ -6993,6 +7030,23 @@ static struct sched_domain *__build_cpu_sched_domain(struct s_data *d, return sd; } +static struct sched_domain *__build_book_sched_domain(struct s_data *d, + const struct cpumask *cpu_map, struct sched_domain_attr *attr, + struct sched_domain *parent, int i) +{ + struct sched_domain *sd = parent; +#ifdef CONFIG_SCHED_BOOK + sd = &per_cpu(book_domains, i).sd; + SD_INIT(sd, BOOK); + set_domain_attribute(sd, attr); + cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i)); + sd->parent = parent; + parent->child = sd; + cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask); +#endif + return sd; +} + static struct sched_domain *__build_mc_sched_domain(struct s_data *d, const struct cpumask *cpu_map, struct sched_domain_attr *attr, struct sched_domain *parent, int i) @@ -7049,6 +7103,15 @@ static void build_sched_groups(struct s_data *d, enum sched_domain_level l, &cpu_to_core_group, d->send_covered, d->tmpmask); break; +#endif +#ifdef CONFIG_SCHED_BOOK + case SD_LV_BOOK: /* set up book groups */ + cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu)); + if (cpu == cpumask_first(d->this_book_map)) + init_sched_build_groups(d->this_book_map, cpu_map, + &cpu_to_book_group, + d->send_covered, d->tmpmask); + break; #endif case SD_LV_CPU: /* set up physical groups */ cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map); @@ -7097,12 +7160,14 @@ static int __build_sched_domains(const struct cpumask *cpu_map, sd = __build_numa_sched_domains(&d, cpu_map, attr, i); sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i); + sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i); sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i); sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i); } for_each_cpu(i, cpu_map) { build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i); + build_sched_groups(&d, SD_LV_BOOK, cpu_map, i); build_sched_groups(&d, SD_LV_MC, cpu_map, i); } @@ -7133,6 +7198,12 @@ static int __build_sched_domains(const struct cpumask *cpu_map, init_sched_groups_power(i, sd); } #endif +#ifdef CONFIG_SCHED_BOOK + for_each_cpu(i, cpu_map) { + sd = &per_cpu(book_domains, i).sd; + init_sched_groups_power(i, sd); + } +#endif for_each_cpu(i, cpu_map) { sd = &per_cpu(phys_domains, i).sd; @@ -7158,6 +7229,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map, sd = &per_cpu(cpu_domains, i).sd; #elif defined(CONFIG_SCHED_MC) sd = &per_cpu(core_domains, i).sd; +#elif defined(CONFIG_SCHED_BOOK) + sd = &per_cpu(book_domains, i).sd; #else sd = &per_cpu(phys_domains, i).sd; #endif -- cgit v1.2.3 From 51b0fe39549a04858001922919ab355dee9bdfcf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 13:35:57 +0200 Subject: perf: Deconstify struct pmu sed -ie 's/const struct pmu\>/struct pmu/g' `git grep -l "const struct pmu\>"` Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/alpha/kernel/perf_event.c | 4 ++-- arch/arm/kernel/perf_event.c | 2 +- arch/powerpc/kernel/perf_event.c | 8 ++++---- arch/powerpc/kernel/perf_event_fsl_emb.c | 2 +- arch/sh/kernel/perf_event.c | 4 ++-- arch/sparc/kernel/perf_event.c | 10 +++++----- arch/x86/kernel/cpu/perf_event.c | 14 +++++++------- include/linux/perf_event.h | 10 +++++----- kernel/perf_event.c | 26 +++++++++++++------------- 9 files changed, 40 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 51c39fa41693..56fa41590381 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -642,7 +642,7 @@ static int __hw_perf_event_init(struct perf_event *event) return 0; } -static const struct pmu pmu = { +static struct pmu pmu = { .enable = alpha_pmu_enable, .disable = alpha_pmu_disable, .read = alpha_pmu_read, @@ -653,7 +653,7 @@ static const struct pmu pmu = { /* * Main entry point to initialise a HW performance event. */ -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { int err; diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 64ca8c3ab94b..0671e92c5111 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -491,7 +491,7 @@ __hw_perf_event_init(struct perf_event *event) return err; } -const struct pmu * +struct pmu * hw_perf_event_init(struct perf_event *event) { int err = 0; diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index d301a30445e0..5f78681ad902 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -857,7 +857,7 @@ static void power_pmu_unthrottle(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -void power_pmu_start_txn(const struct pmu *pmu) +void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -870,7 +870,7 @@ void power_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -void power_pmu_cancel_txn(const struct pmu *pmu) +void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -882,7 +882,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -int power_pmu_commit_txn(const struct pmu *pmu) +int power_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw; long i, n; @@ -1014,7 +1014,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { u64 ev; unsigned long flags; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 1ba45471ae43..d7619b5e7a6e 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -428,7 +428,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { u64 ev; struct perf_event *events[MAX_HWEVENTS]; diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 7a3dc3567258..395572c94c6a 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -257,13 +257,13 @@ static void sh_pmu_read(struct perf_event *event) sh_perf_event_update(event, &event->hw, event->hw.idx); } -static const struct pmu pmu = { +static struct pmu pmu = { .enable = sh_pmu_enable, .disable = sh_pmu_disable, .read = sh_pmu_read, }; -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { int err = __hw_perf_event_init(event); if (unlikely(err)) { diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 4bc402938575..481b894a5018 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1099,7 +1099,7 @@ static int __hw_perf_event_init(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void sparc_pmu_start_txn(const struct pmu *pmu) +static void sparc_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -1111,7 +1111,7 @@ static void sparc_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -static void sparc_pmu_cancel_txn(const struct pmu *pmu) +static void sparc_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -1123,7 +1123,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -static int sparc_pmu_commit_txn(const struct pmu *pmu) +static int sparc_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int n; @@ -1142,7 +1142,7 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu) return 0; } -static const struct pmu pmu = { +static struct pmu pmu = { .enable = sparc_pmu_enable, .disable = sparc_pmu_disable, .read = sparc_pmu_read, @@ -1152,7 +1152,7 @@ static const struct pmu pmu = { .commit_txn = sparc_pmu_commit_txn, }; -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { int err = __hw_perf_event_init(event); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index de6569c04cd0..fdd97f2e9961 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -618,7 +618,7 @@ static void x86_pmu_enable_all(int added) } } -static const struct pmu pmu; +static struct pmu pmu; static inline int is_x86_event(struct perf_event *event) { @@ -1427,7 +1427,7 @@ static inline void x86_pmu_read(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void x86_pmu_start_txn(const struct pmu *pmu) +static void x86_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1440,7 +1440,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -static void x86_pmu_cancel_txn(const struct pmu *pmu) +static void x86_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1457,7 +1457,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -static int x86_pmu_commit_txn(const struct pmu *pmu) +static int x86_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int assign[X86_PMC_IDX_MAX]; @@ -1483,7 +1483,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) return 0; } -static const struct pmu pmu = { +static struct pmu pmu = { .enable = x86_pmu_enable, .disable = x86_pmu_disable, .start = x86_pmu_start, @@ -1569,9 +1569,9 @@ out: return ret; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { - const struct pmu *tmp; + struct pmu *tmp; int err; err = __hw_perf_event_init(event); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 000610c4de71..09d048b52115 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -578,19 +578,19 @@ struct pmu { * Start the transaction, after this ->enable() doesn't need * to do schedulability tests. */ - void (*start_txn) (const struct pmu *pmu); + void (*start_txn) (struct pmu *pmu); /* * If ->start_txn() disabled the ->enable() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ - int (*commit_txn) (const struct pmu *pmu); + int (*commit_txn) (struct pmu *pmu); /* * Will cancel the transaction, assumes ->disable() is called for * each successfull ->enable() during the transaction. */ - void (*cancel_txn) (const struct pmu *pmu); + void (*cancel_txn) (struct pmu *pmu); }; /** @@ -669,7 +669,7 @@ struct perf_event { int nr_siblings; int group_flags; struct perf_event *group_leader; - const struct pmu *pmu; + struct pmu *pmu; enum perf_event_active_state state; unsigned int attach_state; @@ -849,7 +849,7 @@ struct perf_output_handle { */ extern int perf_max_events; -extern const struct pmu *hw_perf_event_init(struct perf_event *event); +extern struct pmu *hw_perf_event_init(struct perf_event *event); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 2d74f31220ad..fb46fd13f31f 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -75,7 +75,7 @@ static DEFINE_SPINLOCK(perf_resource_lock); /* * Architecture provided APIs - weak aliases: */ -extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event) +extern __weak struct pmu *hw_perf_event_init(struct perf_event *event) { return NULL; } @@ -691,7 +691,7 @@ group_sched_in(struct perf_event *group_event, struct perf_event_context *ctx) { struct perf_event *event, *partial_group = NULL; - const struct pmu *pmu = group_event->pmu; + struct pmu *pmu = group_event->pmu; bool txn = false; if (group_event->state == PERF_EVENT_STATE_OFF) @@ -4501,7 +4501,7 @@ static int perf_swevent_int(struct perf_event *event) return 0; } -static const struct pmu perf_ops_generic = { +static struct pmu perf_ops_generic = { .enable = perf_swevent_enable, .disable = perf_swevent_disable, .start = perf_swevent_int, @@ -4614,7 +4614,7 @@ static void cpu_clock_perf_event_read(struct perf_event *event) cpu_clock_perf_event_update(event); } -static const struct pmu perf_ops_cpu_clock = { +static struct pmu perf_ops_cpu_clock = { .enable = cpu_clock_perf_event_enable, .disable = cpu_clock_perf_event_disable, .read = cpu_clock_perf_event_read, @@ -4671,7 +4671,7 @@ static void task_clock_perf_event_read(struct perf_event *event) task_clock_perf_event_update(event, time); } -static const struct pmu perf_ops_task_clock = { +static struct pmu perf_ops_task_clock = { .enable = task_clock_perf_event_enable, .disable = task_clock_perf_event_disable, .read = task_clock_perf_event_read, @@ -4785,7 +4785,7 @@ static int swevent_hlist_get(struct perf_event *event) #ifdef CONFIG_EVENT_TRACING -static const struct pmu perf_ops_tracepoint = { +static struct pmu perf_ops_tracepoint = { .enable = perf_trace_enable, .disable = perf_trace_disable, .start = perf_swevent_int, @@ -4849,7 +4849,7 @@ static void tp_perf_event_destroy(struct perf_event *event) perf_trace_destroy(event); } -static const struct pmu *tp_perf_event_init(struct perf_event *event) +static struct pmu *tp_perf_event_init(struct perf_event *event) { int err; @@ -4896,7 +4896,7 @@ static void perf_event_free_filter(struct perf_event *event) #else -static const struct pmu *tp_perf_event_init(struct perf_event *event) +static struct pmu *tp_perf_event_init(struct perf_event *event) { return NULL; } @@ -4918,7 +4918,7 @@ static void bp_perf_event_destroy(struct perf_event *event) release_bp_slot(event); } -static const struct pmu *bp_perf_event_init(struct perf_event *bp) +static struct pmu *bp_perf_event_init(struct perf_event *bp) { int err; @@ -4942,7 +4942,7 @@ void perf_bp_event(struct perf_event *bp, void *data) perf_swevent_add(bp, 1, 1, &sample, regs); } #else -static const struct pmu *bp_perf_event_init(struct perf_event *bp) +static struct pmu *bp_perf_event_init(struct perf_event *bp) { return NULL; } @@ -4964,9 +4964,9 @@ static void sw_perf_event_destroy(struct perf_event *event) swevent_hlist_put(event); } -static const struct pmu *sw_perf_event_init(struct perf_event *event) +static struct pmu *sw_perf_event_init(struct perf_event *event) { - const struct pmu *pmu = NULL; + struct pmu *pmu = NULL; u64 event_id = event->attr.config; /* @@ -5028,7 +5028,7 @@ perf_event_alloc(struct perf_event_attr *attr, perf_overflow_handler_t overflow_handler, gfp_t gfpflags) { - const struct pmu *pmu; + struct pmu *pmu; struct perf_event *event; struct hw_perf_event *hwc; long err; -- cgit v1.2.3 From b0a873ebbf87bf38bf70b5e39a7cadc96099fa13 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 13:35:08 +0200 Subject: perf: Register PMU implementations Simple registration interface for struct pmu, this provides the infrastructure for removing all the weak functions. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/alpha/kernel/perf_event.c | 37 +- arch/arm/kernel/perf_event.c | 38 +- arch/powerpc/kernel/perf_event.c | 46 +-- arch/powerpc/kernel/perf_event_fsl_emb.c | 37 +- arch/sh/kernel/perf_event.c | 35 +- arch/sparc/kernel/perf_event.c | 29 +- arch/x86/kernel/cpu/perf_event.c | 45 ++- include/linux/perf_event.h | 10 +- kernel/hw_breakpoint.c | 35 +- kernel/perf_event.c | 588 +++++++++++++++---------------- 10 files changed, 488 insertions(+), 412 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 56fa41590381..19660b5c298f 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -642,34 +642,39 @@ static int __hw_perf_event_init(struct perf_event *event) return 0; } -static struct pmu pmu = { - .enable = alpha_pmu_enable, - .disable = alpha_pmu_disable, - .read = alpha_pmu_read, - .unthrottle = alpha_pmu_unthrottle, -}; - - /* * Main entry point to initialise a HW performance event. */ -struct pmu *hw_perf_event_init(struct perf_event *event) +static int alpha_pmu_event_init(struct perf_event *event) { int err; + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + if (!alpha_pmu) - return ERR_PTR(-ENODEV); + return -ENODEV; /* Do the real initialisation work. */ err = __hw_perf_event_init(event); - if (err) - return ERR_PTR(err); - - return &pmu; + return err; } - +static struct pmu pmu = { + .event_init = alpha_pmu_event_init, + .enable = alpha_pmu_enable, + .disable = alpha_pmu_disable, + .read = alpha_pmu_read, + .unthrottle = alpha_pmu_unthrottle, +}; /* * Main entry point - enable HW performance counters. @@ -838,5 +843,7 @@ void __init init_hw_perf_events(void) /* And set up PMU specification */ alpha_pmu = &ev67_pmu; perf_max_events = alpha_pmu->num_pmcs; + + perf_pmu_register(&pmu); } diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 0671e92c5111..f62f9db35db3 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -306,12 +306,7 @@ out: return err; } -static struct pmu pmu = { - .enable = armpmu_enable, - .disable = armpmu_disable, - .unthrottle = armpmu_unthrottle, - .read = armpmu_read, -}; +static struct pmu pmu; static int validate_event(struct cpu_hw_events *cpuc, @@ -491,20 +486,29 @@ __hw_perf_event_init(struct perf_event *event) return err; } -struct pmu * -hw_perf_event_init(struct perf_event *event) +static int armpmu_event_init(struct perf_event *event) { int err = 0; + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + if (!armpmu) - return ERR_PTR(-ENODEV); + return -ENODEV; event->destroy = hw_perf_event_destroy; if (!atomic_inc_not_zero(&active_events)) { if (atomic_read(&active_events) > perf_max_events) { atomic_dec(&active_events); - return ERR_PTR(-ENOSPC); + return -ENOSPC; } mutex_lock(&pmu_reserve_mutex); @@ -518,15 +522,23 @@ hw_perf_event_init(struct perf_event *event) } if (err) - return ERR_PTR(err); + return err; err = __hw_perf_event_init(event); if (err) hw_perf_event_destroy(event); - return err ? ERR_PTR(err) : &pmu; + return err; } +static struct pmu pmu = { + .event_init = armpmu_event_init, + .enable = armpmu_enable, + .disable = armpmu_disable, + .unthrottle = armpmu_unthrottle, + .read = armpmu_read, +}; + void hw_perf_enable(void) { @@ -2994,6 +3006,8 @@ init_hw_perf_events(void) perf_max_events = -1; } + perf_pmu_register(&pmu); + return 0; } arch_initcall(init_hw_perf_events); diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 5f78681ad902..19131b2614b9 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -904,16 +904,6 @@ int power_pmu_commit_txn(struct pmu *pmu) return 0; } -struct pmu power_pmu = { - .enable = power_pmu_enable, - .disable = power_pmu_disable, - .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, - .start_txn = power_pmu_start_txn, - .cancel_txn = power_pmu_cancel_txn, - .commit_txn = power_pmu_commit_txn, -}; - /* * Return 1 if we might be able to put event on a limited PMC, * or 0 if not. @@ -1014,7 +1004,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -struct pmu *hw_perf_event_init(struct perf_event *event) +static int power_pmu_event_init(struct perf_event *event) { u64 ev; unsigned long flags; @@ -1026,25 +1016,27 @@ struct pmu *hw_perf_event_init(struct perf_event *event) struct cpu_hw_events *cpuhw; if (!ppmu) - return ERR_PTR(-ENXIO); + return -ENOENT; + switch (event->attr.type) { case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: ev = event->attr.config; break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } + event->hw.config_base = ev; event->hw.idx = 0; @@ -1081,7 +1073,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) */ ev = normal_pmc_alternative(ev, flags); if (!ev) - return ERR_PTR(-EINVAL); + return -EINVAL; } } @@ -1095,19 +1087,19 @@ struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, ctrs, events, cflags); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } events[n] = ev; ctrs[n] = event; cflags[n] = flags; if (check_excludes(ctrs, cflags, n, 1)) - return ERR_PTR(-EINVAL); + return -EINVAL; cpuhw = &get_cpu_var(cpu_hw_events); err = power_check_constraints(cpuhw, events, cflags, n + 1); put_cpu_var(cpu_hw_events); if (err) - return ERR_PTR(-EINVAL); + return -EINVAL; event->hw.config = events[n]; event->hw.event_base = cflags[n]; @@ -1132,11 +1124,20 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &power_pmu; + return err; } +struct pmu power_pmu = { + .event_init = power_pmu_event_init, + .enable = power_pmu_enable, + .disable = power_pmu_disable, + .read = power_pmu_read, + .unthrottle = power_pmu_unthrottle, + .start_txn = power_pmu_start_txn, + .cancel_txn = power_pmu_cancel_txn, + .commit_txn = power_pmu_commit_txn, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -1342,6 +1343,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ + perf_pmu_register(&power_pmu); perf_cpu_notifier(power_pmu_notifier); return 0; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index d7619b5e7a6e..ea6a804e43fd 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -378,13 +378,6 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event) local_irq_restore(flags); } -static struct pmu fsl_emb_pmu = { - .enable = fsl_emb_pmu_enable, - .disable = fsl_emb_pmu_disable, - .read = fsl_emb_pmu_read, - .unthrottle = fsl_emb_pmu_unthrottle, -}; - /* * Release the PMU if this is the last perf_event. */ @@ -428,7 +421,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -struct pmu *hw_perf_event_init(struct perf_event *event) +static int fsl_emb_pmu_event_init(struct perf_event *event) { u64 ev; struct perf_event *events[MAX_HWEVENTS]; @@ -441,14 +434,14 @@ struct pmu *hw_perf_event_init(struct perf_event *event) case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: @@ -456,12 +449,12 @@ struct pmu *hw_perf_event_init(struct perf_event *event) break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } event->hw.config = ppmu->xlate_event(ev); if (!(event->hw.config & FSL_EMB_EVENT_VALID)) - return ERR_PTR(-EINVAL); + return -EINVAL; /* * If this is in a group, check if it can go on with all the @@ -473,7 +466,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, events); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { @@ -484,7 +477,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } if (num_restricted >= ppmu->n_restricted) - return ERR_PTR(-EINVAL); + return -EINVAL; } event->hw.idx = -1; @@ -497,7 +490,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) if (event->attr.exclude_kernel) event->hw.config_base |= PMLCA_FCS; if (event->attr.exclude_idle) - return ERR_PTR(-ENOTSUPP); + return -ENOTSUPP; event->hw.last_period = event->hw.sample_period; local64_set(&event->hw.period_left, event->hw.last_period); @@ -523,11 +516,17 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &fsl_emb_pmu; + return err; } +static struct pmu fsl_emb_pmu = { + .event_init = fsl_emb_pmu_event_init, + .enable = fsl_emb_pmu_enable, + .disable = fsl_emb_pmu_disable, + .read = fsl_emb_pmu_read, + .unthrottle = fsl_emb_pmu_unthrottle, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -651,5 +650,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) pr_info("%s performance monitor hardware support registered\n", pmu->name); + perf_pmu_register(&fsl_emb_pmu); + return 0; } diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 395572c94c6a..8cb206597e0c 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -257,26 +257,38 @@ static void sh_pmu_read(struct perf_event *event) sh_perf_event_update(event, &event->hw, event->hw.idx); } -static struct pmu pmu = { - .enable = sh_pmu_enable, - .disable = sh_pmu_disable, - .read = sh_pmu_read, -}; - -struct pmu *hw_perf_event_init(struct perf_event *event) +static int sh_pmu_event_init(struct perf_event *event) { - int err = __hw_perf_event_init(event); + int err; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HW_CACHE: + case PERF_TYPE_HARDWARE: + err = __hw_perf_event_init(event); + break; + + default: + return -ENOENT; + } + if (unlikely(err)) { if (event->destroy) event->destroy(event); - return ERR_PTR(err); } - return &pmu; + return err; } +static struct pmu pmu = { + .event_init = sh_pmu_event_init, + .enable = sh_pmu_enable, + .disable = sh_pmu_disable, + .read = sh_pmu_read, +}; + static void sh_pmu_setup(int cpu) -{ + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); memset(cpuhw, 0, sizeof(struct cpu_hw_events)); @@ -325,6 +337,7 @@ int __cpuinit register_sh_pmu(struct sh_pmu *pmu) WARN_ON(pmu->num_events > MAX_HWEVENTS); + perf_pmu_register(&pmu); perf_cpu_notifier(sh_pmu_notifier); return 0; } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 481b894a5018..bed4327f5a7a 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1025,7 +1025,7 @@ out: return ret; } -static int __hw_perf_event_init(struct perf_event *event) +static int sparc_pmu_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; struct perf_event *evts[MAX_HWEVENTS]; @@ -1038,17 +1038,27 @@ static int __hw_perf_event_init(struct perf_event *event) if (atomic_read(&nmi_active) < 0) return -ENODEV; - if (attr->type == PERF_TYPE_HARDWARE) { + switch (attr->type) { + case PERF_TYPE_HARDWARE: if (attr->config >= sparc_pmu->max_events) return -EINVAL; pmap = sparc_pmu->event_map(attr->config); - } else if (attr->type == PERF_TYPE_HW_CACHE) { + break; + + case PERF_TYPE_HW_CACHE: pmap = sparc_map_cache_event(attr->config); if (IS_ERR(pmap)) return PTR_ERR(pmap); - } else + break; + + case PERF_TYPE_RAW: return -EOPNOTSUPP; + default: + return -ENOENT; + + } + /* We save the enable bits in the config_base. */ hwc->config_base = sparc_pmu->irq_bit; if (!attr->exclude_user) @@ -1143,6 +1153,7 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) } static struct pmu pmu = { + .event_init = sparc_pmu_event_init, .enable = sparc_pmu_enable, .disable = sparc_pmu_disable, .read = sparc_pmu_read, @@ -1152,15 +1163,6 @@ static struct pmu pmu = { .commit_txn = sparc_pmu_commit_txn, }; -struct pmu *hw_perf_event_init(struct perf_event *event) -{ - int err = __hw_perf_event_init(event); - - if (err) - return ERR_PTR(err); - return &pmu; -} - void perf_event_print_debug(void) { unsigned long flags; @@ -1280,6 +1282,7 @@ void __init init_hw_perf_events(void) /* All sparc64 PMUs currently have 2 events. */ perf_max_events = 2; + perf_pmu_register(&pmu); register_die_notifier(&perf_event_nmi_notifier); } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index fdd97f2e9961..2c89264ee791 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -530,7 +530,7 @@ static int x86_pmu_hw_config(struct perf_event *event) /* * Setup the hardware configuration for a given attr_type */ -static int __hw_perf_event_init(struct perf_event *event) +static int __x86_pmu_event_init(struct perf_event *event) { int err; @@ -1414,6 +1414,7 @@ void __init init_hw_perf_events(void) pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); + perf_pmu_register(&pmu); perf_cpu_notifier(x86_pmu_notifier); } @@ -1483,18 +1484,6 @@ static int x86_pmu_commit_txn(struct pmu *pmu) return 0; } -static struct pmu pmu = { - .enable = x86_pmu_enable, - .disable = x86_pmu_disable, - .start = x86_pmu_start, - .stop = x86_pmu_stop, - .read = x86_pmu_read, - .unthrottle = x86_pmu_unthrottle, - .start_txn = x86_pmu_start_txn, - .cancel_txn = x86_pmu_cancel_txn, - .commit_txn = x86_pmu_commit_txn, -}; - /* * validate that we can schedule this event */ @@ -1569,12 +1558,22 @@ out: return ret; } -struct pmu *hw_perf_event_init(struct perf_event *event) +int x86_pmu_event_init(struct perf_event *event) { struct pmu *tmp; int err; - err = __hw_perf_event_init(event); + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + + err = __x86_pmu_event_init(event); if (!err) { /* * we temporarily connect event to its pmu @@ -1594,12 +1593,24 @@ struct pmu *hw_perf_event_init(struct perf_event *event) if (err) { if (event->destroy) event->destroy(event); - return ERR_PTR(err); } - return &pmu; + return err; } +static struct pmu pmu = { + .event_init = x86_pmu_event_init, + .enable = x86_pmu_enable, + .disable = x86_pmu_disable, + .start = x86_pmu_start, + .stop = x86_pmu_stop, + .read = x86_pmu_read, + .unthrottle = x86_pmu_unthrottle, + .start_txn = x86_pmu_start_txn, + .cancel_txn = x86_pmu_cancel_txn, + .commit_txn = x86_pmu_commit_txn, +}; + /* * callchain support */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 09d048b52115..ab72f56eb372 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -561,6 +561,13 @@ struct perf_event; * struct pmu - generic performance monitoring unit */ struct pmu { + struct list_head entry; + + /* + * Should return -ENOENT when the @event doesn't match this pmu + */ + int (*event_init) (struct perf_event *event); + int (*enable) (struct perf_event *event); void (*disable) (struct perf_event *event); int (*start) (struct perf_event *event); @@ -849,7 +856,8 @@ struct perf_output_handle { */ extern int perf_max_events; -extern struct pmu *hw_perf_event_init(struct perf_event *event); +extern int perf_pmu_register(struct pmu *pmu); +extern void perf_pmu_unregister(struct pmu *pmu); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index d71a987fd2bf..e9c5cfa1fd20 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -565,6 +565,34 @@ static struct notifier_block hw_breakpoint_exceptions_nb = { .priority = 0x7fffffff }; +static void bp_perf_event_destroy(struct perf_event *event) +{ + release_bp_slot(event); +} + +static int hw_breakpoint_event_init(struct perf_event *bp) +{ + int err; + + if (bp->attr.type != PERF_TYPE_BREAKPOINT) + return -ENOENT; + + err = register_perf_hw_breakpoint(bp); + if (err) + return err; + + bp->destroy = bp_perf_event_destroy; + + return 0; +} + +static struct pmu perf_breakpoint = { + .event_init = hw_breakpoint_event_init, + .enable = arch_install_hw_breakpoint, + .disable = arch_uninstall_hw_breakpoint, + .read = hw_breakpoint_pmu_read, +}; + static int __init init_hw_breakpoint(void) { unsigned int **task_bp_pinned; @@ -586,6 +614,8 @@ static int __init init_hw_breakpoint(void) constraints_initialized = 1; + perf_pmu_register(&perf_breakpoint); + return register_die_notifier(&hw_breakpoint_exceptions_nb); err_alloc: @@ -601,8 +631,3 @@ static int __init init_hw_breakpoint(void) core_initcall(init_hw_breakpoint); -struct pmu perf_ops_bp = { - .enable = arch_install_hw_breakpoint, - .disable = arch_uninstall_hw_breakpoint, - .read = hw_breakpoint_pmu_read, -}; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index fb46fd13f31f..288ce43de57c 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -31,7 +31,6 @@ #include #include #include -#include #include @@ -72,14 +71,6 @@ static atomic64_t perf_event_id; */ static DEFINE_SPINLOCK(perf_resource_lock); -/* - * Architecture provided APIs - weak aliases: - */ -extern __weak struct pmu *hw_perf_event_init(struct perf_event *event) -{ - return NULL; -} - void __weak hw_perf_disable(void) { barrier(); } void __weak hw_perf_enable(void) { barrier(); } @@ -4501,182 +4492,6 @@ static int perf_swevent_int(struct perf_event *event) return 0; } -static struct pmu perf_ops_generic = { - .enable = perf_swevent_enable, - .disable = perf_swevent_disable, - .start = perf_swevent_int, - .stop = perf_swevent_void, - .read = perf_swevent_read, - .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ -}; - -/* - * hrtimer based swevent callback - */ - -static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) -{ - enum hrtimer_restart ret = HRTIMER_RESTART; - struct perf_sample_data data; - struct pt_regs *regs; - struct perf_event *event; - u64 period; - - event = container_of(hrtimer, struct perf_event, hw.hrtimer); - event->pmu->read(event); - - perf_sample_data_init(&data, 0); - data.period = event->hw.last_period; - regs = get_irq_regs(); - - if (regs && !perf_exclude_event(event, regs)) { - if (!(event->attr.exclude_idle && current->pid == 0)) - if (perf_event_overflow(event, 0, &data, regs)) - ret = HRTIMER_NORESTART; - } - - period = max_t(u64, 10000, event->hw.sample_period); - hrtimer_forward_now(hrtimer, ns_to_ktime(period)); - - return ret; -} - -static void perf_swevent_start_hrtimer(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swevent_hrtimer; - if (hwc->sample_period) { - u64 period; - - if (hwc->remaining) { - if (hwc->remaining < 0) - period = 10000; - else - period = hwc->remaining; - hwc->remaining = 0; - } else { - period = max_t(u64, 10000, hwc->sample_period); - } - __hrtimer_start_range_ns(&hwc->hrtimer, - ns_to_ktime(period), 0, - HRTIMER_MODE_REL, 0); - } -} - -static void perf_swevent_cancel_hrtimer(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - if (hwc->sample_period) { - ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); - hwc->remaining = ktime_to_ns(remaining); - - hrtimer_cancel(&hwc->hrtimer); - } -} - -/* - * Software event: cpu wall time clock - */ - -static void cpu_clock_perf_event_update(struct perf_event *event) -{ - int cpu = raw_smp_processor_id(); - s64 prev; - u64 now; - - now = cpu_clock(cpu); - prev = local64_xchg(&event->hw.prev_count, now); - local64_add(now - prev, &event->count); -} - -static int cpu_clock_perf_event_enable(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int cpu = raw_smp_processor_id(); - - local64_set(&hwc->prev_count, cpu_clock(cpu)); - perf_swevent_start_hrtimer(event); - - return 0; -} - -static void cpu_clock_perf_event_disable(struct perf_event *event) -{ - perf_swevent_cancel_hrtimer(event); - cpu_clock_perf_event_update(event); -} - -static void cpu_clock_perf_event_read(struct perf_event *event) -{ - cpu_clock_perf_event_update(event); -} - -static struct pmu perf_ops_cpu_clock = { - .enable = cpu_clock_perf_event_enable, - .disable = cpu_clock_perf_event_disable, - .read = cpu_clock_perf_event_read, -}; - -/* - * Software event: task time clock - */ - -static void task_clock_perf_event_update(struct perf_event *event, u64 now) -{ - u64 prev; - s64 delta; - - prev = local64_xchg(&event->hw.prev_count, now); - delta = now - prev; - local64_add(delta, &event->count); -} - -static int task_clock_perf_event_enable(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - u64 now; - - now = event->ctx->time; - - local64_set(&hwc->prev_count, now); - - perf_swevent_start_hrtimer(event); - - return 0; -} - -static void task_clock_perf_event_disable(struct perf_event *event) -{ - perf_swevent_cancel_hrtimer(event); - task_clock_perf_event_update(event, event->ctx->time); - -} - -static void task_clock_perf_event_read(struct perf_event *event) -{ - u64 time; - - if (!in_nmi()) { - update_context_time(event->ctx); - time = event->ctx->time; - } else { - u64 now = perf_clock(); - u64 delta = now - event->ctx->timestamp; - time = event->ctx->time + delta; - } - - task_clock_perf_event_update(event, time); -} - -static struct pmu perf_ops_task_clock = { - .enable = task_clock_perf_event_enable, - .disable = task_clock_perf_event_disable, - .read = task_clock_perf_event_read, -}; - /* Deref the hlist from the update side */ static inline struct swevent_hlist * swevent_hlist_deref(struct perf_cpu_context *cpuctx) @@ -4783,17 +4598,63 @@ static int swevent_hlist_get(struct perf_event *event) return err; } -#ifdef CONFIG_EVENT_TRACING +atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; -static struct pmu perf_ops_tracepoint = { - .enable = perf_trace_enable, - .disable = perf_trace_disable, +static void sw_perf_event_destroy(struct perf_event *event) +{ + u64 event_id = event->attr.config; + + WARN_ON(event->parent); + + atomic_dec(&perf_swevent_enabled[event_id]); + swevent_hlist_put(event); +} + +static int perf_swevent_init(struct perf_event *event) +{ + int event_id = event->attr.config; + + if (event->attr.type != PERF_TYPE_SOFTWARE) + return -ENOENT; + + switch (event_id) { + case PERF_COUNT_SW_CPU_CLOCK: + case PERF_COUNT_SW_TASK_CLOCK: + return -ENOENT; + + default: + break; + } + + if (event_id > PERF_COUNT_SW_MAX) + return -ENOENT; + + if (!event->parent) { + int err; + + err = swevent_hlist_get(event); + if (err) + return err; + + atomic_inc(&perf_swevent_enabled[event_id]); + event->destroy = sw_perf_event_destroy; + } + + return 0; +} + +static struct pmu perf_swevent = { + .event_init = perf_swevent_init, + .enable = perf_swevent_enable, + .disable = perf_swevent_disable, .start = perf_swevent_int, .stop = perf_swevent_void, .read = perf_swevent_read, - .unthrottle = perf_swevent_void, + .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ }; +#ifdef CONFIG_EVENT_TRACING + static int perf_tp_filter_match(struct perf_event *event, struct perf_sample_data *data) { @@ -4849,10 +4710,13 @@ static void tp_perf_event_destroy(struct perf_event *event) perf_trace_destroy(event); } -static struct pmu *tp_perf_event_init(struct perf_event *event) +static int perf_tp_event_init(struct perf_event *event) { int err; + if (event->attr.type != PERF_TYPE_TRACEPOINT) + return -ENOENT; + /* * Raw tracepoint data is a severe data leak, only allow root to * have these. @@ -4860,15 +4724,30 @@ static struct pmu *tp_perf_event_init(struct perf_event *event) if ((event->attr.sample_type & PERF_SAMPLE_RAW) && perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EPERM); + return -EPERM; err = perf_trace_init(event); if (err) - return NULL; + return err; event->destroy = tp_perf_event_destroy; - return &perf_ops_tracepoint; + return 0; +} + +static struct pmu perf_tracepoint = { + .event_init = perf_tp_event_init, + .enable = perf_trace_enable, + .disable = perf_trace_disable, + .start = perf_swevent_int, + .stop = perf_swevent_void, + .read = perf_swevent_read, + .unthrottle = perf_swevent_void, +}; + +static inline void perf_tp_register(void) +{ + perf_pmu_register(&perf_tracepoint); } static int perf_event_set_filter(struct perf_event *event, void __user *arg) @@ -4896,9 +4775,8 @@ static void perf_event_free_filter(struct perf_event *event) #else -static struct pmu *tp_perf_event_init(struct perf_event *event) +static inline void perf_tp_register(void) { - return NULL; } static int perf_event_set_filter(struct perf_event *event, void __user *arg) @@ -4913,105 +4791,247 @@ static void perf_event_free_filter(struct perf_event *event) #endif /* CONFIG_EVENT_TRACING */ #ifdef CONFIG_HAVE_HW_BREAKPOINT -static void bp_perf_event_destroy(struct perf_event *event) +void perf_bp_event(struct perf_event *bp, void *data) { - release_bp_slot(event); + struct perf_sample_data sample; + struct pt_regs *regs = data; + + perf_sample_data_init(&sample, bp->attr.bp_addr); + + if (!perf_exclude_event(bp, regs)) + perf_swevent_add(bp, 1, 1, &sample, regs); } +#endif + +/* + * hrtimer based swevent callback + */ -static struct pmu *bp_perf_event_init(struct perf_event *bp) +static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) { - int err; + enum hrtimer_restart ret = HRTIMER_RESTART; + struct perf_sample_data data; + struct pt_regs *regs; + struct perf_event *event; + u64 period; - err = register_perf_hw_breakpoint(bp); - if (err) - return ERR_PTR(err); + event = container_of(hrtimer, struct perf_event, hw.hrtimer); + event->pmu->read(event); + + perf_sample_data_init(&data, 0); + data.period = event->hw.last_period; + regs = get_irq_regs(); + + if (regs && !perf_exclude_event(event, regs)) { + if (!(event->attr.exclude_idle && current->pid == 0)) + if (perf_event_overflow(event, 0, &data, regs)) + ret = HRTIMER_NORESTART; + } - bp->destroy = bp_perf_event_destroy; + period = max_t(u64, 10000, event->hw.sample_period); + hrtimer_forward_now(hrtimer, ns_to_ktime(period)); - return &perf_ops_bp; + return ret; } -void perf_bp_event(struct perf_event *bp, void *data) +static void perf_swevent_start_hrtimer(struct perf_event *event) { - struct perf_sample_data sample; - struct pt_regs *regs = data; + struct hw_perf_event *hwc = &event->hw; - perf_sample_data_init(&sample, bp->attr.bp_addr); + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swevent_hrtimer; + if (hwc->sample_period) { + u64 period; - if (!perf_exclude_event(bp, regs)) - perf_swevent_add(bp, 1, 1, &sample, regs); + if (hwc->remaining) { + if (hwc->remaining < 0) + period = 10000; + else + period = hwc->remaining; + hwc->remaining = 0; + } else { + period = max_t(u64, 10000, hwc->sample_period); + } + __hrtimer_start_range_ns(&hwc->hrtimer, + ns_to_ktime(period), 0, + HRTIMER_MODE_REL, 0); + } } -#else -static struct pmu *bp_perf_event_init(struct perf_event *bp) + +static void perf_swevent_cancel_hrtimer(struct perf_event *event) { - return NULL; + struct hw_perf_event *hwc = &event->hw; + + if (hwc->sample_period) { + ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); + hwc->remaining = ktime_to_ns(remaining); + + hrtimer_cancel(&hwc->hrtimer); + } } -void perf_bp_event(struct perf_event *bp, void *regs) +/* + * Software event: cpu wall time clock + */ + +static void cpu_clock_event_update(struct perf_event *event) { + int cpu = raw_smp_processor_id(); + s64 prev; + u64 now; + + now = cpu_clock(cpu); + prev = local64_xchg(&event->hw.prev_count, now); + local64_add(now - prev, &event->count); } -#endif -atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; +static int cpu_clock_event_enable(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int cpu = raw_smp_processor_id(); -static void sw_perf_event_destroy(struct perf_event *event) + local64_set(&hwc->prev_count, cpu_clock(cpu)); + perf_swevent_start_hrtimer(event); + + return 0; +} + +static void cpu_clock_event_disable(struct perf_event *event) { - u64 event_id = event->attr.config; + perf_swevent_cancel_hrtimer(event); + cpu_clock_event_update(event); +} - WARN_ON(event->parent); +static void cpu_clock_event_read(struct perf_event *event) +{ + cpu_clock_event_update(event); +} - atomic_dec(&perf_swevent_enabled[event_id]); - swevent_hlist_put(event); +static int cpu_clock_event_init(struct perf_event *event) +{ + if (event->attr.type != PERF_TYPE_SOFTWARE) + return -ENOENT; + + if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) + return -ENOENT; + + return 0; } -static struct pmu *sw_perf_event_init(struct perf_event *event) +static struct pmu perf_cpu_clock = { + .event_init = cpu_clock_event_init, + .enable = cpu_clock_event_enable, + .disable = cpu_clock_event_disable, + .read = cpu_clock_event_read, +}; + +/* + * Software event: task time clock + */ + +static void task_clock_event_update(struct perf_event *event, u64 now) { - struct pmu *pmu = NULL; - u64 event_id = event->attr.config; + u64 prev; + s64 delta; - /* - * Software events (currently) can't in general distinguish - * between user, kernel and hypervisor events. - * However, context switches and cpu migrations are considered - * to be kernel events, and page faults are never hypervisor - * events. - */ - switch (event_id) { - case PERF_COUNT_SW_CPU_CLOCK: - pmu = &perf_ops_cpu_clock; + prev = local64_xchg(&event->hw.prev_count, now); + delta = now - prev; + local64_add(delta, &event->count); +} - break; - case PERF_COUNT_SW_TASK_CLOCK: - /* - * If the user instantiates this as a per-cpu event, - * use the cpu_clock event instead. - */ - if (event->ctx->task) - pmu = &perf_ops_task_clock; - else - pmu = &perf_ops_cpu_clock; +static int task_clock_event_enable(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 now; - break; - case PERF_COUNT_SW_PAGE_FAULTS: - case PERF_COUNT_SW_PAGE_FAULTS_MIN: - case PERF_COUNT_SW_PAGE_FAULTS_MAJ: - case PERF_COUNT_SW_CONTEXT_SWITCHES: - case PERF_COUNT_SW_CPU_MIGRATIONS: - case PERF_COUNT_SW_ALIGNMENT_FAULTS: - case PERF_COUNT_SW_EMULATION_FAULTS: - if (!event->parent) { - int err; - - err = swevent_hlist_get(event); - if (err) - return ERR_PTR(err); + now = event->ctx->time; - atomic_inc(&perf_swevent_enabled[event_id]); - event->destroy = sw_perf_event_destroy; + local64_set(&hwc->prev_count, now); + + perf_swevent_start_hrtimer(event); + + return 0; +} + +static void task_clock_event_disable(struct perf_event *event) +{ + perf_swevent_cancel_hrtimer(event); + task_clock_event_update(event, event->ctx->time); + +} + +static void task_clock_event_read(struct perf_event *event) +{ + u64 time; + + if (!in_nmi()) { + update_context_time(event->ctx); + time = event->ctx->time; + } else { + u64 now = perf_clock(); + u64 delta = now - event->ctx->timestamp; + time = event->ctx->time + delta; + } + + task_clock_event_update(event, time); +} + +static int task_clock_event_init(struct perf_event *event) +{ + if (event->attr.type != PERF_TYPE_SOFTWARE) + return -ENOENT; + + if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) + return -ENOENT; + + return 0; +} + +static struct pmu perf_task_clock = { + .event_init = task_clock_event_init, + .enable = task_clock_event_enable, + .disable = task_clock_event_disable, + .read = task_clock_event_read, +}; + +static LIST_HEAD(pmus); +static DEFINE_MUTEX(pmus_lock); +static struct srcu_struct pmus_srcu; + +int perf_pmu_register(struct pmu *pmu) +{ + mutex_lock(&pmus_lock); + list_add_rcu(&pmu->entry, &pmus); + mutex_unlock(&pmus_lock); + + return 0; +} + +void perf_pmu_unregister(struct pmu *pmu) +{ + mutex_lock(&pmus_lock); + list_del_rcu(&pmu->entry); + mutex_unlock(&pmus_lock); + + synchronize_srcu(&pmus_srcu); +} + +struct pmu *perf_init_event(struct perf_event *event) +{ + struct pmu *pmu = NULL; + int idx; + + idx = srcu_read_lock(&pmus_srcu); + list_for_each_entry_rcu(pmu, &pmus, entry) { + int ret = pmu->event_init(event); + if (!ret) + break; + if (ret != -ENOENT) { + pmu = ERR_PTR(ret); + break; } - pmu = &perf_ops_generic; - break; } + srcu_read_unlock(&pmus_srcu, idx); return pmu; } @@ -5092,29 +5112,8 @@ perf_event_alloc(struct perf_event_attr *attr, if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) goto done; - switch (attr->type) { - case PERF_TYPE_RAW: - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - pmu = hw_perf_event_init(event); - break; - - case PERF_TYPE_SOFTWARE: - pmu = sw_perf_event_init(event); - break; - - case PERF_TYPE_TRACEPOINT: - pmu = tp_perf_event_init(event); - break; + pmu = perf_init_event(event); - case PERF_TYPE_BREAKPOINT: - pmu = bp_perf_event_init(event); - break; - - - default: - break; - } done: err = 0; if (!pmu) @@ -5979,22 +5978,15 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) return NOTIFY_OK; } -/* - * This has to have a higher priority than migration_notifier in sched.c. - */ -static struct notifier_block __cpuinitdata perf_cpu_nb = { - .notifier_call = perf_cpu_notify, - .priority = 20, -}; - void __init perf_event_init(void) { perf_event_init_all_cpus(); - perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, - (void *)(long)smp_processor_id()); - register_cpu_notifier(&perf_cpu_nb); + init_srcu_struct(&pmus_srcu); + perf_pmu_register(&perf_swevent); + perf_pmu_register(&perf_cpu_clock); + perf_pmu_register(&perf_task_clock); + perf_tp_register(); + perf_cpu_notifier(perf_cpu_notify); } static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, -- cgit v1.2.3 From 24cd7f54a0d47e1d5b3de29e2456bfbd2d8447b7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 17:32:03 +0200 Subject: perf: Reduce perf_disable() usage Since the current perf_disable() usage is only an optimization, remove it for now. This eases the removal of the __weak hw_perf_enable() interface. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/arm/kernel/perf_event.c | 3 +++ arch/powerpc/kernel/perf_event.c | 3 +++ arch/powerpc/kernel/perf_event_fsl_emb.c | 8 +++++-- arch/sh/kernel/perf_event.c | 11 +++++++--- arch/sparc/kernel/perf_event.c | 3 +++ arch/x86/kernel/cpu/perf_event.c | 22 ++++++++++++------- include/linux/perf_event.h | 20 ++++++++--------- kernel/perf_event.c | 37 +------------------------------- 8 files changed, 48 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index f62f9db35db3..afc92c580d18 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -277,6 +277,8 @@ armpmu_enable(struct perf_event *event) int idx; int err = 0; + perf_disable(); + /* If we don't have a space for the counter then finish early. */ idx = armpmu->get_event_idx(cpuc, hwc); if (idx < 0) { @@ -303,6 +305,7 @@ armpmu_enable(struct perf_event *event) perf_event_update_userpage(event); out: + perf_enable(); return err; } diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 19131b2614b9..c1408821dbc2 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -861,6 +861,7 @@ void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + perf_disable(); cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -875,6 +876,7 @@ void power_pmu_cancel_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_enable(); } /* @@ -901,6 +903,7 @@ int power_pmu_commit_txn(struct pmu *pmu) cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_enable(); return 0; } diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index ea6a804e43fd..9bc84a7fd901 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -262,7 +262,7 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -/* perf must be disabled, context locked on entry */ +/* context locked on entry */ static int fsl_emb_pmu_enable(struct perf_event *event) { struct cpu_hw_events *cpuhw; @@ -271,6 +271,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) u64 val; int i; + perf_disable(); cpuhw = &get_cpu_var(cpu_hw_events); if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) @@ -310,15 +311,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ret = 0; out: put_cpu_var(cpu_hw_events); + perf_enable(); return ret; } -/* perf must be disabled, context locked on entry */ +/* context locked on entry */ static void fsl_emb_pmu_disable(struct perf_event *event) { struct cpu_hw_events *cpuhw; int i = event->hw.idx; + perf_disable(); if (i < 0) goto out; @@ -346,6 +349,7 @@ static void fsl_emb_pmu_disable(struct perf_event *event) cpuhw->n_events--; out: + perf_enable(); put_cpu_var(cpu_hw_events); } diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 8cb206597e0c..d042989ceb45 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -230,11 +230,14 @@ static int sh_pmu_enable(struct perf_event *event) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + int ret = -EAGAIN; + + perf_disable(); if (test_and_set_bit(idx, cpuc->used_mask)) { idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); if (idx == sh_pmu->num_events) - return -EAGAIN; + goto out; set_bit(idx, cpuc->used_mask); hwc->idx = idx; @@ -248,8 +251,10 @@ static int sh_pmu_enable(struct perf_event *event) sh_pmu->enable(hwc, idx); perf_event_update_userpage(event); - - return 0; + ret = 0; +out: + perf_enable(); + return ret; } static void sh_pmu_read(struct perf_event *event) diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index bed4327f5a7a..d0131deeeaf6 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1113,6 +1113,7 @@ static void sparc_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + perf_disable(); cpuhw->group_flag |= PERF_EVENT_TXN; } @@ -1126,6 +1127,7 @@ static void sparc_pmu_cancel_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_enable(); } /* @@ -1149,6 +1151,7 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) return -EAGAIN; cpuc->group_flag &= ~PERF_EVENT_TXN; + perf_enable(); return 0; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2c89264ee791..846070ce49c3 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -969,10 +969,11 @@ static int x86_pmu_enable(struct perf_event *event) hwc = &event->hw; + perf_disable(); n0 = cpuc->n_events; - n = collect_events(cpuc, event, false); - if (n < 0) - return n; + ret = n = collect_events(cpuc, event, false); + if (ret < 0) + goto out; /* * If group events scheduling transaction was started, @@ -980,23 +981,26 @@ static int x86_pmu_enable(struct perf_event *event) * at commit time(->commit_txn) as a whole */ if (cpuc->group_flag & PERF_EVENT_TXN) - goto out; + goto done_collect; ret = x86_pmu.schedule_events(cpuc, n, assign); if (ret) - return ret; + goto out; /* * copy new assignment, now we know it is possible * will be used by hw_perf_enable() */ memcpy(cpuc->assign, assign, n*sizeof(int)); -out: +done_collect: cpuc->n_events = n; cpuc->n_added += n - n0; cpuc->n_txn += n - n0; - return 0; + ret = 0; +out: + perf_enable(); + return ret; } static int x86_pmu_start(struct perf_event *event) @@ -1432,6 +1436,7 @@ static void x86_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + perf_disable(); cpuc->group_flag |= PERF_EVENT_TXN; cpuc->n_txn = 0; } @@ -1451,6 +1456,7 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) */ cpuc->n_added -= cpuc->n_txn; cpuc->n_events -= cpuc->n_txn; + perf_enable(); } /* @@ -1480,7 +1486,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu) memcpy(cpuc->assign, assign, n*sizeof(int)); cpuc->group_flag &= ~PERF_EVENT_TXN; - + perf_enable(); return 0; } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ab72f56eb372..243286a8ded7 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -564,26 +564,26 @@ struct pmu { struct list_head entry; /* - * Should return -ENOENT when the @event doesn't match this pmu + * Should return -ENOENT when the @event doesn't match this PMU. */ int (*event_init) (struct perf_event *event); - int (*enable) (struct perf_event *event); + int (*enable) (struct perf_event *event); void (*disable) (struct perf_event *event); - int (*start) (struct perf_event *event); + int (*start) (struct perf_event *event); void (*stop) (struct perf_event *event); void (*read) (struct perf_event *event); void (*unthrottle) (struct perf_event *event); /* - * Group events scheduling is treated as a transaction, add group - * events as a whole and perform one schedulability test. If the test - * fails, roll back the whole group + * Group events scheduling is treated as a transaction, add + * group events as a whole and perform one schedulability test. + * If the test fails, roll back the whole group */ /* - * Start the transaction, after this ->enable() doesn't need - * to do schedulability tests. + * Start the transaction, after this ->enable() doesn't need to + * do schedulability tests. */ void (*start_txn) (struct pmu *pmu); /* @@ -594,8 +594,8 @@ struct pmu { */ int (*commit_txn) (struct pmu *pmu); /* - * Will cancel the transaction, assumes ->disable() is called for - * each successfull ->enable() during the transaction. + * Will cancel the transaction, assumes ->disable() is called + * for each successfull ->enable() during the transaction. */ void (*cancel_txn) (struct pmu *pmu); }; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 149ca18371b7..9a98ce953561 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -478,11 +478,6 @@ static void __perf_event_remove_from_context(void *info) return; raw_spin_lock(&ctx->lock); - /* - * Protect the list operation against NMI by disabling the - * events on a global level. - */ - perf_disable(); event_sched_out(event, cpuctx, ctx); @@ -498,7 +493,6 @@ static void __perf_event_remove_from_context(void *info) perf_max_events - perf_reserved_percpu); } - perf_enable(); raw_spin_unlock(&ctx->lock); } @@ -803,12 +797,6 @@ static void __perf_install_in_context(void *info) ctx->is_active = 1; update_context_time(ctx); - /* - * Protect the list operation against NMI by disabling the - * events on a global level. NOP for non NMI based events. - */ - perf_disable(); - add_event_to_ctx(event, ctx); if (event->cpu != -1 && event->cpu != smp_processor_id()) @@ -850,8 +838,6 @@ static void __perf_install_in_context(void *info) cpuctx->max_pertask--; unlock: - perf_enable(); - raw_spin_unlock(&ctx->lock); } @@ -972,12 +958,10 @@ static void __perf_event_enable(void *info) if (!group_can_go_on(event, cpuctx, 1)) { err = -EEXIST; } else { - perf_disable(); if (event == leader) err = group_sched_in(event, cpuctx, ctx); else err = event_sched_in(event, cpuctx, ctx); - perf_enable(); } if (err) { @@ -1090,9 +1074,8 @@ static void ctx_sched_out(struct perf_event_context *ctx, goto out; update_context_time(ctx); - perf_disable(); if (!ctx->nr_active) - goto out_enable; + goto out; if (event_type & EVENT_PINNED) { list_for_each_entry(event, &ctx->pinned_groups, group_entry) @@ -1103,9 +1086,6 @@ static void ctx_sched_out(struct perf_event_context *ctx, list_for_each_entry(event, &ctx->flexible_groups, group_entry) group_sched_out(event, cpuctx, ctx); } - - out_enable: - perf_enable(); out: raw_spin_unlock(&ctx->lock); } @@ -1364,8 +1344,6 @@ ctx_sched_in(struct perf_event_context *ctx, ctx->timestamp = perf_clock(); - perf_disable(); - /* * First go through the list and put on any pinned groups * in order to give them the best chance of going on. @@ -1377,7 +1355,6 @@ ctx_sched_in(struct perf_event_context *ctx, if (event_type & EVENT_FLEXIBLE) ctx_flexible_sched_in(ctx, cpuctx); - perf_enable(); out: raw_spin_unlock(&ctx->lock); } @@ -1425,8 +1402,6 @@ void perf_event_task_sched_in(struct task_struct *task) if (cpuctx->task_ctx == ctx) return; - perf_disable(); - /* * We want to keep the following priority order: * cpu pinned (that don't need to move), task pinned, @@ -1439,8 +1414,6 @@ void perf_event_task_sched_in(struct task_struct *task) ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); cpuctx->task_ctx = ctx; - - perf_enable(); } #define MAX_INTERRUPTS (~0ULL) @@ -1555,11 +1528,9 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) hwc->sample_period = sample_period; if (local64_read(&hwc->period_left) > 8*sample_period) { - perf_disable(); perf_event_stop(event); local64_set(&hwc->period_left, 0); perf_event_start(event); - perf_enable(); } } @@ -1588,15 +1559,12 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) */ if (interrupts == MAX_INTERRUPTS) { perf_log_throttle(event, 1); - perf_disable(); event->pmu->unthrottle(event); - perf_enable(); } if (!event->attr.freq || !event->attr.sample_freq) continue; - perf_disable(); event->pmu->read(event); now = local64_read(&event->count); delta = now - hwc->freq_count_stamp; @@ -1604,7 +1572,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) if (delta > 0) perf_adjust_period(event, TICK_NSEC, delta); - perf_enable(); } raw_spin_unlock(&ctx->lock); } @@ -1647,7 +1614,6 @@ void perf_event_task_tick(struct task_struct *curr) if (!rotate) return; - perf_disable(); cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); if (ctx) task_ctx_sched_out(ctx, EVENT_FLEXIBLE); @@ -1659,7 +1625,6 @@ void perf_event_task_tick(struct task_struct *curr) cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); if (ctx) task_ctx_sched_in(curr, EVENT_FLEXIBLE); - perf_enable(); } static int event_enable_on_exec(struct perf_event *event, -- cgit v1.2.3 From 33696fc0d141bbbcb12f75b69608ea83282e3117 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Jun 2010 08:49:00 +0200 Subject: perf: Per PMU disable Changes perf_disable() into perf_pmu_disable(). Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/alpha/kernel/perf_event.c | 30 +++++++++++++------------ arch/arm/kernel/perf_event.c | 28 +++++++++++------------ arch/powerpc/kernel/perf_event.c | 24 +++++++++++--------- arch/powerpc/kernel/perf_event_fsl_emb.c | 18 ++++++++------- arch/sh/kernel/perf_event.c | 38 +++++++++++++++++--------------- arch/sparc/kernel/perf_event.c | 20 +++++++++-------- arch/x86/kernel/cpu/perf_event.c | 16 ++++++++------ include/linux/perf_event.h | 13 ++++++----- kernel/perf_event.c | 31 ++++++++++++++++---------- 9 files changed, 119 insertions(+), 99 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 19660b5c298f..3e260731f8e6 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -435,7 +435,7 @@ static int alpha_pmu_enable(struct perf_event *event) * nevertheless we disable the PMCs first to enable a potential * final PMI to occur before we disable interrupts. */ - perf_disable(); + perf_pmu_disable(event->pmu); local_irq_save(flags); /* Default to error to be returned */ @@ -456,7 +456,7 @@ static int alpha_pmu_enable(struct perf_event *event) } local_irq_restore(flags); - perf_enable(); + perf_pmu_enable(event->pmu); return ret; } @@ -474,7 +474,7 @@ static void alpha_pmu_disable(struct perf_event *event) unsigned long flags; int j; - perf_disable(); + perf_pmu_disable(event->pmu); local_irq_save(flags); for (j = 0; j < cpuc->n_events; j++) { @@ -502,7 +502,7 @@ static void alpha_pmu_disable(struct perf_event *event) } local_irq_restore(flags); - perf_enable(); + perf_pmu_enable(event->pmu); } @@ -668,18 +668,10 @@ static int alpha_pmu_event_init(struct perf_event *event) return err; } -static struct pmu pmu = { - .event_init = alpha_pmu_event_init, - .enable = alpha_pmu_enable, - .disable = alpha_pmu_disable, - .read = alpha_pmu_read, - .unthrottle = alpha_pmu_unthrottle, -}; - /* * Main entry point - enable HW performance counters. */ -void hw_perf_enable(void) +static void alpha_pmu_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -705,7 +697,7 @@ void hw_perf_enable(void) * Main entry point - disable HW performance counters. */ -void hw_perf_disable(void) +static void alpha_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -718,6 +710,16 @@ void hw_perf_disable(void) wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); } +static struct pmu pmu = { + .pmu_enable = alpha_pmu_pmu_enable, + .pmu_disable = alpha_pmu_pmu_disable, + .event_init = alpha_pmu_event_init, + .enable = alpha_pmu_enable, + .disable = alpha_pmu_disable, + .read = alpha_pmu_read, + .unthrottle = alpha_pmu_unthrottle, +}; + /* * Main entry point - don't know when this is called but it diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index afc92c580d18..3343f3f4b973 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -277,7 +277,7 @@ armpmu_enable(struct perf_event *event) int idx; int err = 0; - perf_disable(); + perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ idx = armpmu->get_event_idx(cpuc, hwc); @@ -305,7 +305,7 @@ armpmu_enable(struct perf_event *event) perf_event_update_userpage(event); out: - perf_enable(); + perf_pmu_enable(event->pmu); return err; } @@ -534,16 +534,7 @@ static int armpmu_event_init(struct perf_event *event) return err; } -static struct pmu pmu = { - .event_init = armpmu_event_init, - .enable = armpmu_enable, - .disable = armpmu_disable, - .unthrottle = armpmu_unthrottle, - .read = armpmu_read, -}; - -void -hw_perf_enable(void) +static void armpmu_pmu_enable(struct pmu *pmu) { /* Enable all of the perf events on hardware. */ int idx; @@ -564,13 +555,22 @@ hw_perf_enable(void) armpmu->start(); } -void -hw_perf_disable(void) +static void armpmu_pmu_disable(struct pmu *pmu) { if (armpmu) armpmu->stop(); } +static struct pmu pmu = { + .pmu_enable = armpmu_pmu_enable, + .pmu_disable= armpmu_pmu_disable, + .event_init = armpmu_event_init, + .enable = armpmu_enable, + .disable = armpmu_disable, + .unthrottle = armpmu_unthrottle, + .read = armpmu_read, +}; + /* * ARMv6 Performance counter handling code. * diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index c1408821dbc2..deb84bbcb0e6 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -517,7 +517,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -void hw_perf_disable(void) +static void power_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -565,7 +565,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void power_pmu_pmu_enable(struct pmu *pmu) { struct perf_event *event; struct cpu_hw_events *cpuhw; @@ -735,7 +735,7 @@ static int power_pmu_enable(struct perf_event *event) int ret = -EAGAIN; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); /* * Add the event to the list (if there is room) @@ -769,7 +769,7 @@ nocheck: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } @@ -784,7 +784,7 @@ static void power_pmu_disable(struct perf_event *event) unsigned long flags; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); power_pmu_read(event); @@ -821,7 +821,7 @@ static void power_pmu_disable(struct perf_event *event) cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); } - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -837,7 +837,7 @@ static void power_pmu_unthrottle(struct perf_event *event) if (!event->hw.idx || !event->hw.sample_period) return; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); power_pmu_read(event); left = event->hw.sample_period; event->hw.last_period = left; @@ -848,7 +848,7 @@ static void power_pmu_unthrottle(struct perf_event *event) local64_set(&event->hw.prev_count, val); local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -861,7 +861,7 @@ void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - perf_disable(); + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -876,7 +876,7 @@ void power_pmu_cancel_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); } /* @@ -903,7 +903,7 @@ int power_pmu_commit_txn(struct pmu *pmu) cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuhw->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); return 0; } @@ -1131,6 +1131,8 @@ static int power_pmu_event_init(struct perf_event *event) } struct pmu power_pmu = { + .pmu_enable = power_pmu_pmu_enable, + .pmu_disable = power_pmu_pmu_disable, .event_init = power_pmu_event_init, .enable = power_pmu_enable, .disable = power_pmu_disable, diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 9bc84a7fd901..84b1974c628f 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -177,7 +177,7 @@ static void fsl_emb_pmu_read(struct perf_event *event) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -void hw_perf_disable(void) +static void fsl_emb_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -216,7 +216,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void fsl_emb_pmu_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -271,7 +271,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) u64 val; int i; - perf_disable(); + perf_pmu_disable(event->pmu); cpuhw = &get_cpu_var(cpu_hw_events); if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) @@ -311,7 +311,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ret = 0; out: put_cpu_var(cpu_hw_events); - perf_enable(); + perf_pmu_enable(event->pmu); return ret; } @@ -321,7 +321,7 @@ static void fsl_emb_pmu_disable(struct perf_event *event) struct cpu_hw_events *cpuhw; int i = event->hw.idx; - perf_disable(); + perf_pmu_disable(event->pmu); if (i < 0) goto out; @@ -349,7 +349,7 @@ static void fsl_emb_pmu_disable(struct perf_event *event) cpuhw->n_events--; out: - perf_enable(); + perf_pmu_enable(event->pmu); put_cpu_var(cpu_hw_events); } @@ -367,7 +367,7 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event) if (event->hw.idx < 0 || !event->hw.sample_period) return; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); fsl_emb_pmu_read(event); left = event->hw.sample_period; event->hw.last_period = left; @@ -378,7 +378,7 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event) local64_set(&event->hw.prev_count, val); local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -524,6 +524,8 @@ static int fsl_emb_pmu_event_init(struct perf_event *event) } static struct pmu fsl_emb_pmu = { + .pmu_enable = fsl_emb_pmu_pmu_enable, + .pmu_disable = fsl_emb_pmu_pmu_disable, .event_init = fsl_emb_pmu_event_init, .enable = fsl_emb_pmu_enable, .disable = fsl_emb_pmu_disable, diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index d042989ceb45..4bbe19058a58 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -232,7 +232,7 @@ static int sh_pmu_enable(struct perf_event *event) int idx = hwc->idx; int ret = -EAGAIN; - perf_disable(); + perf_pmu_disable(event->pmu); if (test_and_set_bit(idx, cpuc->used_mask)) { idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); @@ -253,7 +253,7 @@ static int sh_pmu_enable(struct perf_event *event) perf_event_update_userpage(event); ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); return ret; } @@ -285,7 +285,25 @@ static int sh_pmu_event_init(struct perf_event *event) return err; } +static void sh_pmu_pmu_enable(struct pmu *pmu) +{ + if (!sh_pmu_initialized()) + return; + + sh_pmu->enable_all(); +} + +static void sh_pmu_pmu_disable(struct pmu *pmu) +{ + if (!sh_pmu_initialized()) + return; + + sh_pmu->disable_all(); +} + static struct pmu pmu = { + .pmu_enable = sh_pmu_pmu_enable, + .pmu_disable = sh_pmu_pmu_disable, .event_init = sh_pmu_event_init, .enable = sh_pmu_enable, .disable = sh_pmu_disable, @@ -316,22 +334,6 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) return NOTIFY_OK; } -void hw_perf_enable(void) -{ - if (!sh_pmu_initialized()) - return; - - sh_pmu->enable_all(); -} - -void hw_perf_disable(void) -{ - if (!sh_pmu_initialized()) - return; - - sh_pmu->disable_all(); -} - int __cpuinit register_sh_pmu(struct sh_pmu *pmu) { if (sh_pmu) diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index d0131deeeaf6..37cae676536c 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -664,7 +664,7 @@ out: return pcr; } -void hw_perf_enable(void) +static void sparc_pmu_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 pcr; @@ -691,7 +691,7 @@ void hw_perf_enable(void) pcr_ops->write(cpuc->pcr); } -void hw_perf_disable(void) +static void sparc_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 val; @@ -718,7 +718,7 @@ static void sparc_pmu_disable(struct perf_event *event) int i; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event[i]) { @@ -748,7 +748,7 @@ static void sparc_pmu_disable(struct perf_event *event) } } - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -991,7 +991,7 @@ static int sparc_pmu_enable(struct perf_event *event) unsigned long flags; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); n0 = cpuc->n_events; if (n0 >= perf_max_events) @@ -1020,7 +1020,7 @@ nocheck: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } @@ -1113,7 +1113,7 @@ static void sparc_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - perf_disable(); + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; } @@ -1127,7 +1127,7 @@ static void sparc_pmu_cancel_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); } /* @@ -1151,11 +1151,13 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) return -EAGAIN; cpuc->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); return 0; } static struct pmu pmu = { + .pmu_enable = sparc_pmu_pmu_enable, + .pmu_disable = sparc_pmu_pmu_disable, .event_init = sparc_pmu_event_init, .enable = sparc_pmu_enable, .disable = sparc_pmu_disable, diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 846070ce49c3..79705ac45019 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -583,7 +583,7 @@ static void x86_pmu_disable_all(void) } } -void hw_perf_disable(void) +static void x86_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -803,7 +803,7 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, static int x86_pmu_start(struct perf_event *event); static void x86_pmu_stop(struct perf_event *event); -void hw_perf_enable(void) +static void x86_pmu_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct perf_event *event; @@ -969,7 +969,7 @@ static int x86_pmu_enable(struct perf_event *event) hwc = &event->hw; - perf_disable(); + perf_pmu_disable(event->pmu); n0 = cpuc->n_events; ret = n = collect_events(cpuc, event, false); if (ret < 0) @@ -999,7 +999,7 @@ done_collect: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); return ret; } @@ -1436,7 +1436,7 @@ static void x86_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - perf_disable(); + perf_pmu_disable(pmu); cpuc->group_flag |= PERF_EVENT_TXN; cpuc->n_txn = 0; } @@ -1456,7 +1456,7 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) */ cpuc->n_added -= cpuc->n_txn; cpuc->n_events -= cpuc->n_txn; - perf_enable(); + perf_pmu_enable(pmu); } /* @@ -1486,7 +1486,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu) memcpy(cpuc->assign, assign, n*sizeof(int)); cpuc->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); return 0; } @@ -1605,6 +1605,8 @@ int x86_pmu_event_init(struct perf_event *event) } static struct pmu pmu = { + .pmu_enable = x86_pmu_pmu_enable, + .pmu_disable = x86_pmu_pmu_disable, .event_init = x86_pmu_event_init, .enable = x86_pmu_enable, .disable = x86_pmu_disable, diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 243286a8ded7..6abf103fb7f8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -563,6 +563,11 @@ struct perf_event; struct pmu { struct list_head entry; + int *pmu_disable_count; + + void (*pmu_enable) (struct pmu *pmu); + void (*pmu_disable) (struct pmu *pmu); + /* * Should return -ENOENT when the @event doesn't match this PMU. */ @@ -868,10 +873,8 @@ extern void perf_event_free_task(struct task_struct *task); extern void set_perf_event_pending(void); extern void perf_event_do_pending(void); extern void perf_event_print_debug(void); -extern void __perf_disable(void); -extern bool __perf_enable(void); -extern void perf_disable(void); -extern void perf_enable(void); +extern void perf_pmu_disable(struct pmu *pmu); +extern void perf_pmu_enable(struct pmu *pmu); extern int perf_event_task_disable(void); extern int perf_event_task_enable(void); extern void perf_event_update_userpage(struct perf_event *event); @@ -1056,8 +1059,6 @@ static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } static inline void perf_event_do_pending(void) { } static inline void perf_event_print_debug(void) { } -static inline void perf_disable(void) { } -static inline void perf_enable(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } static inline int perf_event_task_enable(void) { return -EINVAL; } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 9a98ce953561..5ed0c06765bb 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -71,23 +71,20 @@ static atomic64_t perf_event_id; */ static DEFINE_SPINLOCK(perf_resource_lock); -void __weak hw_perf_disable(void) { barrier(); } -void __weak hw_perf_enable(void) { barrier(); } - void __weak perf_event_print_debug(void) { } -static DEFINE_PER_CPU(int, perf_disable_count); - -void perf_disable(void) +void perf_pmu_disable(struct pmu *pmu) { - if (!__get_cpu_var(perf_disable_count)++) - hw_perf_disable(); + int *count = this_cpu_ptr(pmu->pmu_disable_count); + if (!(*count)++) + pmu->pmu_disable(pmu); } -void perf_enable(void) +void perf_pmu_enable(struct pmu *pmu) { - if (!--__get_cpu_var(perf_disable_count)) - hw_perf_enable(); + int *count = this_cpu_ptr(pmu->pmu_disable_count); + if (!--(*count)) + pmu->pmu_enable(pmu); } static void get_ctx(struct perf_event_context *ctx) @@ -4970,11 +4967,19 @@ static struct srcu_struct pmus_srcu; int perf_pmu_register(struct pmu *pmu) { + int ret; + mutex_lock(&pmus_lock); + ret = -ENOMEM; + pmu->pmu_disable_count = alloc_percpu(int); + if (!pmu->pmu_disable_count) + goto unlock; list_add_rcu(&pmu->entry, &pmus); + ret = 0; +unlock: mutex_unlock(&pmus_lock); - return 0; + return ret; } void perf_pmu_unregister(struct pmu *pmu) @@ -4984,6 +4989,8 @@ void perf_pmu_unregister(struct pmu *pmu) mutex_unlock(&pmus_lock); synchronize_srcu(&pmus_srcu); + + free_percpu(pmu->pmu_disable_count); } struct pmu *perf_init_event(struct perf_event *event) -- cgit v1.2.3 From ad5133b7030d04ce7701aa7cbe98f561347c79c2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Jun 2010 12:22:39 +0200 Subject: perf: Default PMU ops Provide default implementations for the pmu txn methods, this allows us to remove some conditional code. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 10 ++++---- kernel/perf_event.c | 64 +++++++++++++++++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6abf103fb7f8..bf85733597ec 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -565,8 +565,8 @@ struct pmu { int *pmu_disable_count; - void (*pmu_enable) (struct pmu *pmu); - void (*pmu_disable) (struct pmu *pmu); + void (*pmu_enable) (struct pmu *pmu); /* optional */ + void (*pmu_disable) (struct pmu *pmu); /* optional */ /* * Should return -ENOENT when the @event doesn't match this PMU. @@ -590,19 +590,19 @@ struct pmu { * Start the transaction, after this ->enable() doesn't need to * do schedulability tests. */ - void (*start_txn) (struct pmu *pmu); + void (*start_txn) (struct pmu *pmu); /* optional */ /* * If ->start_txn() disabled the ->enable() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ - int (*commit_txn) (struct pmu *pmu); + int (*commit_txn) (struct pmu *pmu); /* optional */ /* * Will cancel the transaction, assumes ->disable() is called * for each successfull ->enable() during the transaction. */ - void (*cancel_txn) (struct pmu *pmu); + void (*cancel_txn) (struct pmu *pmu); /* optional */ }; /** diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 5ed0c06765bb..8ef4ba3bcb1f 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -674,21 +674,14 @@ group_sched_in(struct perf_event *group_event, { struct perf_event *event, *partial_group = NULL; struct pmu *pmu = group_event->pmu; - bool txn = false; if (group_event->state == PERF_EVENT_STATE_OFF) return 0; - /* Check if group transaction availabe */ - if (pmu->start_txn) - txn = true; - - if (txn) - pmu->start_txn(pmu); + pmu->start_txn(pmu); if (event_sched_in(group_event, cpuctx, ctx)) { - if (txn) - pmu->cancel_txn(pmu); + pmu->cancel_txn(pmu); return -EAGAIN; } @@ -702,7 +695,7 @@ group_sched_in(struct perf_event *group_event, } } - if (!txn || !pmu->commit_txn(pmu)) + if (!pmu->commit_txn(pmu)) return 0; group_error: @@ -717,8 +710,7 @@ group_error: } event_sched_out(group_event, cpuctx, ctx); - if (txn) - pmu->cancel_txn(pmu); + pmu->cancel_txn(pmu); return -EAGAIN; } @@ -4965,6 +4957,31 @@ static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); static struct srcu_struct pmus_srcu; +static void perf_pmu_nop_void(struct pmu *pmu) +{ +} + +static int perf_pmu_nop_int(struct pmu *pmu) +{ + return 0; +} + +static void perf_pmu_start_txn(struct pmu *pmu) +{ + perf_pmu_disable(pmu); +} + +static int perf_pmu_commit_txn(struct pmu *pmu) +{ + perf_pmu_enable(pmu); + return 0; +} + +static void perf_pmu_cancel_txn(struct pmu *pmu) +{ + perf_pmu_enable(pmu); +} + int perf_pmu_register(struct pmu *pmu) { int ret; @@ -4974,6 +4991,29 @@ int perf_pmu_register(struct pmu *pmu) pmu->pmu_disable_count = alloc_percpu(int); if (!pmu->pmu_disable_count) goto unlock; + + if (!pmu->start_txn) { + if (pmu->pmu_enable) { + /* + * If we have pmu_enable/pmu_disable calls, install + * transaction stubs that use that to try and batch + * hardware accesses. + */ + pmu->start_txn = perf_pmu_start_txn; + pmu->commit_txn = perf_pmu_commit_txn; + pmu->cancel_txn = perf_pmu_cancel_txn; + } else { + pmu->start_txn = perf_pmu_nop_void; + pmu->commit_txn = perf_pmu_nop_int; + pmu->cancel_txn = perf_pmu_nop_void; + } + } + + if (!pmu->pmu_enable) { + pmu->pmu_enable = perf_pmu_nop_void; + pmu->pmu_disable = perf_pmu_nop_void; + } + list_add_rcu(&pmu->entry, &pmus); ret = 0; unlock: -- cgit v1.2.3 From fa407f35e0298d841e4088f95a7f9cf6e725c6d5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 24 Jun 2010 12:35:12 +0200 Subject: perf: Shrink hw_perf_event Use hw_perf_event::period_left instead of hw_perf_event::remaining and win back 8 bytes. Signed-off-by: Peter Zijlstra Cc: paulus Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 - kernel/perf_event.c | 13 ++++++------- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index bf85733597ec..8cafa15af60d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -529,7 +529,6 @@ struct hw_perf_event { int last_cpu; }; struct { /* software */ - s64 remaining; struct hrtimer hrtimer; }; #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 8ef4ba3bcb1f..1a6cdbf0d091 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4800,14 +4800,13 @@ static void perf_swevent_start_hrtimer(struct perf_event *event) hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hwc->hrtimer.function = perf_swevent_hrtimer; if (hwc->sample_period) { - u64 period; + s64 period = local64_read(&hwc->period_left); - if (hwc->remaining) { - if (hwc->remaining < 0) + if (period) { + if (period < 0) period = 10000; - else - period = hwc->remaining; - hwc->remaining = 0; + + local64_set(&hwc->period_left, 0); } else { period = max_t(u64, 10000, hwc->sample_period); } @@ -4823,7 +4822,7 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) if (hwc->sample_period) { ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); - hwc->remaining = ktime_to_ns(remaining); + local64_set(&hwc->period_left, ktime_to_ns(remaining)); hrtimer_cancel(&hwc->hrtimer); } -- cgit v1.2.3 From a4eaf7f14675cb512d69f0c928055e73d0c6d252 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Jun 2010 14:37:10 +0200 Subject: perf: Rework the PMU methods Replace pmu::{enable,disable,start,stop,unthrottle} with pmu::{add,del,start,stop}, all of which take a flags argument. The new interface extends the capability to stop a counter while keeping it scheduled on the PMU. We replace the throttled state with the generic stopped state. This also allows us to efficiently stop/start counters over certain code paths (like IRQ handlers). It also allows scheduling a counter without it starting, allowing for a generic frozen state (useful for rotating stopped counters). The stopped state is implemented in two different ways, depending on how the architecture implemented the throttled state: 1) We disable the counter: a) the pmu has per-counter enable bits, we flip that b) we program a NOP event, preserving the counter state 2) We store the counter state and ignore all read/overflow events Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/alpha/kernel/perf_event.c | 71 +++++++++++---- arch/arm/kernel/perf_event.c | 96 ++++++++++++-------- arch/powerpc/kernel/perf_event.c | 105 ++++++++++++++-------- arch/powerpc/kernel/perf_event_fsl_emb.c | 107 ++++++++++++++--------- arch/sh/kernel/perf_event.c | 75 +++++++++++----- arch/sparc/kernel/perf_event.c | 109 ++++++++++++++--------- arch/x86/kernel/cpu/perf_event.c | 106 ++++++++++++---------- arch/x86/kernel/cpu/perf_event_intel.c | 2 +- arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 +- include/linux/ftrace_event.h | 4 +- include/linux/perf_event.h | 54 +++++++++--- kernel/hw_breakpoint.c | 29 ++++++- kernel/perf_event.c | 140 +++++++++++++++--------------- kernel/trace/trace_event_perf.c | 7 +- 14 files changed, 576 insertions(+), 331 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 3e260731f8e6..380ef02d557a 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -307,7 +307,7 @@ again: new_raw_count) != prev_raw_count) goto again; - delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; + delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; /* It is possible on very rare occasions that the PMC has overflowed * but the interrupt is yet to come. Detect and fix this situation. @@ -402,14 +402,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) struct hw_perf_event *hwc = &pe->hw; int idx = hwc->idx; - if (cpuc->current_idx[j] != PMC_NO_INDEX) { - cpuc->idx_mask |= (1<current_idx[j]); - continue; + if (cpuc->current_idx[j] == PMC_NO_INDEX) { + alpha_perf_event_set_period(pe, hwc, idx); + cpuc->current_idx[j] = idx; } - alpha_perf_event_set_period(pe, hwc, idx); - cpuc->current_idx[j] = idx; - cpuc->idx_mask |= (1<current_idx[j]); + if (!(hwc->state & PERF_HES_STOPPED)) + cpuc->idx_mask |= (1<current_idx[j]); } cpuc->config = cpuc->event[0]->hw.config_base; } @@ -420,7 +419,7 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) * - this function is called from outside this module via the pmu struct * returned from perf event initialisation. */ -static int alpha_pmu_enable(struct perf_event *event) +static int alpha_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int n0; @@ -455,6 +454,10 @@ static int alpha_pmu_enable(struct perf_event *event) } } + hwc->state = PERF_HES_UPTODATE; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_STOPPED; + local_irq_restore(flags); perf_pmu_enable(event->pmu); @@ -467,7 +470,7 @@ static int alpha_pmu_enable(struct perf_event *event) * - this function is called from outside this module via the pmu struct * returned from perf event initialisation. */ -static void alpha_pmu_disable(struct perf_event *event) +static void alpha_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -514,13 +517,44 @@ static void alpha_pmu_read(struct perf_event *event) } -static void alpha_pmu_unthrottle(struct perf_event *event) +static void alpha_pmu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + if (!(hwc->state & PERF_HES_STOPPED)) { + cpuc->idx_mask &= !(1UL<idx); + hwc->state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + alpha_perf_event_update(event, hwc, hwc->idx, 0); + hwc->state |= PERF_HES_UPTODATE; + } + + if (cpuc->enabled) + wrperfmon(PERFMON_CMD_ENABLE, (1UL<idx)); +} + + +static void alpha_pmu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + alpha_perf_event_set_period(event, hwc, hwc->idx); + } + + hwc->state = 0; + cpuc->idx_mask |= 1UL<idx; - wrperfmon(PERFMON_CMD_ENABLE, (1UL<idx)); + if (cpuc->enabled) + wrperfmon(PERFMON_CMD_ENABLE, (1UL<idx)); } @@ -671,7 +705,7 @@ static int alpha_pmu_event_init(struct perf_event *event) /* * Main entry point - enable HW performance counters. */ -static void alpha_pmu_pmu_enable(struct pmu *pmu) +static void alpha_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -697,7 +731,7 @@ static void alpha_pmu_pmu_enable(struct pmu *pmu) * Main entry point - disable HW performance counters. */ -static void alpha_pmu_pmu_disable(struct pmu *pmu) +static void alpha_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -711,13 +745,14 @@ static void alpha_pmu_pmu_disable(struct pmu *pmu) } static struct pmu pmu = { - .pmu_enable = alpha_pmu_pmu_enable, - .pmu_disable = alpha_pmu_pmu_disable, + .pmu_enable = alpha_pmu_enable, + .pmu_disable = alpha_pmu_disable, .event_init = alpha_pmu_event_init, - .enable = alpha_pmu_enable, - .disable = alpha_pmu_disable, + .add = alpha_pmu_add, + .del = alpha_pmu_del, + .start = alpha_pmu_start, + .stop = alpha_pmu_stop, .read = alpha_pmu_read, - .unthrottle = alpha_pmu_unthrottle, }; diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 3343f3f4b973..448cfa6b3ef0 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -221,46 +221,56 @@ again: } static void -armpmu_disable(struct perf_event *event) +armpmu_read(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - WARN_ON(idx < 0); - - clear_bit(idx, cpuc->active_mask); - armpmu->disable(hwc, idx); - - barrier(); - armpmu_event_update(event, hwc, idx); - cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + /* Don't read disabled counters! */ + if (hwc->idx < 0) + return; - perf_event_update_userpage(event); + armpmu_event_update(event, hwc, hwc->idx); } static void -armpmu_read(struct perf_event *event) +armpmu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; - /* Don't read disabled counters! */ - if (hwc->idx < 0) + if (!armpmu) return; - armpmu_event_update(event, hwc, hwc->idx); + /* + * ARM pmu always has to update the counter, so ignore + * PERF_EF_UPDATE, see comments in armpmu_start(). + */ + if (!(hwc->state & PERF_HES_STOPPED)) { + armpmu->disable(hwc, hwc->idx); + barrier(); /* why? */ + armpmu_event_update(event, hwc, hwc->idx); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } } static void -armpmu_unthrottle(struct perf_event *event) +armpmu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; + if (!armpmu) + return; + + /* + * ARM pmu always has to reprogram the period, so ignore + * PERF_EF_RELOAD, see the comment below. + */ + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; /* * Set the period again. Some counters can't be stopped, so when we - * were throttled we simply disabled the IRQ source and the counter + * were stopped we simply disabled the IRQ source and the counter * may have been left counting. If we don't do this step then we may * get an interrupt too soon or *way* too late if the overflow has * happened since disabling. @@ -269,8 +279,25 @@ armpmu_unthrottle(struct perf_event *event) armpmu->enable(hwc, hwc->idx); } +static void +armpmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + WARN_ON(idx < 0); + + clear_bit(idx, cpuc->active_mask); + armpmu_stop(event, PERF_EF_UPDATE); + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + static int -armpmu_enable(struct perf_event *event) +armpmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -295,11 +322,9 @@ armpmu_enable(struct perf_event *event) cpuc->events[idx] = event; set_bit(idx, cpuc->active_mask); - /* Set the period for the event. */ - armpmu_event_set_period(event, hwc, idx); - - /* Enable the event. */ - armpmu->enable(hwc, idx); + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + armpmu_start(event, PERF_EF_RELOAD); /* Propagate our changes to the userspace mapping. */ perf_event_update_userpage(event); @@ -534,7 +559,7 @@ static int armpmu_event_init(struct perf_event *event) return err; } -static void armpmu_pmu_enable(struct pmu *pmu) +static void armpmu_enable(struct pmu *pmu) { /* Enable all of the perf events on hardware. */ int idx; @@ -555,20 +580,21 @@ static void armpmu_pmu_enable(struct pmu *pmu) armpmu->start(); } -static void armpmu_pmu_disable(struct pmu *pmu) +static void armpmu_disable(struct pmu *pmu) { if (armpmu) armpmu->stop(); } static struct pmu pmu = { - .pmu_enable = armpmu_pmu_enable, - .pmu_disable= armpmu_pmu_disable, - .event_init = armpmu_event_init, - .enable = armpmu_enable, - .disable = armpmu_disable, - .unthrottle = armpmu_unthrottle, - .read = armpmu_read, + .pmu_enable = armpmu_enable, + .pmu_disable = armpmu_disable, + .event_init = armpmu_event_init, + .add = armpmu_add, + .del = armpmu_del, + .start = armpmu_start, + .stop = armpmu_stop, + .read = armpmu_read, }; /* diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index deb84bbcb0e6..9cb4924b6c07 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + if (!event->hw.idx) return; /* @@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -static void power_pmu_pmu_disable(struct pmu *pmu) +static void power_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -565,7 +568,7 @@ static void power_pmu_pmu_disable(struct pmu *pmu) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -static void power_pmu_pmu_enable(struct pmu *pmu) +static void power_pmu_enable(struct pmu *pmu) { struct perf_event *event; struct cpu_hw_events *cpuhw; @@ -672,6 +675,8 @@ static void power_pmu_pmu_enable(struct pmu *pmu) } local64_set(&event->hw.prev_count, val); event->hw.idx = idx; + if (event->hw.state & PERF_HES_STOPPED) + val = 0; write_pmc(idx, val); perf_event_update_userpage(event); } @@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count, * re-enable the PMU in order to get hw_perf_enable to do the * actual work of reconfiguring the PMU. */ -static int power_pmu_enable(struct perf_event *event) +static int power_pmu_add(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event) cpuhw->events[n0] = event->hw.config; cpuhw->flags[n0] = event->hw.event_base; + if (!(ef_flags & PERF_EF_START)) + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed @@ -777,7 +785,7 @@ nocheck: /* * Remove a event from the PMU. */ -static void power_pmu_disable(struct perf_event *event) +static void power_pmu_del(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; long i; @@ -826,27 +834,53 @@ static void power_pmu_disable(struct perf_event *event) } /* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. + * POWER-PMU does not support disabling individual counters, hence + * program their cycle counter to their max value and ignore the interrupts. */ -static void power_pmu_unthrottle(struct perf_event *event) + +static void power_pmu_start(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; + s64 left; if (!event->hw.idx || !event->hw.sample_period) return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} + +static void power_pmu_stop(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + + if (!event->hw.idx || !event->hw.sample_period) + return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + local_irq_save(flags); perf_pmu_disable(event->pmu); + power_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + perf_event_update_userpage(event); perf_pmu_enable(event->pmu); local_irq_restore(flags); @@ -1131,13 +1165,14 @@ static int power_pmu_event_init(struct perf_event *event) } struct pmu power_pmu = { - .pmu_enable = power_pmu_pmu_enable, - .pmu_disable = power_pmu_pmu_disable, + .pmu_enable = power_pmu_enable, + .pmu_disable = power_pmu_disable, .event_init = power_pmu_event_init, - .enable = power_pmu_enable, - .disable = power_pmu_disable, + .add = power_pmu_add, + .del = power_pmu_del, + .start = power_pmu_start, + .stop = power_pmu_stop, .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, .start_txn = power_pmu_start_txn, .cancel_txn = power_pmu_cancel_txn, .commit_txn = power_pmu_commit_txn, @@ -1155,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -1177,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -1189,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + power_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } /* diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 84b1974c628f..7ecca59ddf77 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + /* * Performance monitor interrupts come even when interrupts * are soft-disabled, as long as interrupts are hard-enabled. @@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -static void fsl_emb_pmu_pmu_disable(struct pmu *pmu) +static void fsl_emb_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -216,7 +219,7 @@ static void fsl_emb_pmu_pmu_disable(struct pmu *pmu) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -static void fsl_emb_pmu_pmu_enable(struct pmu *pmu) +static void fsl_emb_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -263,7 +266,7 @@ static int collect_events(struct perf_event *group, int max_count, } /* context locked on entry */ -static int fsl_emb_pmu_enable(struct perf_event *event) +static int fsl_emb_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int ret = -EAGAIN; @@ -302,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event) val = 0x80000000L - left; } local64_set(&event->hw.prev_count, val); + + if (!(flags & PERF_EF_START)) { + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + val = 0; + } + write_pmc(i, val); perf_event_update_userpage(event); @@ -316,7 +325,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) } /* context locked on entry */ -static void fsl_emb_pmu_disable(struct perf_event *event) +static void fsl_emb_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int i = event->hw.idx; @@ -353,30 +362,49 @@ static void fsl_emb_pmu_disable(struct perf_event *event) put_cpu_var(cpu_hw_events); } -/* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. - * - * Context is locked on entry, but perf is not disabled. - */ -static void fsl_emb_pmu_unthrottle(struct perf_event *event) +static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + s64 left; + + if (event->hw.idx < 0 || !event->hw.sample_period) + return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} + +static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; if (event->hw.idx < 0 || !event->hw.sample_period) return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + local_irq_save(flags); perf_pmu_disable(event->pmu); + fsl_emb_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + perf_event_update_userpage(event); perf_pmu_enable(event->pmu); local_irq_restore(flags); @@ -524,13 +552,14 @@ static int fsl_emb_pmu_event_init(struct perf_event *event) } static struct pmu fsl_emb_pmu = { - .pmu_enable = fsl_emb_pmu_pmu_enable, - .pmu_disable = fsl_emb_pmu_pmu_disable, + .pmu_enable = fsl_emb_pmu_enable, + .pmu_disable = fsl_emb_pmu_disable, .event_init = fsl_emb_pmu_event_init, - .enable = fsl_emb_pmu_enable, - .disable = fsl_emb_pmu_disable, + .add = fsl_emb_pmu_add, + .del = fsl_emb_pmu_del, + .start = fsl_emb_pmu_start, + .stop = fsl_emb_pmu_stop, .read = fsl_emb_pmu_read, - .unthrottle = fsl_emb_pmu_unthrottle, }; /* @@ -545,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -567,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -576,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, perf_sample_data_init(&data, 0); data.period = event->hw.last_period; - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + fsl_emb_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } static void perf_event_interrupt(struct pt_regs *regs) diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 4bbe19058a58..cf39c4873468 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -206,26 +206,52 @@ again: local64_add(delta, &event->count); } -static void sh_pmu_disable(struct perf_event *event) +static void sh_pmu_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - clear_bit(idx, cpuc->active_mask); - sh_pmu->disable(hwc, idx); + if (!(event->hw.state & PERF_HES_STOPPED)) { + sh_pmu->disable(hwc, idx); + cpuc->events[idx] = NULL; + event->hw.state |= PERF_HES_STOPPED; + } - barrier(); + if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { + sh_perf_event_update(event, &event->hw, idx); + event->hw.state |= PERF_HES_UPTODATE; + } +} - sh_perf_event_update(event, &event->hw, idx); +static void sh_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; - cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + if (WARN_ON_ONCE(idx == -1)) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + cpuc->events[idx] = event; + event->hw.state = 0; + sh_pmu->enable(hwc, idx); +} + +static void sh_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + sh_pmu_stop(event, PERF_EF_UPDATE); + __clear_bit(event->hw.idx, cpuc->used_mask); perf_event_update_userpage(event); } -static int sh_pmu_enable(struct perf_event *event) +static int sh_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -234,21 +260,20 @@ static int sh_pmu_enable(struct perf_event *event) perf_pmu_disable(event->pmu); - if (test_and_set_bit(idx, cpuc->used_mask)) { + if (__test_and_set_bit(idx, cpuc->used_mask)) { idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); if (idx == sh_pmu->num_events) goto out; - set_bit(idx, cpuc->used_mask); + __set_bit(idx, cpuc->used_mask); hwc->idx = idx; } sh_pmu->disable(hwc, idx); - cpuc->events[idx] = event; - set_bit(idx, cpuc->active_mask); - - sh_pmu->enable(hwc, idx); + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (flags & PERF_EF_START) + sh_pmu_start(event, PERF_EF_RELOAD); perf_event_update_userpage(event); ret = 0; @@ -285,7 +310,7 @@ static int sh_pmu_event_init(struct perf_event *event) return err; } -static void sh_pmu_pmu_enable(struct pmu *pmu) +static void sh_pmu_enable(struct pmu *pmu) { if (!sh_pmu_initialized()) return; @@ -293,7 +318,7 @@ static void sh_pmu_pmu_enable(struct pmu *pmu) sh_pmu->enable_all(); } -static void sh_pmu_pmu_disable(struct pmu *pmu) +static void sh_pmu_disable(struct pmu *pmu) { if (!sh_pmu_initialized()) return; @@ -302,11 +327,13 @@ static void sh_pmu_pmu_disable(struct pmu *pmu) } static struct pmu pmu = { - .pmu_enable = sh_pmu_pmu_enable, - .pmu_disable = sh_pmu_pmu_disable, + .pmu_enable = sh_pmu_enable, + .pmu_disable = sh_pmu_disable, .event_init = sh_pmu_event_init, - .enable = sh_pmu_enable, - .disable = sh_pmu_disable, + .add = sh_pmu_add, + .del = sh_pmu_del, + .start = sh_pmu_start, + .stop = sh_pmu_stop, .read = sh_pmu_read, }; @@ -334,15 +361,15 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) return NOTIFY_OK; } -int __cpuinit register_sh_pmu(struct sh_pmu *pmu) +int __cpuinit register_sh_pmu(struct sh_pmu *_pmu) { if (sh_pmu) return -EBUSY; - sh_pmu = pmu; + sh_pmu = _pmu; - pr_info("Performance Events: %s support registered\n", pmu->name); + pr_info("Performance Events: %s support registered\n", _pmu->name); - WARN_ON(pmu->num_events > MAX_HWEVENTS); + WARN_ON(_pmu->num_events > MAX_HWEVENTS); perf_pmu_register(&pmu); perf_cpu_notifier(sh_pmu_notifier); diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 37cae676536c..516be2314b54 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -658,13 +658,16 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) enc = perf_event_get_enc(cpuc->events[i]); pcr &= ~mask_for_index(idx); - pcr |= event_encoding(enc, idx); + if (hwc->state & PERF_HES_STOPPED) + pcr |= nop_for_index(idx); + else + pcr |= event_encoding(enc, idx); } out: return pcr; } -static void sparc_pmu_pmu_enable(struct pmu *pmu) +static void sparc_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 pcr; @@ -691,7 +694,7 @@ static void sparc_pmu_pmu_enable(struct pmu *pmu) pcr_ops->write(cpuc->pcr); } -static void sparc_pmu_pmu_disable(struct pmu *pmu) +static void sparc_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 val; @@ -710,10 +713,53 @@ static void sparc_pmu_pmu_disable(struct pmu *pmu) pcr_ops->write(cpuc->pcr); } -static void sparc_pmu_disable(struct perf_event *event) +static int active_event_index(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + int i; + + for (i = 0; i < cpuc->n_events; i++) { + if (cpuc->event[i] == event) + break; + } + BUG_ON(i == cpuc->n_events); + return cpuc->current_idx[i]; +} + +static void sparc_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx = active_event_index(cpuc, event); + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + sparc_perf_event_set_period(event, &event->hw, idx); + } + + event->hw.state = 0; + + sparc_pmu_enable_event(cpuc, &event->hw, idx); +} + +static void sparc_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx = active_event_index(cpuc, event); + + if (!(event->hw.state & PERF_HES_STOPPED)) { + sparc_pmu_disable_event(cpuc, &event->hw, idx); + event->hw.state |= PERF_HES_STOPPED; + } + + if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) { + sparc_perf_event_update(event, &event->hw, idx); + event->hw.state |= PERF_HES_UPTODATE; + } +} + +static void sparc_pmu_del(struct perf_event *event, int _flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; unsigned long flags; int i; @@ -722,7 +768,10 @@ static void sparc_pmu_disable(struct perf_event *event) for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event[i]) { - int idx = cpuc->current_idx[i]; + /* Absorb the final count and turn off the + * event. + */ + sparc_pmu_stop(event, PERF_EF_UPDATE); /* Shift remaining entries down into * the existing slot. @@ -734,13 +783,6 @@ static void sparc_pmu_disable(struct perf_event *event) cpuc->current_idx[i]; } - /* Absorb the final count and turn off the - * event. - */ - sparc_pmu_disable_event(cpuc, hwc, idx); - barrier(); - sparc_perf_event_update(event, hwc, idx); - perf_event_update_userpage(event); cpuc->n_events--; @@ -752,19 +794,6 @@ static void sparc_pmu_disable(struct perf_event *event) local_irq_restore(flags); } -static int active_event_index(struct cpu_hw_events *cpuc, - struct perf_event *event) -{ - int i; - - for (i = 0; i < cpuc->n_events; i++) { - if (cpuc->event[i] == event) - break; - } - BUG_ON(i == cpuc->n_events); - return cpuc->current_idx[i]; -} - static void sparc_pmu_read(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -774,15 +803,6 @@ static void sparc_pmu_read(struct perf_event *event) sparc_perf_event_update(event, hwc, idx); } -static void sparc_pmu_unthrottle(struct perf_event *event) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int idx = active_event_index(cpuc, event); - struct hw_perf_event *hwc = &event->hw; - - sparc_pmu_enable_event(cpuc, hwc, idx); -} - static atomic_t active_events = ATOMIC_INIT(0); static DEFINE_MUTEX(pmc_grab_mutex); @@ -984,7 +1004,7 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -static int sparc_pmu_enable(struct perf_event *event) +static int sparc_pmu_add(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int n0, ret = -EAGAIN; @@ -1001,6 +1021,10 @@ static int sparc_pmu_enable(struct perf_event *event) cpuc->events[n0] = event->hw.event_base; cpuc->current_idx[n0] = PIC_NO_INDEX; + event->hw.state = PERF_HES_UPTODATE; + if (!(ef_flags & PERF_EF_START)) + event->hw.state |= PERF_HES_STOPPED; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed @@ -1156,13 +1180,14 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) } static struct pmu pmu = { - .pmu_enable = sparc_pmu_pmu_enable, - .pmu_disable = sparc_pmu_pmu_disable, + .pmu_enable = sparc_pmu_enable, + .pmu_disable = sparc_pmu_disable, .event_init = sparc_pmu_event_init, - .enable = sparc_pmu_enable, - .disable = sparc_pmu_disable, + .add = sparc_pmu_add, + .del = sparc_pmu_del, + .start = sparc_pmu_start, + .stop = sparc_pmu_stop, .read = sparc_pmu_read, - .unthrottle = sparc_pmu_unthrottle, .start_txn = sparc_pmu_start_txn, .cancel_txn = sparc_pmu_cancel_txn, .commit_txn = sparc_pmu_commit_txn, @@ -1243,7 +1268,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, continue; if (perf_event_overflow(event, 1, &data, regs)) - sparc_pmu_disable_event(cpuc, hwc, idx); + sparc_pmu_stop(event, 0); } return NOTIFY_STOP; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 79705ac45019..dd6fec710677 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -583,7 +583,7 @@ static void x86_pmu_disable_all(void) } } -static void x86_pmu_pmu_disable(struct pmu *pmu) +static void x86_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -800,10 +800,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, hwc->last_tag == cpuc->tags[i]; } -static int x86_pmu_start(struct perf_event *event); -static void x86_pmu_stop(struct perf_event *event); +static void x86_pmu_start(struct perf_event *event, int flags); +static void x86_pmu_stop(struct perf_event *event, int flags); -static void x86_pmu_pmu_enable(struct pmu *pmu) +static void x86_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct perf_event *event; @@ -839,7 +839,14 @@ static void x86_pmu_pmu_enable(struct pmu *pmu) match_prev_assignment(hwc, cpuc, i)) continue; - x86_pmu_stop(event); + /* + * Ensure we don't accidentally enable a stopped + * counter simply because we rescheduled. + */ + if (hwc->state & PERF_HES_STOPPED) + hwc->state |= PERF_HES_ARCH; + + x86_pmu_stop(event, PERF_EF_UPDATE); } for (i = 0; i < cpuc->n_events; i++) { @@ -851,7 +858,10 @@ static void x86_pmu_pmu_enable(struct pmu *pmu) else if (i < n_running) continue; - x86_pmu_start(event); + if (hwc->state & PERF_HES_ARCH) + continue; + + x86_pmu_start(event, PERF_EF_RELOAD); } cpuc->n_added = 0; perf_events_lapic_init(); @@ -952,15 +962,12 @@ static void x86_pmu_enable_event(struct perf_event *event) } /* - * activate a single event + * Add a single event to the PMU. * * The event is added to the group of enabled events * but only if it can be scehduled with existing events. - * - * Called with PMU disabled. If successful and return value 1, - * then guaranteed to call perf_enable() and hw_perf_enable() */ -static int x86_pmu_enable(struct perf_event *event) +static int x86_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc; @@ -975,10 +982,14 @@ static int x86_pmu_enable(struct perf_event *event) if (ret < 0) goto out; + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed - * at commit time(->commit_txn) as a whole + * at commit time (->commit_txn) as a whole */ if (cpuc->group_flag & PERF_EVENT_TXN) goto done_collect; @@ -1003,27 +1014,28 @@ out: return ret; } -static int x86_pmu_start(struct perf_event *event) +static void x86_pmu_start(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx = event->hw.idx; - if (idx == -1) - return -EAGAIN; + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(idx == -1)) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + x86_perf_event_set_period(event); + } + + event->hw.state = 0; - x86_perf_event_set_period(event); cpuc->events[idx] = event; __set_bit(idx, cpuc->active_mask); x86_pmu.enable(event); perf_event_update_userpage(event); - - return 0; -} - -static void x86_pmu_unthrottle(struct perf_event *event) -{ - int ret = x86_pmu_start(event); - WARN_ON_ONCE(ret); } void perf_event_print_debug(void) @@ -1080,27 +1092,29 @@ void perf_event_print_debug(void) local_irq_restore(flags); } -static void x86_pmu_stop(struct perf_event *event) +static void x86_pmu_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - if (!__test_and_clear_bit(idx, cpuc->active_mask)) - return; - - x86_pmu.disable(event); - /* - * Drain the remaining delta count out of a event - * that we are disabling: - */ - x86_perf_event_update(event); + if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { + x86_pmu.disable(event); + cpuc->events[hwc->idx] = NULL; + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + } - cpuc->events[idx] = NULL; + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + x86_perf_event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } } -static void x86_pmu_disable(struct perf_event *event) +static void x86_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int i; @@ -1113,7 +1127,7 @@ static void x86_pmu_disable(struct perf_event *event) if (cpuc->group_flag & PERF_EVENT_TXN) return; - x86_pmu_stop(event); + x86_pmu_stop(event, PERF_EF_UPDATE); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event_list[i]) { @@ -1165,7 +1179,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) continue; if (perf_event_overflow(event, 1, &data, regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } if (handled) @@ -1605,15 +1619,17 @@ int x86_pmu_event_init(struct perf_event *event) } static struct pmu pmu = { - .pmu_enable = x86_pmu_pmu_enable, - .pmu_disable = x86_pmu_pmu_disable, + .pmu_enable = x86_pmu_enable, + .pmu_disable = x86_pmu_disable, + .event_init = x86_pmu_event_init, - .enable = x86_pmu_enable, - .disable = x86_pmu_disable, + + .add = x86_pmu_add, + .del = x86_pmu_del, .start = x86_pmu_start, .stop = x86_pmu_stop, .read = x86_pmu_read, - .unthrottle = x86_pmu_unthrottle, + .start_txn = x86_pmu_start_txn, .cancel_txn = x86_pmu_cancel_txn, .commit_txn = x86_pmu_commit_txn, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ee05c90012d2..82395f2378ec 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -763,7 +763,7 @@ again: data.period = event->hw.last_period; if (perf_event_overflow(event, 1, &data, regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 18018d1311cd..9893a2f77b7a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -491,7 +491,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, regs.flags &= ~PERF_EFLAGS_EXACT; if (perf_event_overflow(event, 1, &data, ®s)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 5f8ad7bec636..8beabb958f61 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -252,8 +252,8 @@ DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); -extern int perf_trace_enable(struct perf_event *event); -extern void perf_trace_disable(struct perf_event *event); +extern int perf_trace_add(struct perf_event *event, int flags); +extern void perf_trace_del(struct perf_event *event, int flags); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8cafa15af60d..402073c61669 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -538,6 +538,7 @@ struct hw_perf_event { }; #endif }; + int state; local64_t prev_count; u64 sample_period; u64 last_period; @@ -549,6 +550,13 @@ struct hw_perf_event { #endif }; +/* + * hw_perf_event::state flags + */ +#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ +#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ +#define PERF_HES_ARCH 0x04 + struct perf_event; /* @@ -564,42 +572,62 @@ struct pmu { int *pmu_disable_count; + /* + * Fully disable/enable this PMU, can be used to protect from the PMI + * as well as for lazy/batch writing of the MSRs. + */ void (*pmu_enable) (struct pmu *pmu); /* optional */ void (*pmu_disable) (struct pmu *pmu); /* optional */ /* + * Try and initialize the event for this PMU. * Should return -ENOENT when the @event doesn't match this PMU. */ int (*event_init) (struct perf_event *event); - int (*enable) (struct perf_event *event); - void (*disable) (struct perf_event *event); - int (*start) (struct perf_event *event); - void (*stop) (struct perf_event *event); +#define PERF_EF_START 0x01 /* start the counter when adding */ +#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ +#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ + + /* + * Adds/Removes a counter to/from the PMU, can be done inside + * a transaction, see the ->*_txn() methods. + */ + int (*add) (struct perf_event *event, int flags); + void (*del) (struct perf_event *event, int flags); + + /* + * Starts/Stops a counter present on the PMU. The PMI handler + * should stop the counter when perf_event_overflow() returns + * !0. ->start() will be used to continue. + */ + void (*start) (struct perf_event *event, int flags); + void (*stop) (struct perf_event *event, int flags); + + /* + * Updates the counter value of the event. + */ void (*read) (struct perf_event *event); - void (*unthrottle) (struct perf_event *event); /* * Group events scheduling is treated as a transaction, add * group events as a whole and perform one schedulability test. * If the test fails, roll back the whole group - */ - - /* - * Start the transaction, after this ->enable() doesn't need to + * + * Start the transaction, after this ->add() doesn't need to * do schedulability tests. */ void (*start_txn) (struct pmu *pmu); /* optional */ /* - * If ->start_txn() disabled the ->enable() schedulability test + * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ int (*commit_txn) (struct pmu *pmu); /* optional */ /* - * Will cancel the transaction, assumes ->disable() is called - * for each successfull ->enable() during the transaction. + * Will cancel the transaction, assumes ->del() is called + * for each successfull ->add() during the transaction. */ void (*cancel_txn) (struct pmu *pmu); /* optional */ }; @@ -680,7 +708,7 @@ struct perf_event { int nr_siblings; int group_flags; struct perf_event *group_leader; - struct pmu *pmu; + struct pmu *pmu; enum perf_event_active_state state; unsigned int attach_state; diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index e9c5cfa1fd20..6f150095cafe 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -586,10 +586,35 @@ static int hw_breakpoint_event_init(struct perf_event *bp) return 0; } +static int hw_breakpoint_add(struct perf_event *bp, int flags) +{ + if (!(flags & PERF_EF_START)) + bp->hw.state = PERF_HES_STOPPED; + + return arch_install_hw_breakpoint(bp); +} + +static void hw_breakpoint_del(struct perf_event *bp, int flags) +{ + arch_uninstall_hw_breakpoint(bp); +} + +static void hw_breakpoint_start(struct perf_event *bp, int flags) +{ + bp->hw.state = 0; +} + +static void hw_breakpoint_stop(struct perf_event *bp, int flags) +{ + bp->hw.state = PERF_HES_STOPPED; +} + static struct pmu perf_breakpoint = { .event_init = hw_breakpoint_event_init, - .enable = arch_install_hw_breakpoint, - .disable = arch_uninstall_hw_breakpoint, + .add = hw_breakpoint_add, + .del = hw_breakpoint_del, + .start = hw_breakpoint_start, + .stop = hw_breakpoint_stop, .read = hw_breakpoint_pmu_read, }; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 1a6cdbf0d091..3bace4fd0355 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -424,7 +424,7 @@ event_sched_out(struct perf_event *event, event->state = PERF_EVENT_STATE_OFF; } event->tstamp_stopped = ctx->time; - event->pmu->disable(event); + event->pmu->del(event, 0); event->oncpu = -1; if (!is_software_event(event)) @@ -649,7 +649,7 @@ event_sched_in(struct perf_event *event, */ smp_wmb(); - if (event->pmu->enable(event)) { + if (event->pmu->add(event, PERF_EF_START)) { event->state = PERF_EVENT_STATE_INACTIVE; event->oncpu = -1; return -EAGAIN; @@ -1482,22 +1482,6 @@ do { \ return div64_u64(dividend, divisor); } -static void perf_event_stop(struct perf_event *event) -{ - if (!event->pmu->stop) - return event->pmu->disable(event); - - return event->pmu->stop(event); -} - -static int perf_event_start(struct perf_event *event) -{ - if (!event->pmu->start) - return event->pmu->enable(event); - - return event->pmu->start(event); -} - static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) { struct hw_perf_event *hwc = &event->hw; @@ -1517,9 +1501,9 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) hwc->sample_period = sample_period; if (local64_read(&hwc->period_left) > 8*sample_period) { - perf_event_stop(event); + event->pmu->stop(event, PERF_EF_UPDATE); local64_set(&hwc->period_left, 0); - perf_event_start(event); + event->pmu->start(event, PERF_EF_RELOAD); } } @@ -1548,7 +1532,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) */ if (interrupts == MAX_INTERRUPTS) { perf_log_throttle(event, 1); - event->pmu->unthrottle(event); + event->pmu->start(event, 0); } if (!event->attr.freq || !event->attr.sample_freq) @@ -2506,6 +2490,9 @@ int perf_event_task_disable(void) static int perf_event_index(struct perf_event *event) { + if (event->hw.state & PERF_HES_STOPPED) + return 0; + if (event->state != PERF_EVENT_STATE_ACTIVE) return 0; @@ -4120,8 +4107,6 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, struct hw_perf_event *hwc = &event->hw; int ret = 0; - throttle = (throttle && event->pmu->unthrottle != NULL); - if (!throttle) { hwc->interrupts++; } else { @@ -4246,7 +4231,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, } } -static void perf_swevent_add(struct perf_event *event, u64 nr, +static void perf_swevent_event(struct perf_event *event, u64 nr, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { @@ -4272,6 +4257,9 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs) { + if (event->hw.state & PERF_HES_STOPPED) + return 0; + if (regs) { if (event->attr.exclude_user && user_mode(regs)) return 1; @@ -4371,7 +4359,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, hlist_for_each_entry_rcu(event, node, head, hlist_entry) { if (perf_swevent_match(event, type, event_id, data, regs)) - perf_swevent_add(event, nr, nmi, data, regs); + perf_swevent_event(event, nr, nmi, data, regs); } end: rcu_read_unlock(); @@ -4415,7 +4403,7 @@ static void perf_swevent_read(struct perf_event *event) { } -static int perf_swevent_enable(struct perf_event *event) +static int perf_swevent_add(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; struct perf_cpu_context *cpuctx; @@ -4428,6 +4416,8 @@ static int perf_swevent_enable(struct perf_event *event) perf_swevent_set_period(event); } + hwc->state = !(flags & PERF_EF_START); + head = find_swevent_head(cpuctx, event); if (WARN_ON_ONCE(!head)) return -EINVAL; @@ -4437,18 +4427,19 @@ static int perf_swevent_enable(struct perf_event *event) return 0; } -static void perf_swevent_disable(struct perf_event *event) +static void perf_swevent_del(struct perf_event *event, int flags) { hlist_del_rcu(&event->hlist_entry); } -static void perf_swevent_void(struct perf_event *event) +static void perf_swevent_start(struct perf_event *event, int flags) { + event->hw.state = 0; } -static int perf_swevent_int(struct perf_event *event) +static void perf_swevent_stop(struct perf_event *event, int flags) { - return 0; + event->hw.state = PERF_HES_STOPPED; } /* Deref the hlist from the update side */ @@ -4604,12 +4595,11 @@ static int perf_swevent_init(struct perf_event *event) static struct pmu perf_swevent = { .event_init = perf_swevent_init, - .enable = perf_swevent_enable, - .disable = perf_swevent_disable, - .start = perf_swevent_int, - .stop = perf_swevent_void, + .add = perf_swevent_add, + .del = perf_swevent_del, + .start = perf_swevent_start, + .stop = perf_swevent_stop, .read = perf_swevent_read, - .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ }; #ifdef CONFIG_EVENT_TRACING @@ -4657,7 +4647,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, hlist_for_each_entry_rcu(event, node, head, hlist_entry) { if (perf_tp_event_match(event, &data, regs)) - perf_swevent_add(event, count, 1, &data, regs); + perf_swevent_event(event, count, 1, &data, regs); } perf_swevent_put_recursion_context(rctx); @@ -4696,12 +4686,11 @@ static int perf_tp_event_init(struct perf_event *event) static struct pmu perf_tracepoint = { .event_init = perf_tp_event_init, - .enable = perf_trace_enable, - .disable = perf_trace_disable, - .start = perf_swevent_int, - .stop = perf_swevent_void, + .add = perf_trace_add, + .del = perf_trace_del, + .start = perf_swevent_start, + .stop = perf_swevent_stop, .read = perf_swevent_read, - .unthrottle = perf_swevent_void, }; static inline void perf_tp_register(void) @@ -4757,8 +4746,8 @@ void perf_bp_event(struct perf_event *bp, void *data) perf_sample_data_init(&sample, bp->attr.bp_addr); - if (!perf_exclude_event(bp, regs)) - perf_swevent_add(bp, 1, 1, &sample, regs); + if (!bp->hw.state && !perf_exclude_event(bp, regs)) + perf_swevent_event(bp, 1, 1, &sample, regs); } #endif @@ -4834,32 +4823,39 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) static void cpu_clock_event_update(struct perf_event *event) { - int cpu = raw_smp_processor_id(); s64 prev; u64 now; - now = cpu_clock(cpu); + now = local_clock(); prev = local64_xchg(&event->hw.prev_count, now); local64_add(now - prev, &event->count); } -static int cpu_clock_event_enable(struct perf_event *event) +static void cpu_clock_event_start(struct perf_event *event, int flags) { - struct hw_perf_event *hwc = &event->hw; - int cpu = raw_smp_processor_id(); - - local64_set(&hwc->prev_count, cpu_clock(cpu)); + local64_set(&event->hw.prev_count, local_clock()); perf_swevent_start_hrtimer(event); - - return 0; } -static void cpu_clock_event_disable(struct perf_event *event) +static void cpu_clock_event_stop(struct perf_event *event, int flags) { perf_swevent_cancel_hrtimer(event); cpu_clock_event_update(event); } +static int cpu_clock_event_add(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_START) + cpu_clock_event_start(event, flags); + + return 0; +} + +static void cpu_clock_event_del(struct perf_event *event, int flags) +{ + cpu_clock_event_stop(event, flags); +} + static void cpu_clock_event_read(struct perf_event *event) { cpu_clock_event_update(event); @@ -4878,8 +4874,10 @@ static int cpu_clock_event_init(struct perf_event *event) static struct pmu perf_cpu_clock = { .event_init = cpu_clock_event_init, - .enable = cpu_clock_event_enable, - .disable = cpu_clock_event_disable, + .add = cpu_clock_event_add, + .del = cpu_clock_event_del, + .start = cpu_clock_event_start, + .stop = cpu_clock_event_stop, .read = cpu_clock_event_read, }; @@ -4897,25 +4895,29 @@ static void task_clock_event_update(struct perf_event *event, u64 now) local64_add(delta, &event->count); } -static int task_clock_event_enable(struct perf_event *event) +static void task_clock_event_start(struct perf_event *event, int flags) { - struct hw_perf_event *hwc = &event->hw; - u64 now; - - now = event->ctx->time; - - local64_set(&hwc->prev_count, now); - + local64_set(&event->hw.prev_count, event->ctx->time); perf_swevent_start_hrtimer(event); - - return 0; } -static void task_clock_event_disable(struct perf_event *event) +static void task_clock_event_stop(struct perf_event *event, int flags) { perf_swevent_cancel_hrtimer(event); task_clock_event_update(event, event->ctx->time); +} + +static int task_clock_event_add(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_START) + task_clock_event_start(event, flags); + return 0; +} + +static void task_clock_event_del(struct perf_event *event, int flags) +{ + task_clock_event_stop(event, PERF_EF_UPDATE); } static void task_clock_event_read(struct perf_event *event) @@ -4947,8 +4949,10 @@ static int task_clock_event_init(struct perf_event *event) static struct pmu perf_task_clock = { .event_init = task_clock_event_init, - .enable = task_clock_event_enable, - .disable = task_clock_event_disable, + .add = task_clock_event_add, + .del = task_clock_event_del, + .start = task_clock_event_start, + .stop = task_clock_event_stop, .read = task_clock_event_read, }; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index f3bbcd1c90c8..39c059ca670e 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -101,7 +101,7 @@ int perf_trace_init(struct perf_event *p_event) return ret; } -int perf_trace_enable(struct perf_event *p_event) +int perf_trace_add(struct perf_event *p_event, int flags) { struct ftrace_event_call *tp_event = p_event->tp_event; struct hlist_head __percpu *pcpu_list; @@ -111,13 +111,16 @@ int perf_trace_enable(struct perf_event *p_event) if (WARN_ON_ONCE(!pcpu_list)) return -EINVAL; + if (!(flags & PERF_EF_START)) + p_event->hw.state = PERF_HES_STOPPED; + list = this_cpu_ptr(pcpu_list); hlist_add_head_rcu(&p_event->hlist_entry, list); return 0; } -void perf_trace_disable(struct perf_event *p_event) +void perf_trace_del(struct perf_event *p_event, int flags) { hlist_del_rcu(&p_event->hlist_entry); } -- cgit v1.2.3 From 15ac9a395a753cb28c674e7ea80386ffdff21785 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 15:51:45 +0200 Subject: perf: Remove the sysfs bits Neither the overcommit nor the reservation sysfs parameter were actually working, remove them as they'll only get in the way. Signed-off-by: Peter Zijlstra Cc: paulus LKML-Reference: Signed-off-by: Ingo Molnar --- arch/alpha/kernel/perf_event.c | 3 +- arch/arm/kernel/perf_event.c | 9 +-- arch/sparc/kernel/perf_event.c | 9 +-- arch/x86/kernel/cpu/perf_event.c | 1 - include/linux/perf_event.h | 6 -- kernel/perf_event.c | 124 --------------------------------------- 6 files changed, 5 insertions(+), 147 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 380ef02d557a..9bb8c024080c 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -808,7 +808,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); /* la_ptr is the counter that overflowed. */ - if (unlikely(la_ptr >= perf_max_events)) { + if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) { /* This should never occur! */ irq_err_count++; pr_warning("PMI: silly index %ld\n", la_ptr); @@ -879,7 +879,6 @@ void __init init_hw_perf_events(void) /* And set up PMU specification */ alpha_pmu = &ev67_pmu; - perf_max_events = alpha_pmu->num_pmcs; perf_pmu_register(&pmu); } diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 448cfa6b3ef0..45d6a35217c1 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -534,7 +534,7 @@ static int armpmu_event_init(struct perf_event *event) event->destroy = hw_perf_event_destroy; if (!atomic_inc_not_zero(&active_events)) { - if (atomic_read(&active_events) > perf_max_events) { + if (atomic_read(&active_events) > armpmu.num_events) { atomic_dec(&active_events); return -ENOSPC; } @@ -2974,14 +2974,12 @@ init_hw_perf_events(void) armpmu = &armv6pmu; memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, sizeof(armv6_perf_cache_map)); - perf_max_events = armv6pmu.num_events; break; case 0xB020: /* ARM11mpcore */ armpmu = &armv6mpcore_pmu; memcpy(armpmu_perf_cache_map, armv6mpcore_perf_cache_map, sizeof(armv6mpcore_perf_cache_map)); - perf_max_events = armv6mpcore_pmu.num_events; break; case 0xC080: /* Cortex-A8 */ armv7pmu.id = ARM_PERF_PMU_ID_CA8; @@ -2993,7 +2991,6 @@ init_hw_perf_events(void) /* Reset PMNC and read the nb of CNTx counters supported */ armv7pmu.num_events = armv7_reset_read_pmnc(); - perf_max_events = armv7pmu.num_events; break; case 0xC090: /* Cortex-A9 */ armv7pmu.id = ARM_PERF_PMU_ID_CA9; @@ -3005,7 +3002,6 @@ init_hw_perf_events(void) /* Reset PMNC and read the nb of CNTx counters supported */ armv7pmu.num_events = armv7_reset_read_pmnc(); - perf_max_events = armv7pmu.num_events; break; } /* Intel CPUs [xscale]. */ @@ -3016,13 +3012,11 @@ init_hw_perf_events(void) armpmu = &xscale1pmu; memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, sizeof(xscale_perf_cache_map)); - perf_max_events = xscale1pmu.num_events; break; case 2: armpmu = &xscale2pmu; memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, sizeof(xscale_perf_cache_map)); - perf_max_events = xscale2pmu.num_events; break; } } @@ -3032,7 +3026,6 @@ init_hw_perf_events(void) arm_pmu_names[armpmu->id], armpmu->num_events); } else { pr_info("no hardware support available\n"); - perf_max_events = -1; } perf_pmu_register(&pmu); diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 516be2314b54..f9a706759364 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -897,7 +897,7 @@ static int sparc_check_constraints(struct perf_event **evts, if (!n_ev) return 0; - if (n_ev > perf_max_events) + if (n_ev > MAX_HWEVENTS) return -1; msk0 = perf_event_get_msk(events[0]); @@ -1014,7 +1014,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) perf_pmu_disable(event->pmu); n0 = cpuc->n_events; - if (n0 >= perf_max_events) + if (n0 >= MAX_HWEVENTS) goto out; cpuc->event[n0] = event; @@ -1097,7 +1097,7 @@ static int sparc_pmu_event_init(struct perf_event *event) n = 0; if (event->group_leader != event) { n = collect_events(event->group_leader, - perf_max_events - 1, + MAX_HWEVENTS - 1, evts, events, current_idx_dmy); if (n < 0) return -EINVAL; @@ -1309,9 +1309,6 @@ void __init init_hw_perf_events(void) pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); - /* All sparc64 PMUs currently have 2 events. */ - perf_max_events = 2; - perf_pmu_register(&pmu); register_die_notifier(&perf_event_nmi_notifier); } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index dd6fec710677..0fb17050360f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1396,7 +1396,6 @@ void __init init_hw_perf_events(void) x86_pmu.num_counters = X86_PMC_MAX_GENERIC; } x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; - perf_max_events = x86_pmu.num_counters; if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 402073c61669..b22176d3ebdf 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -860,7 +860,6 @@ struct perf_cpu_context { struct perf_event_context ctx; struct perf_event_context *task_ctx; int active_oncpu; - int max_pertask; int exclusive; struct swevent_hlist *swevent_hlist; struct mutex hlist_mutex; @@ -883,11 +882,6 @@ struct perf_output_handle { #ifdef CONFIG_PERF_EVENTS -/* - * Set by architecture code: - */ -extern int perf_max_events; - extern int perf_pmu_register(struct pmu *pmu); extern void perf_pmu_unregister(struct pmu *pmu); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 3bace4fd0355..8462e69409ae 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -39,10 +39,6 @@ */ static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); -int perf_max_events __read_mostly = 1; -static int perf_reserved_percpu __read_mostly; -static int perf_overcommit __read_mostly = 1; - static atomic_t nr_events __read_mostly; static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; @@ -66,11 +62,6 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000; static atomic64_t perf_event_id; -/* - * Lock for (sysadmin-configurable) event reservations: - */ -static DEFINE_SPINLOCK(perf_resource_lock); - void __weak perf_event_print_debug(void) { } void perf_pmu_disable(struct pmu *pmu) @@ -480,16 +471,6 @@ static void __perf_event_remove_from_context(void *info) list_del_event(event, ctx); - if (!ctx->task) { - /* - * Allow more per task events with respect to the - * reservation: - */ - cpuctx->max_pertask = - min(perf_max_events - ctx->nr_events, - perf_max_events - perf_reserved_percpu); - } - raw_spin_unlock(&ctx->lock); } @@ -823,9 +804,6 @@ static void __perf_install_in_context(void *info) } } - if (!err && !ctx->task && cpuctx->max_pertask) - cpuctx->max_pertask--; - unlock: raw_spin_unlock(&ctx->lock); } @@ -5930,10 +5908,6 @@ static void __cpuinit perf_event_init_cpu(int cpu) cpuctx = &per_cpu(perf_cpu_context, cpu); - spin_lock(&perf_resource_lock); - cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; - spin_unlock(&perf_resource_lock); - mutex_lock(&cpuctx->hlist_mutex); if (cpuctx->hlist_refcount > 0) { struct swevent_hlist *hlist; @@ -6008,101 +5982,3 @@ void __init perf_event_init(void) perf_tp_register(); perf_cpu_notifier(perf_cpu_notify); } - -static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - char *buf) -{ - return sprintf(buf, "%d\n", perf_reserved_percpu); -} - -static ssize_t -perf_set_reserve_percpu(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - const char *buf, - size_t count) -{ - struct perf_cpu_context *cpuctx; - unsigned long val; - int err, cpu, mpt; - - err = strict_strtoul(buf, 10, &val); - if (err) - return err; - if (val > perf_max_events) - return -EINVAL; - - spin_lock(&perf_resource_lock); - perf_reserved_percpu = val; - for_each_online_cpu(cpu) { - cpuctx = &per_cpu(perf_cpu_context, cpu); - raw_spin_lock_irq(&cpuctx->ctx.lock); - mpt = min(perf_max_events - cpuctx->ctx.nr_events, - perf_max_events - perf_reserved_percpu); - cpuctx->max_pertask = mpt; - raw_spin_unlock_irq(&cpuctx->ctx.lock); - } - spin_unlock(&perf_resource_lock); - - return count; -} - -static ssize_t perf_show_overcommit(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - char *buf) -{ - return sprintf(buf, "%d\n", perf_overcommit); -} - -static ssize_t -perf_set_overcommit(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - const char *buf, size_t count) -{ - unsigned long val; - int err; - - err = strict_strtoul(buf, 10, &val); - if (err) - return err; - if (val > 1) - return -EINVAL; - - spin_lock(&perf_resource_lock); - perf_overcommit = val; - spin_unlock(&perf_resource_lock); - - return count; -} - -static SYSDEV_CLASS_ATTR( - reserve_percpu, - 0644, - perf_show_reserve_percpu, - perf_set_reserve_percpu - ); - -static SYSDEV_CLASS_ATTR( - overcommit, - 0644, - perf_show_overcommit, - perf_set_overcommit - ); - -static struct attribute *perfclass_attrs[] = { - &attr_reserve_percpu.attr, - &attr_overcommit.attr, - NULL -}; - -static struct attribute_group perfclass_attr_group = { - .attrs = perfclass_attrs, - .name = "perf_events", -}; - -static int __init perf_event_sysfs_init(void) -{ - return sysfs_create_group(&cpu_sysdev_class.kset.kobj, - &perfclass_attr_group); -} -device_initcall(perf_event_sysfs_init); -- cgit v1.2.3 From b28ab83c595e767f2028276b7398d17f2253cec0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 14:48:15 +0200 Subject: perf: Remove the swevent hash-table from the cpu context Separate the swevent hash-table from the cpu_context bits in preparation for per pmu cpu contexts. This keeps the swevent hash a global entity. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 --- kernel/perf_event.c | 104 +++++++++++++++++++++++++-------------------- 2 files changed, 58 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b22176d3ebdf..4ab4f0ca09a1 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -861,12 +861,6 @@ struct perf_cpu_context { struct perf_event_context *task_ctx; int active_oncpu; int exclusive; - struct swevent_hlist *swevent_hlist; - struct mutex hlist_mutex; - int hlist_refcount; - - /* Recursion avoidance in each contexts */ - int recursion[PERF_NR_CONTEXTS]; }; struct perf_output_handle { diff --git a/kernel/perf_event.c b/kernel/perf_event.c index a3c86a8335c4..2c47ed6c4f26 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4154,6 +4154,17 @@ int perf_event_overflow(struct perf_event *event, int nmi, * Generic software event infrastructure */ +struct swevent_htable { + struct swevent_hlist *swevent_hlist; + struct mutex hlist_mutex; + int hlist_refcount; + + /* Recursion avoidance in each contexts */ + int recursion[PERF_NR_CONTEXTS]; +}; + +static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); + /* * We directly increment event->count and keep a second value in * event->hw.period_left to count intervals. This period event @@ -4286,11 +4297,11 @@ __find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id) /* For the read side: events when they trigger */ static inline struct hlist_head * -find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) +find_swevent_head_rcu(struct swevent_htable *swhash, u64 type, u32 event_id) { struct swevent_hlist *hlist; - hlist = rcu_dereference(ctx->swevent_hlist); + hlist = rcu_dereference(swhash->swevent_hlist); if (!hlist) return NULL; @@ -4299,7 +4310,7 @@ find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) /* For the event head insertion and removal in the hlist */ static inline struct hlist_head * -find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) +find_swevent_head(struct swevent_htable *swhash, struct perf_event *event) { struct swevent_hlist *hlist; u32 event_id = event->attr.config; @@ -4310,7 +4321,7 @@ find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) * and release. Which makes the protected version suitable here. * The context lock guarantees that. */ - hlist = rcu_dereference_protected(ctx->swevent_hlist, + hlist = rcu_dereference_protected(swhash->swevent_hlist, lockdep_is_held(&event->ctx->lock)); if (!hlist) return NULL; @@ -4323,17 +4334,13 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, struct perf_sample_data *data, struct pt_regs *regs) { - struct perf_cpu_context *cpuctx; + struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); struct perf_event *event; struct hlist_node *node; struct hlist_head *head; - cpuctx = &__get_cpu_var(perf_cpu_context); - rcu_read_lock(); - - head = find_swevent_head_rcu(cpuctx, type, event_id); - + head = find_swevent_head_rcu(swhash, type, event_id); if (!head) goto end; @@ -4347,17 +4354,17 @@ end: int perf_swevent_get_recursion_context(void) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); - return get_recursion_context(cpuctx->recursion); + return get_recursion_context(swhash->recursion); } EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); void inline perf_swevent_put_recursion_context(int rctx) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); - put_recursion_context(cpuctx->recursion, rctx); + put_recursion_context(swhash->recursion, rctx); } void __perf_sw_event(u32 event_id, u64 nr, int nmi, @@ -4385,12 +4392,10 @@ static void perf_swevent_read(struct perf_event *event) static int perf_swevent_add(struct perf_event *event, int flags) { + struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); struct hw_perf_event *hwc = &event->hw; - struct perf_cpu_context *cpuctx; struct hlist_head *head; - cpuctx = &__get_cpu_var(perf_cpu_context); - if (hwc->sample_period) { hwc->last_period = hwc->sample_period; perf_swevent_set_period(event); @@ -4398,7 +4403,7 @@ static int perf_swevent_add(struct perf_event *event, int flags) hwc->state = !(flags & PERF_EF_START); - head = find_swevent_head(cpuctx, event); + head = find_swevent_head(swhash, event); if (WARN_ON_ONCE(!head)) return -EINVAL; @@ -4424,10 +4429,10 @@ static void perf_swevent_stop(struct perf_event *event, int flags) /* Deref the hlist from the update side */ static inline struct swevent_hlist * -swevent_hlist_deref(struct perf_cpu_context *cpuctx) +swevent_hlist_deref(struct swevent_htable *swhash) { - return rcu_dereference_protected(cpuctx->swevent_hlist, - lockdep_is_held(&cpuctx->hlist_mutex)); + return rcu_dereference_protected(swhash->swevent_hlist, + lockdep_is_held(&swhash->hlist_mutex)); } static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) @@ -4438,27 +4443,27 @@ static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) kfree(hlist); } -static void swevent_hlist_release(struct perf_cpu_context *cpuctx) +static void swevent_hlist_release(struct swevent_htable *swhash) { - struct swevent_hlist *hlist = swevent_hlist_deref(cpuctx); + struct swevent_hlist *hlist = swevent_hlist_deref(swhash); if (!hlist) return; - rcu_assign_pointer(cpuctx->swevent_hlist, NULL); + rcu_assign_pointer(swhash->swevent_hlist, NULL); call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); } static void swevent_hlist_put_cpu(struct perf_event *event, int cpu) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); - mutex_lock(&cpuctx->hlist_mutex); + mutex_lock(&swhash->hlist_mutex); - if (!--cpuctx->hlist_refcount) - swevent_hlist_release(cpuctx); + if (!--swhash->hlist_refcount) + swevent_hlist_release(swhash); - mutex_unlock(&cpuctx->hlist_mutex); + mutex_unlock(&swhash->hlist_mutex); } static void swevent_hlist_put(struct perf_event *event) @@ -4476,12 +4481,12 @@ static void swevent_hlist_put(struct perf_event *event) static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); int err = 0; - mutex_lock(&cpuctx->hlist_mutex); + mutex_lock(&swhash->hlist_mutex); - if (!swevent_hlist_deref(cpuctx) && cpu_online(cpu)) { + if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) { struct swevent_hlist *hlist; hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); @@ -4489,11 +4494,11 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) err = -ENOMEM; goto exit; } - rcu_assign_pointer(cpuctx->swevent_hlist, hlist); + rcu_assign_pointer(swhash->swevent_hlist, hlist); } - cpuctx->hlist_refcount++; + swhash->hlist_refcount++; exit: - mutex_unlock(&cpuctx->hlist_mutex); + mutex_unlock(&swhash->hlist_mutex); return err; } @@ -5889,12 +5894,15 @@ int perf_event_init_task(struct task_struct *child) static void __init perf_event_init_all_cpus(void) { - int cpu; struct perf_cpu_context *cpuctx; + struct swevent_htable *swhash; + int cpu; for_each_possible_cpu(cpu) { + swhash = &per_cpu(swevent_htable, cpu); + mutex_init(&swhash->hlist_mutex); + cpuctx = &per_cpu(perf_cpu_context, cpu); - mutex_init(&cpuctx->hlist_mutex); __perf_event_init_context(&cpuctx->ctx, NULL); } } @@ -5902,18 +5910,21 @@ static void __init perf_event_init_all_cpus(void) static void __cpuinit perf_event_init_cpu(int cpu) { struct perf_cpu_context *cpuctx; + struct swevent_htable *swhash; cpuctx = &per_cpu(perf_cpu_context, cpu); - mutex_lock(&cpuctx->hlist_mutex); - if (cpuctx->hlist_refcount > 0) { + swhash = &per_cpu(swevent_htable, cpu); + + mutex_lock(&swhash->hlist_mutex); + if (swhash->hlist_refcount > 0) { struct swevent_hlist *hlist; - hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); - WARN_ON_ONCE(!hlist); - rcu_assign_pointer(cpuctx->swevent_hlist, hlist); + hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu)); + WARN_ON(!hlist); + rcu_assign_pointer(swhash->swevent_hlist, hlist); } - mutex_unlock(&cpuctx->hlist_mutex); + mutex_unlock(&swhash->hlist_mutex); } #ifdef CONFIG_HOTPLUG_CPU @@ -5931,11 +5942,12 @@ static void __perf_event_exit_cpu(void *info) static void perf_event_exit_cpu(int cpu) { struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); struct perf_event_context *ctx = &cpuctx->ctx; - mutex_lock(&cpuctx->hlist_mutex); - swevent_hlist_release(cpuctx); - mutex_unlock(&cpuctx->hlist_mutex); + mutex_lock(&swhash->hlist_mutex); + swevent_hlist_release(swhash); + mutex_unlock(&swhash->hlist_mutex); mutex_lock(&ctx->mutex); smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1); -- cgit v1.2.3 From b5ab4cd563e7ab49b27957704112a8ecade54e1f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 16:32:21 +0200 Subject: perf: Per cpu-context rotation timer Give each cpu-context its own timer so that it is a self contained entity, this eases the way for per-pmu-per-cpu contexts as well as provides the basic infrastructure to allow different rotation times per pmu. Things to look at: - folding the tick and these TICK_NSEC timers - separate task context rotation Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 ++- kernel/perf_event.c | 80 ++++++++++++++++++++++++++++++++++++---------- kernel/sched.c | 2 -- 3 files changed, 65 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4ab4f0ca09a1..fa04537df55b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -861,6 +861,8 @@ struct perf_cpu_context { struct perf_event_context *task_ctx; int active_oncpu; int exclusive; + u64 timer_interval; + struct hrtimer timer; }; struct perf_output_handle { @@ -881,7 +883,6 @@ extern void perf_pmu_unregister(struct pmu *pmu); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); -extern void perf_event_task_tick(struct task_struct *task); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); @@ -1067,8 +1068,6 @@ perf_event_task_sched_in(struct task_struct *task) { } static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { } -static inline void -perf_event_task_tick(struct task_struct *task) { } static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 2c47ed6c4f26..d75e4c8727f9 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -78,6 +78,25 @@ void perf_pmu_enable(struct pmu *pmu) pmu->pmu_enable(pmu); } +static void perf_pmu_rotate_start(void) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + + if (hrtimer_active(&cpuctx->timer)) + return; + + __hrtimer_start_range_ns(&cpuctx->timer, + ns_to_ktime(cpuctx->timer_interval), 0, + HRTIMER_MODE_REL_PINNED, 0); +} + +static void perf_pmu_rotate_stop(void) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + + hrtimer_cancel(&cpuctx->timer); +} + static void get_ctx(struct perf_event_context *ctx) { WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); @@ -281,6 +300,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) } list_add_rcu(&event->event_entry, &ctx->event_list); + if (!ctx->nr_events) + perf_pmu_rotate_start(); ctx->nr_events++; if (event->attr.inherit_stat) ctx->nr_stat++; @@ -1383,6 +1404,12 @@ void perf_event_task_sched_in(struct task_struct *task) ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); cpuctx->task_ctx = ctx; + + /* + * Since these rotations are per-cpu, we need to ensure the + * cpu-context we got scheduled on is actually rotating. + */ + perf_pmu_rotate_start(); } #define MAX_INTERRUPTS (~0ULL) @@ -1487,7 +1514,7 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) } } -static void perf_ctx_adjust_freq(struct perf_event_context *ctx) +static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) { struct perf_event *event; struct hw_perf_event *hwc; @@ -1524,7 +1551,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) hwc->freq_count_stamp = now; if (delta > 0) - perf_adjust_period(event, TICK_NSEC, delta); + perf_adjust_period(event, period, delta); } raw_spin_unlock(&ctx->lock); } @@ -1542,30 +1569,39 @@ static void rotate_ctx(struct perf_event_context *ctx) raw_spin_unlock(&ctx->lock); } -void perf_event_task_tick(struct task_struct *curr) +/* + * Cannot race with ->pmu_rotate_start() because this is ran from hardirq + * context, and ->pmu_rotate_start() is called with irqs disabled (both are + * cpu affine, so there are no SMP races). + */ +static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer) { + enum hrtimer_restart restart = HRTIMER_NORESTART; struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; int rotate = 0; - if (!atomic_read(&nr_events)) - return; + cpuctx = container_of(timer, struct perf_cpu_context, timer); - cpuctx = &__get_cpu_var(perf_cpu_context); - if (cpuctx->ctx.nr_events && - cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) - rotate = 1; + if (cpuctx->ctx.nr_events) { + restart = HRTIMER_RESTART; + if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) + rotate = 1; + } - ctx = curr->perf_event_ctxp; - if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active) - rotate = 1; + ctx = current->perf_event_ctxp; + if (ctx && ctx->nr_events) { + restart = HRTIMER_RESTART; + if (ctx->nr_events != ctx->nr_active) + rotate = 1; + } - perf_ctx_adjust_freq(&cpuctx->ctx); + perf_ctx_adjust_freq(&cpuctx->ctx, cpuctx->timer_interval); if (ctx) - perf_ctx_adjust_freq(ctx); + perf_ctx_adjust_freq(ctx, cpuctx->timer_interval); if (!rotate) - return; + goto done; cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); if (ctx) @@ -1577,7 +1613,12 @@ void perf_event_task_tick(struct task_struct *curr) cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); if (ctx) - task_ctx_sched_in(curr, EVENT_FLEXIBLE); + task_ctx_sched_in(current, EVENT_FLEXIBLE); + +done: + hrtimer_forward_now(timer, ns_to_ktime(cpuctx->timer_interval)); + + return restart; } static int event_enable_on_exec(struct perf_event *event, @@ -4786,7 +4827,7 @@ static void perf_swevent_start_hrtimer(struct perf_event *event) } __hrtimer_start_range_ns(&hwc->hrtimer, ns_to_ktime(period), 0, - HRTIMER_MODE_REL, 0); + HRTIMER_MODE_REL_PINNED, 0); } } @@ -5904,6 +5945,9 @@ static void __init perf_event_init_all_cpus(void) cpuctx = &per_cpu(perf_cpu_context, cpu); __perf_event_init_context(&cpuctx->ctx, NULL); + cpuctx->timer_interval = TICK_NSEC; + hrtimer_init(&cpuctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cpuctx->timer.function = perf_event_context_tick; } } @@ -5934,6 +5978,8 @@ static void __perf_event_exit_cpu(void *info) struct perf_event_context *ctx = &cpuctx->ctx; struct perf_event *event, *tmp; + perf_pmu_rotate_stop(); + list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) __perf_event_remove_from_context(event); list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) diff --git a/kernel/sched.c b/kernel/sched.c index 09b574e7f4df..66a02ba83c01 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3578,8 +3578,6 @@ void scheduler_tick(void) curr->sched_class->task_tick(rq, curr, 0); raw_spin_unlock(&rq->lock); - perf_event_task_tick(curr); - #ifdef CONFIG_SMP rq->idle_at_tick = idle_cpu(cpu); trigger_load_balance(rq, cpu); -- cgit v1.2.3 From 108b02cfce04ee90b0a07ee0b104baffd39f5934 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 14:32:03 +0200 Subject: perf: Per-pmu-per-cpu contexts Allocate per-cpu contexts per pmu. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 +- kernel/perf_event.c | 178 ++++++++++++++++++++++++++++----------------- 2 files changed, 113 insertions(+), 69 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fa04537df55b..22155ef3b362 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -570,7 +570,8 @@ struct perf_event; struct pmu { struct list_head entry; - int *pmu_disable_count; + int * __percpu pmu_disable_count; + struct perf_cpu_context * __percpu pmu_cpu_context; /* * Fully disable/enable this PMU, can be used to protect from the PMI @@ -808,6 +809,7 @@ struct perf_event { * Used as a container for task events and CPU events as well: */ struct perf_event_context { + struct pmu *pmu; /* * Protect the states of the events in the list, * nr_active, and the list: diff --git a/kernel/perf_event.c b/kernel/perf_event.c index d75e4c8727f9..8ca6e690ffe3 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -34,16 +34,15 @@ #include -/* - * Each CPU has a list of per CPU events: - */ -static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); - static atomic_t nr_events __read_mostly; static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; static atomic_t nr_task_events __read_mostly; +static LIST_HEAD(pmus); +static DEFINE_MUTEX(pmus_lock); +static struct srcu_struct pmus_srcu; + /* * perf event paranoia level: * -1 - not paranoid at all @@ -78,9 +77,9 @@ void perf_pmu_enable(struct pmu *pmu) pmu->pmu_enable(pmu); } -static void perf_pmu_rotate_start(void) +static void perf_pmu_rotate_start(struct pmu *pmu) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); if (hrtimer_active(&cpuctx->timer)) return; @@ -90,9 +89,9 @@ static void perf_pmu_rotate_start(void) HRTIMER_MODE_REL_PINNED, 0); } -static void perf_pmu_rotate_stop(void) +static void perf_pmu_rotate_stop(struct pmu *pmu) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); hrtimer_cancel(&cpuctx->timer); } @@ -301,7 +300,7 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) list_add_rcu(&event->event_entry, &ctx->event_list); if (!ctx->nr_events) - perf_pmu_rotate_start(); + perf_pmu_rotate_start(ctx->pmu); ctx->nr_events++; if (event->attr.inherit_stat) ctx->nr_stat++; @@ -466,6 +465,12 @@ group_sched_out(struct perf_event *group_event, cpuctx->exclusive = 0; } +static inline struct perf_cpu_context * +__get_cpu_context(struct perf_event_context *ctx) +{ + return this_cpu_ptr(ctx->pmu->pmu_cpu_context); +} + /* * Cross CPU call to remove a performance event * @@ -474,9 +479,9 @@ group_sched_out(struct perf_event *group_event, */ static void __perf_event_remove_from_context(void *info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); /* * If this is a task context, we need to check whether it is @@ -556,8 +561,8 @@ retry: static void __perf_event_disable(void *info) { struct perf_event *event = info; - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); /* * If this is a per-task event, need to check whether this @@ -765,10 +770,10 @@ static void add_event_to_ctx(struct perf_event *event, */ static void __perf_install_in_context(void *info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; struct perf_event *leader = event->group_leader; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); int err; /* @@ -912,9 +917,9 @@ static void __perf_event_mark_enabled(struct perf_event *event, static void __perf_event_enable(void *info) { struct perf_event *event = info; - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = event->ctx; struct perf_event *leader = event->group_leader; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); int err; /* @@ -1188,15 +1193,19 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = task->perf_event_ctxp; struct perf_event_context *next_ctx; struct perf_event_context *parent; + struct perf_cpu_context *cpuctx; int do_switch = 1; perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); - if (likely(!ctx || !cpuctx->task_ctx)) + if (likely(!ctx)) + return; + + cpuctx = __get_cpu_context(ctx); + if (!cpuctx->task_ctx) return; rcu_read_lock(); @@ -1242,7 +1251,7 @@ void perf_event_task_sched_out(struct task_struct *task, static void task_ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); if (!cpuctx->task_ctx) return; @@ -1360,8 +1369,8 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, static void task_ctx_sched_in(struct task_struct *task, enum event_type_t event_type) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = task->perf_event_ctxp; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); if (likely(!ctx)) return; @@ -1383,12 +1392,13 @@ static void task_ctx_sched_in(struct task_struct *task, */ void perf_event_task_sched_in(struct task_struct *task) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = task->perf_event_ctxp; + struct perf_cpu_context *cpuctx; if (likely(!ctx)) return; + cpuctx = __get_cpu_context(ctx); if (cpuctx->task_ctx == ctx) return; @@ -1409,7 +1419,7 @@ void perf_event_task_sched_in(struct task_struct *task) * Since these rotations are per-cpu, we need to ensure the * cpu-context we got scheduled on is actually rotating. */ - perf_pmu_rotate_start(); + perf_pmu_rotate_start(ctx->pmu); } #define MAX_INTERRUPTS (~0ULL) @@ -1687,9 +1697,9 @@ out: */ static void __perf_event_read(void *info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); /* * If this is a task context, we need to check whether it is @@ -1962,7 +1972,8 @@ __perf_event_init_context(struct perf_event_context *ctx, ctx->task = task; } -static struct perf_event_context *find_get_context(pid_t pid, int cpu) +static struct perf_event_context * +find_get_context(struct pmu *pmu, pid_t pid, int cpu) { struct perf_event_context *ctx; struct perf_cpu_context *cpuctx; @@ -1986,7 +1997,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) if (!cpu_online(cpu)) return ERR_PTR(-ENODEV); - cpuctx = &per_cpu(perf_cpu_context, cpu); + cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); ctx = &cpuctx->ctx; get_ctx(ctx); @@ -2030,6 +2041,7 @@ retry: if (!ctx) goto errout; __perf_event_init_context(ctx, task); + ctx->pmu = pmu; get_ctx(ctx); if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) { /* @@ -3745,18 +3757,20 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, static void perf_event_task_event(struct perf_task_event *task_event) { - struct perf_cpu_context *cpuctx; struct perf_event_context *ctx = task_event->task_ctx; + struct perf_cpu_context *cpuctx; + struct pmu *pmu; - rcu_read_lock(); - cpuctx = &get_cpu_var(perf_cpu_context); - perf_event_task_ctx(&cpuctx->ctx, task_event); + rcu_read_lock_sched(); + list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + perf_event_task_ctx(&cpuctx->ctx, task_event); + } if (!ctx) ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) perf_event_task_ctx(ctx, task_event); - put_cpu_var(perf_cpu_context); - rcu_read_unlock(); + rcu_read_unlock_sched(); } static void perf_event_task(struct task_struct *task, @@ -3861,6 +3875,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; unsigned int size; + struct pmu *pmu; char comm[TASK_COMM_LEN]; memset(comm, 0, sizeof(comm)); @@ -3872,14 +3887,15 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; - rcu_read_lock(); - cpuctx = &get_cpu_var(perf_cpu_context); - perf_event_comm_ctx(&cpuctx->ctx, comm_event); + rcu_read_lock_sched(); + list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + perf_event_comm_ctx(&cpuctx->ctx, comm_event); + } ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) perf_event_comm_ctx(ctx, comm_event); - put_cpu_var(perf_cpu_context); - rcu_read_unlock(); + rcu_read_unlock_sched(); } void perf_event_comm(struct task_struct *task) @@ -3989,6 +4005,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) char tmp[16]; char *buf = NULL; const char *name; + struct pmu *pmu; memset(tmp, 0, sizeof(tmp)); @@ -4040,14 +4057,16 @@ got_name: mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; - rcu_read_lock(); - cpuctx = &get_cpu_var(perf_cpu_context); - perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC); + rcu_read_lock_sched(); + list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, + vma->vm_flags & VM_EXEC); + } ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC); - put_cpu_var(perf_cpu_context); - rcu_read_unlock(); + rcu_read_unlock_sched(); kfree(buf); } @@ -4982,10 +5001,6 @@ static struct pmu perf_task_clock = { .read = task_clock_event_read, }; -static LIST_HEAD(pmus); -static DEFINE_MUTEX(pmus_lock); -static struct srcu_struct pmus_srcu; - static void perf_pmu_nop_void(struct pmu *pmu) { } @@ -5013,7 +5028,7 @@ static void perf_pmu_cancel_txn(struct pmu *pmu) int perf_pmu_register(struct pmu *pmu) { - int ret; + int cpu, ret; mutex_lock(&pmus_lock); ret = -ENOMEM; @@ -5021,6 +5036,21 @@ int perf_pmu_register(struct pmu *pmu) if (!pmu->pmu_disable_count) goto unlock; + pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); + if (!pmu->pmu_cpu_context) + goto free_pdc; + + for_each_possible_cpu(cpu) { + struct perf_cpu_context *cpuctx; + + cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); + __perf_event_init_context(&cpuctx->ctx, NULL); + cpuctx->ctx.pmu = pmu; + cpuctx->timer_interval = TICK_NSEC; + hrtimer_init(&cpuctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cpuctx->timer.function = perf_event_context_tick; + } + if (!pmu->start_txn) { if (pmu->pmu_enable) { /* @@ -5049,6 +5079,10 @@ unlock: mutex_unlock(&pmus_lock); return ret; + +free_pdc: + free_percpu(pmu->pmu_disable_count); + goto unlock; } void perf_pmu_unregister(struct pmu *pmu) @@ -5057,9 +5091,14 @@ void perf_pmu_unregister(struct pmu *pmu) list_del_rcu(&pmu->entry); mutex_unlock(&pmus_lock); + /* + * We use the pmu list either under SRCU or preempt_disable, + * synchronize_srcu() implies synchronize_sched() so we're good. + */ synchronize_srcu(&pmus_srcu); free_percpu(pmu->pmu_disable_count); + free_percpu(pmu->pmu_cpu_context); } struct pmu *perf_init_event(struct perf_event *event) @@ -5374,7 +5413,7 @@ SYSCALL_DEFINE5(perf_event_open, /* * Get the target context (task or percpu): */ - ctx = find_get_context(pid, cpu); + ctx = find_get_context(event->pmu, pid, cpu); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err_alloc; @@ -5489,7 +5528,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, goto err; } - ctx = find_get_context(pid, cpu); + ctx = find_get_context(event->pmu, pid, cpu); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err_free; @@ -5833,6 +5872,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, return -ENOMEM; __perf_event_init_context(child_ctx, child); + child_ctx->pmu = event->pmu; child->perf_event_ctxp = child_ctx; get_task_struct(child); } @@ -5935,30 +5975,18 @@ int perf_event_init_task(struct task_struct *child) static void __init perf_event_init_all_cpus(void) { - struct perf_cpu_context *cpuctx; struct swevent_htable *swhash; int cpu; for_each_possible_cpu(cpu) { swhash = &per_cpu(swevent_htable, cpu); mutex_init(&swhash->hlist_mutex); - - cpuctx = &per_cpu(perf_cpu_context, cpu); - __perf_event_init_context(&cpuctx->ctx, NULL); - cpuctx->timer_interval = TICK_NSEC; - hrtimer_init(&cpuctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - cpuctx->timer.function = perf_event_context_tick; } } static void __cpuinit perf_event_init_cpu(int cpu) { - struct perf_cpu_context *cpuctx; - struct swevent_htable *swhash; - - cpuctx = &per_cpu(perf_cpu_context, cpu); - - swhash = &per_cpu(swevent_htable, cpu); + struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); mutex_lock(&swhash->hlist_mutex); if (swhash->hlist_refcount > 0) { @@ -5972,32 +6000,46 @@ static void __cpuinit perf_event_init_cpu(int cpu) } #ifdef CONFIG_HOTPLUG_CPU -static void __perf_event_exit_cpu(void *info) +static void __perf_event_exit_context(void *__info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_event_context *ctx = &cpuctx->ctx; + struct perf_event_context *ctx = __info; struct perf_event *event, *tmp; - perf_pmu_rotate_stop(); + perf_pmu_rotate_stop(ctx->pmu); list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) __perf_event_remove_from_context(event); list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) __perf_event_remove_from_context(event); } + +static void perf_event_exit_cpu_context(int cpu) +{ + struct perf_event_context *ctx; + struct pmu *pmu; + int idx; + + idx = srcu_read_lock(&pmus_srcu); + list_for_each_entry_rcu(pmu, &pmus, entry) { + ctx = &this_cpu_ptr(pmu->pmu_cpu_context)->ctx; + + mutex_lock(&ctx->mutex); + smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1); + mutex_unlock(&ctx->mutex); + } + srcu_read_unlock(&pmus_srcu, idx); + +} + static void perf_event_exit_cpu(int cpu) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); - struct perf_event_context *ctx = &cpuctx->ctx; mutex_lock(&swhash->hlist_mutex); swevent_hlist_release(swhash); mutex_unlock(&swhash->hlist_mutex); - mutex_lock(&ctx->mutex); - smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1); - mutex_unlock(&ctx->mutex); + perf_event_exit_cpu_context(cpu); } #else static inline void perf_event_exit_cpu(int cpu) { } -- cgit v1.2.3 From 8dc85d547285668e509f86c177bcd4ea055bcaaf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Sep 2010 16:50:03 +0200 Subject: perf: Multiple task contexts Provide the infrastructure for multiple task contexts. A more flexible approach would have resulted in more pointer chases in the scheduling hot-paths. This approach has the limitation of a static number of task contexts. Since I expect most external PMUs to be system wide, or at least node wide (as per the intel uncore unit) they won't actually need a task context. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + include/linux/sched.h | 8 +- kernel/perf_event.c | 336 +++++++++++++++++++++++++++++++-------------- 3 files changed, 239 insertions(+), 106 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 22155ef3b362..9ecfd856ce6e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -572,6 +572,7 @@ struct pmu { int * __percpu pmu_disable_count; struct perf_cpu_context * __percpu pmu_cpu_context; + int task_ctx_nr; /* * Fully disable/enable this PMU, can be used to protect from the PMI diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7dd..89d6023c6f82 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1160,6 +1160,12 @@ struct sched_rt_entity { struct rcu_node; +enum perf_event_task_context { + perf_invalid_context = -1, + perf_hw_context = 0, + perf_nr_task_contexts, +}; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -1431,7 +1437,7 @@ struct task_struct { struct futex_pi_state *pi_state_cache; #endif #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp; + struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; struct mutex perf_event_mutex; struct list_head perf_event_list; #endif diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 13d98d756347..7223ea875861 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -148,13 +148,13 @@ static u64 primary_event_id(struct perf_event *event) * the context could get moved to another task. */ static struct perf_event_context * -perf_lock_task_context(struct task_struct *task, unsigned long *flags) +perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) { struct perf_event_context *ctx; rcu_read_lock(); retry: - ctx = rcu_dereference(task->perf_event_ctxp); + ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); if (ctx) { /* * If this context is a clone of another, it might @@ -167,7 +167,7 @@ retry: * can't get swapped on us any more. */ raw_spin_lock_irqsave(&ctx->lock, *flags); - if (ctx != rcu_dereference(task->perf_event_ctxp)) { + if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { raw_spin_unlock_irqrestore(&ctx->lock, *flags); goto retry; } @@ -186,12 +186,13 @@ retry: * can't get swapped to another task. This also increments its * reference count so that the context can't get freed. */ -static struct perf_event_context *perf_pin_task_context(struct task_struct *task) +static struct perf_event_context * +perf_pin_task_context(struct task_struct *task, int ctxn) { struct perf_event_context *ctx; unsigned long flags; - ctx = perf_lock_task_context(task, &flags); + ctx = perf_lock_task_context(task, ctxn, &flags); if (ctx) { ++ctx->pin_count; raw_spin_unlock_irqrestore(&ctx->lock, flags); @@ -1179,28 +1180,15 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, } } -/* - * Called from scheduler to remove the events of the current task, - * with interrupts disabled. - * - * We stop each event and update the event value in event->count. - * - * This does not protect us against NMI, but disable() - * sets the disabled bit in the control field of event _before_ - * accessing the event control register. If a NMI hits, then it will - * not restart the event. - */ -void perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next) +void perf_event_context_sched_out(struct task_struct *task, int ctxn, + struct task_struct *next) { - struct perf_event_context *ctx = task->perf_event_ctxp; + struct perf_event_context *ctx = task->perf_event_ctxp[ctxn]; struct perf_event_context *next_ctx; struct perf_event_context *parent; struct perf_cpu_context *cpuctx; int do_switch = 1; - perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); - if (likely(!ctx)) return; @@ -1210,7 +1198,7 @@ void perf_event_task_sched_out(struct task_struct *task, rcu_read_lock(); parent = rcu_dereference(ctx->parent_ctx); - next_ctx = next->perf_event_ctxp; + next_ctx = next->perf_event_ctxp[ctxn]; if (parent && next_ctx && rcu_dereference(next_ctx->parent_ctx) == parent) { /* @@ -1229,8 +1217,8 @@ void perf_event_task_sched_out(struct task_struct *task, * XXX do we need a memory barrier of sorts * wrt to rcu_dereference() of perf_event_ctxp */ - task->perf_event_ctxp = next_ctx; - next->perf_event_ctxp = ctx; + task->perf_event_ctxp[ctxn] = next_ctx; + next->perf_event_ctxp[ctxn] = ctx; ctx->task = next; next_ctx->task = task; do_switch = 0; @@ -1248,6 +1236,31 @@ void perf_event_task_sched_out(struct task_struct *task, } } +#define for_each_task_context_nr(ctxn) \ + for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++) + +/* + * Called from scheduler to remove the events of the current task, + * with interrupts disabled. + * + * We stop each event and update the event value in event->count. + * + * This does not protect us against NMI, but disable() + * sets the disabled bit in the control field of event _before_ + * accessing the event control register. If a NMI hits, then it will + * not restart the event. + */ +void perf_event_task_sched_out(struct task_struct *task, + struct task_struct *next) +{ + int ctxn; + + perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); + + for_each_task_context_nr(ctxn) + perf_event_context_sched_out(task, ctxn, next); +} + static void task_ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type) { @@ -1366,38 +1379,23 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, ctx_sched_in(ctx, cpuctx, event_type); } -static void task_ctx_sched_in(struct task_struct *task, +static void task_ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type) { - struct perf_event_context *ctx = task->perf_event_ctxp; - struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); + struct perf_cpu_context *cpuctx; - if (likely(!ctx)) - return; + cpuctx = __get_cpu_context(ctx); if (cpuctx->task_ctx == ctx) return; + ctx_sched_in(ctx, cpuctx, event_type); cpuctx->task_ctx = ctx; } -/* - * Called from scheduler to add the events of the current task - * with interrupts disabled. - * - * We restore the event value and then enable it. - * - * This does not protect us against NMI, but enable() - * sets the enabled bit in the control field of event _before_ - * accessing the event control register. If a NMI hits, then it will - * keep the event running. - */ -void perf_event_task_sched_in(struct task_struct *task) + +void perf_event_context_sched_in(struct perf_event_context *ctx) { - struct perf_event_context *ctx = task->perf_event_ctxp; struct perf_cpu_context *cpuctx; - if (likely(!ctx)) - return; - cpuctx = __get_cpu_context(ctx); if (cpuctx->task_ctx == ctx) return; @@ -1422,6 +1420,31 @@ void perf_event_task_sched_in(struct task_struct *task) perf_pmu_rotate_start(ctx->pmu); } +/* + * Called from scheduler to add the events of the current task + * with interrupts disabled. + * + * We restore the event value and then enable it. + * + * This does not protect us against NMI, but enable() + * sets the enabled bit in the control field of event _before_ + * accessing the event control register. If a NMI hits, then it will + * keep the event running. + */ +void perf_event_task_sched_in(struct task_struct *task) +{ + struct perf_event_context *ctx; + int ctxn; + + for_each_task_context_nr(ctxn) { + ctx = task->perf_event_ctxp[ctxn]; + if (likely(!ctx)) + continue; + + perf_event_context_sched_in(ctx); + } +} + #define MAX_INTERRUPTS (~0ULL) static void perf_log_throttle(struct perf_event *event, int enable); @@ -1588,7 +1611,7 @@ static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer) { enum hrtimer_restart restart = HRTIMER_NORESTART; struct perf_cpu_context *cpuctx; - struct perf_event_context *ctx; + struct perf_event_context *ctx = NULL; int rotate = 0; cpuctx = container_of(timer, struct perf_cpu_context, timer); @@ -1599,7 +1622,7 @@ static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer) rotate = 1; } - ctx = current->perf_event_ctxp; + ctx = cpuctx->task_ctx; if (ctx && ctx->nr_events) { restart = HRTIMER_RESTART; if (ctx->nr_events != ctx->nr_active) @@ -1623,7 +1646,7 @@ static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer) cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); if (ctx) - task_ctx_sched_in(current, EVENT_FLEXIBLE); + task_ctx_sched_in(ctx, EVENT_FLEXIBLE); done: hrtimer_forward_now(timer, ns_to_ktime(cpuctx->timer_interval)); @@ -1650,20 +1673,18 @@ static int event_enable_on_exec(struct perf_event *event, * Enable all of a task's events that have been marked enable-on-exec. * This expects task == current. */ -static void perf_event_enable_on_exec(struct task_struct *task) +static void perf_event_enable_on_exec(struct perf_event_context *ctx) { - struct perf_event_context *ctx; struct perf_event *event; unsigned long flags; int enabled = 0; int ret; local_irq_save(flags); - ctx = task->perf_event_ctxp; if (!ctx || !ctx->nr_events) goto out; - __perf_event_task_sched_out(ctx); + task_ctx_sched_out(ctx, EVENT_ALL); raw_spin_lock(&ctx->lock); @@ -1687,7 +1708,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) raw_spin_unlock(&ctx->lock); - perf_event_task_sched_in(task); + perf_event_context_sched_in(ctx); out: local_irq_restore(flags); } @@ -1995,7 +2016,7 @@ find_get_context(struct pmu *pmu, pid_t pid, int cpu) struct perf_cpu_context *cpuctx; struct task_struct *task; unsigned long flags; - int err; + int ctxn, err; if (pid == -1 && cpu != -1) { /* Must be root to operate on a CPU event: */ @@ -2044,8 +2065,13 @@ find_get_context(struct pmu *pmu, pid_t pid, int cpu) if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto errout; + err = -EINVAL; + ctxn = pmu->task_ctx_nr; + if (ctxn < 0) + goto errout; + retry: - ctx = perf_lock_task_context(task, &flags); + ctx = perf_lock_task_context(task, ctxn, &flags); if (ctx) { unclone_ctx(ctx); raw_spin_unlock_irqrestore(&ctx->lock, flags); @@ -2059,7 +2085,7 @@ retry: get_ctx(ctx); - if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) { + if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) { /* * We raced with some other task; use * the context they set. @@ -3773,19 +3799,26 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, static void perf_event_task_event(struct perf_task_event *task_event) { - struct perf_event_context *ctx = task_event->task_ctx; struct perf_cpu_context *cpuctx; + struct perf_event_context *ctx; struct pmu *pmu; + int ctxn; rcu_read_lock_sched(); list_for_each_entry_rcu(pmu, &pmus, entry) { cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); perf_event_task_ctx(&cpuctx->ctx, task_event); + + ctx = task_event->task_ctx; + if (!ctx) { + ctxn = pmu->task_ctx_nr; + if (ctxn < 0) + continue; + ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); + } + if (ctx) + perf_event_task_ctx(ctx, task_event); } - if (!ctx) - ctx = rcu_dereference(current->perf_event_ctxp); - if (ctx) - perf_event_task_ctx(ctx, task_event); rcu_read_unlock_sched(); } @@ -3890,9 +3923,10 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) { struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; + char comm[TASK_COMM_LEN]; unsigned int size; struct pmu *pmu; - char comm[TASK_COMM_LEN]; + int ctxn; memset(comm, 0, sizeof(comm)); strlcpy(comm, comm_event->task->comm, sizeof(comm)); @@ -3907,19 +3941,31 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) list_for_each_entry_rcu(pmu, &pmus, entry) { cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); perf_event_comm_ctx(&cpuctx->ctx, comm_event); + + ctxn = pmu->task_ctx_nr; + if (ctxn < 0) + continue; + + ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); + if (ctx) + perf_event_comm_ctx(ctx, comm_event); } - ctx = rcu_dereference(current->perf_event_ctxp); - if (ctx) - perf_event_comm_ctx(ctx, comm_event); rcu_read_unlock_sched(); } void perf_event_comm(struct task_struct *task) { struct perf_comm_event comm_event; + struct perf_event_context *ctx; + int ctxn; - if (task->perf_event_ctxp) - perf_event_enable_on_exec(task); + for_each_task_context_nr(ctxn) { + ctx = task->perf_event_ctxp[ctxn]; + if (!ctx) + continue; + + perf_event_enable_on_exec(ctx); + } if (!atomic_read(&nr_comm_events)) return; @@ -4022,6 +4068,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) char *buf = NULL; const char *name; struct pmu *pmu; + int ctxn; memset(tmp, 0, sizeof(tmp)); @@ -4078,10 +4125,17 @@ got_name: cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC); + + ctxn = pmu->task_ctx_nr; + if (ctxn < 0) + continue; + + ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); + if (ctx) { + perf_event_mmap_ctx(ctx, mmap_event, + vma->vm_flags & VM_EXEC); + } } - ctx = rcu_dereference(current->perf_event_ctxp); - if (ctx) - perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC); rcu_read_unlock_sched(); kfree(buf); @@ -5042,6 +5096,43 @@ static void perf_pmu_cancel_txn(struct pmu *pmu) perf_pmu_enable(pmu); } +/* + * Ensures all contexts with the same task_ctx_nr have the same + * pmu_cpu_context too. + */ +static void *find_pmu_context(int ctxn) +{ + struct pmu *pmu; + + if (ctxn < 0) + return NULL; + + list_for_each_entry(pmu, &pmus, entry) { + if (pmu->task_ctx_nr == ctxn) + return pmu->pmu_cpu_context; + } + + return NULL; +} + +static void free_pmu_context(void * __percpu cpu_context) +{ + struct pmu *pmu; + + mutex_lock(&pmus_lock); + /* + * Like a real lame refcount. + */ + list_for_each_entry(pmu, &pmus, entry) { + if (pmu->pmu_cpu_context == cpu_context) + goto out; + } + + free_percpu(cpu_context); +out: + mutex_unlock(&pmus_lock); +} + int perf_pmu_register(struct pmu *pmu) { int cpu, ret; @@ -5052,6 +5143,10 @@ int perf_pmu_register(struct pmu *pmu) if (!pmu->pmu_disable_count) goto unlock; + pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); + if (pmu->pmu_cpu_context) + goto got_cpu_context; + pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); if (!pmu->pmu_cpu_context) goto free_pdc; @@ -5067,6 +5162,7 @@ int perf_pmu_register(struct pmu *pmu) cpuctx->timer.function = perf_event_context_tick; } +got_cpu_context: if (!pmu->start_txn) { if (pmu->pmu_enable) { /* @@ -5114,7 +5210,7 @@ void perf_pmu_unregister(struct pmu *pmu) synchronize_srcu(&pmus_srcu); free_percpu(pmu->pmu_disable_count); - free_percpu(pmu->pmu_cpu_context); + free_pmu_context(pmu->pmu_cpu_context); } struct pmu *perf_init_event(struct perf_event *event) @@ -5628,16 +5724,13 @@ __perf_event_exit_task(struct perf_event *child_event, } } -/* - * When a child task exits, feed back event values to parent events. - */ -void perf_event_exit_task(struct task_struct *child) +static void perf_event_exit_task_context(struct task_struct *child, int ctxn) { struct perf_event *child_event, *tmp; struct perf_event_context *child_ctx; unsigned long flags; - if (likely(!child->perf_event_ctxp)) { + if (likely(!child->perf_event_ctxp[ctxn])) { perf_event_task(child, NULL, 0); return; } @@ -5649,7 +5742,7 @@ void perf_event_exit_task(struct task_struct *child) * scheduled, so we are now safe from rescheduling changing * our context. */ - child_ctx = child->perf_event_ctxp; + child_ctx = child->perf_event_ctxp[ctxn]; __perf_event_task_sched_out(child_ctx); /* @@ -5658,7 +5751,7 @@ void perf_event_exit_task(struct task_struct *child) * incremented the context's refcount before we do put_ctx below. */ raw_spin_lock(&child_ctx->lock); - child->perf_event_ctxp = NULL; + child->perf_event_ctxp[ctxn] = NULL; /* * If this context is a clone; unclone it so it can't get * swapped to another process while we're removing all @@ -5711,6 +5804,17 @@ again: put_ctx(child_ctx); } +/* + * When a child task exits, feed back event values to parent events. + */ +void perf_event_exit_task(struct task_struct *child) +{ + int ctxn; + + for_each_task_context_nr(ctxn) + perf_event_exit_task_context(child, ctxn); +} + static void perf_free_event(struct perf_event *event, struct perf_event_context *ctx) { @@ -5732,32 +5836,37 @@ static void perf_free_event(struct perf_event *event, /* * free an unexposed, unused context as created by inheritance by - * init_task below, used by fork() in case of fail. + * perf_event_init_task below, used by fork() in case of fail. */ void perf_event_free_task(struct task_struct *task) { - struct perf_event_context *ctx = task->perf_event_ctxp; + struct perf_event_context *ctx; struct perf_event *event, *tmp; + int ctxn; - if (!ctx) - return; + for_each_task_context_nr(ctxn) { + ctx = task->perf_event_ctxp[ctxn]; + if (!ctx) + continue; - mutex_lock(&ctx->mutex); + mutex_lock(&ctx->mutex); again: - list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) - perf_free_event(event, ctx); + list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, + group_entry) + perf_free_event(event, ctx); - list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, - group_entry) - perf_free_event(event, ctx); + list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, + group_entry) + perf_free_event(event, ctx); - if (!list_empty(&ctx->pinned_groups) || - !list_empty(&ctx->flexible_groups)) - goto again; + if (!list_empty(&ctx->pinned_groups) || + !list_empty(&ctx->flexible_groups)) + goto again; - mutex_unlock(&ctx->mutex); + mutex_unlock(&ctx->mutex); - put_ctx(ctx); + put_ctx(ctx); + } } /* @@ -5863,17 +5972,18 @@ static int inherit_group(struct perf_event *parent_event, static int inherit_task_group(struct perf_event *event, struct task_struct *parent, struct perf_event_context *parent_ctx, - struct task_struct *child, + struct task_struct *child, int ctxn, int *inherited_all) { int ret; - struct perf_event_context *child_ctx = child->perf_event_ctxp; + struct perf_event_context *child_ctx; if (!event->attr.inherit) { *inherited_all = 0; return 0; } + child_ctx = child->perf_event_ctxp[ctxn]; if (!child_ctx) { /* * This is executed from the parent task context, so @@ -5886,7 +5996,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, if (!child_ctx) return -ENOMEM; - child->perf_event_ctxp = child_ctx; + child->perf_event_ctxp[ctxn] = child_ctx; } ret = inherit_group(event, parent, parent_ctx, @@ -5901,7 +6011,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, /* * Initialize the perf_event context in task_struct */ -int perf_event_init_task(struct task_struct *child) +int perf_event_init_context(struct task_struct *child, int ctxn) { struct perf_event_context *child_ctx, *parent_ctx; struct perf_event_context *cloned_ctx; @@ -5910,19 +6020,19 @@ int perf_event_init_task(struct task_struct *child) int inherited_all = 1; int ret = 0; - child->perf_event_ctxp = NULL; + child->perf_event_ctxp[ctxn] = NULL; mutex_init(&child->perf_event_mutex); INIT_LIST_HEAD(&child->perf_event_list); - if (likely(!parent->perf_event_ctxp)) + if (likely(!parent->perf_event_ctxp[ctxn])) return 0; /* * If the parent's context is a clone, pin it so it won't get * swapped under us. */ - parent_ctx = perf_pin_task_context(parent); + parent_ctx = perf_pin_task_context(parent, ctxn); /* * No need to check if parent_ctx != NULL here; since we saw @@ -5942,20 +6052,20 @@ int perf_event_init_task(struct task_struct *child) * the list, not manipulating it: */ list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { - ret = inherit_task_group(event, parent, parent_ctx, child, - &inherited_all); + ret = inherit_task_group(event, parent, parent_ctx, + child, ctxn, &inherited_all); if (ret) break; } list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { - ret = inherit_task_group(event, parent, parent_ctx, child, - &inherited_all); + ret = inherit_task_group(event, parent, parent_ctx, + child, ctxn, &inherited_all); if (ret) break; } - child_ctx = child->perf_event_ctxp; + child_ctx = child->perf_event_ctxp[ctxn]; if (child_ctx && inherited_all) { /* @@ -5984,6 +6094,22 @@ int perf_event_init_task(struct task_struct *child) return ret; } +/* + * Initialize the perf_event context in task_struct + */ +int perf_event_init_task(struct task_struct *child) +{ + int ctxn, ret; + + for_each_task_context_nr(ctxn) { + ret = perf_event_init_context(child, ctxn); + if (ret) + return ret; + } + + return 0; +} + static void __init perf_event_init_all_cpus(void) { struct swevent_htable *swhash; -- cgit v1.2.3 From 89a1e18731959e9953fae15ddc1a983eb15a4f19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 Sep 2010 17:34:50 +0200 Subject: perf: Provide a separate task context for swevents Since software events are always schedulable, mixing them up with hardware events (who are not) can lead to funny scheduling oddities. Giving them their own context solves this. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 9 +-------- include/linux/sched.h | 1 + kernel/hw_breakpoint.c | 2 ++ kernel/perf_event.c | 40 +++++++++++++++++++++++++++++----------- 4 files changed, 33 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9ecfd856ce6e..c1173520f14d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -952,14 +952,7 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, */ static inline int is_software_event(struct perf_event *event) { - switch (event->attr.type) { - case PERF_TYPE_SOFTWARE: - case PERF_TYPE_TRACEPOINT: - /* for now the breakpoint stuff also works as software event */ - case PERF_TYPE_BREAKPOINT: - return 1; - } - return 0; + return event->pmu->task_ctx_nr == perf_sw_context; } extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; diff --git a/include/linux/sched.h b/include/linux/sched.h index 89d6023c6f82..eb3c1ceec06e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1163,6 +1163,7 @@ struct rcu_node; enum perf_event_task_context { perf_invalid_context = -1, perf_hw_context = 0, + perf_sw_context, perf_nr_task_contexts, }; diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 6f150095cafe..3b2aaffb65f0 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -610,6 +610,8 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags) } static struct pmu perf_breakpoint = { + .task_ctx_nr = perf_sw_context, /* could eventually get its own */ + .event_init = hw_breakpoint_event_init, .add = hw_breakpoint_add, .del = hw_breakpoint_del, diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 7223ea875861..357ee8d5e8ae 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4709,6 +4709,8 @@ static int perf_swevent_init(struct perf_event *event) } static struct pmu perf_swevent = { + .task_ctx_nr = perf_sw_context, + .event_init = perf_swevent_init, .add = perf_swevent_add, .del = perf_swevent_del, @@ -4800,6 +4802,8 @@ static int perf_tp_event_init(struct perf_event *event) } static struct pmu perf_tracepoint = { + .task_ctx_nr = perf_sw_context, + .event_init = perf_tp_event_init, .add = perf_trace_add, .del = perf_trace_del, @@ -4988,6 +4992,8 @@ static int cpu_clock_event_init(struct perf_event *event) } static struct pmu perf_cpu_clock = { + .task_ctx_nr = perf_sw_context, + .event_init = cpu_clock_event_init, .add = cpu_clock_event_add, .del = cpu_clock_event_del, @@ -5063,6 +5069,8 @@ static int task_clock_event_init(struct perf_event *event) } static struct pmu perf_task_clock = { + .task_ctx_nr = perf_sw_context, + .event_init = task_clock_event_init, .add = task_clock_event_add, .del = task_clock_event_del, @@ -5490,6 +5498,7 @@ SYSCALL_DEFINE5(perf_event_open, struct perf_event_context *ctx; struct file *event_file = NULL; struct file *group_file = NULL; + struct pmu *pmu; int event_fd; int fput_needed = 0; int err; @@ -5522,20 +5531,11 @@ SYSCALL_DEFINE5(perf_event_open, goto err_fd; } - /* - * Get the target context (task or percpu): - */ - ctx = find_get_context(event->pmu, pid, cpu); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto err_alloc; - } - if (group_fd != -1) { group_leader = perf_fget_light(group_fd, &fput_needed); if (IS_ERR(group_leader)) { err = PTR_ERR(group_leader); - goto err_context; + goto err_alloc; } group_file = group_leader->filp; if (flags & PERF_FLAG_FD_OUTPUT) @@ -5544,6 +5544,23 @@ SYSCALL_DEFINE5(perf_event_open, group_leader = NULL; } + /* + * Special case software events and allow them to be part of + * any hardware group. + */ + pmu = event->pmu; + if ((pmu->task_ctx_nr == perf_sw_context) && group_leader) + pmu = group_leader->pmu; + + /* + * Get the target context (task or percpu): + */ + ctx = find_get_context(pmu, pid, cpu); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto err_group_fd; + } + /* * Look up the group leader (we will attach this event to it): */ @@ -5605,8 +5622,9 @@ SYSCALL_DEFINE5(perf_event_open, return event_fd; err_context: - fput_light(group_file, fput_needed); put_ctx(ctx); +err_group_fd: + fput_light(group_file, fput_needed); err_alloc: free_event(event); err_fd: -- cgit v1.2.3 From 4e231c7962ce711c7d8c2a4dc23ecd1e8fc28363 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Sep 2010 21:01:59 +0200 Subject: perf: Fix up delayed_put_task_struct() I missed a perf_event_ctxp user when converting it to an array. Pull this last user into perf_event.c as well and fix it up. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 ++ kernel/exit.c | 4 +--- kernel/perf_event.c | 8 ++++++++ 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c1173520f14d..93bf53aa50e5 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -889,6 +889,7 @@ extern void perf_event_task_sched_out(struct task_struct *task, struct task_stru extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); +extern void perf_event_delayed_put(struct task_struct *task); extern void set_perf_event_pending(void); extern void perf_event_do_pending(void); extern void perf_event_print_debug(void); @@ -1067,6 +1068,7 @@ perf_event_task_sched_out(struct task_struct *task, static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } +static inline void perf_event_delayed_put(struct task_struct *task) { } static inline void perf_event_do_pending(void) { } static inline void perf_event_print_debug(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } diff --git a/kernel/exit.c b/kernel/exit.c index 03120229db28..e2bdf37f9fde 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -149,9 +149,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); -#ifdef CONFIG_PERF_EVENTS - WARN_ON_ONCE(tsk->perf_event_ctxp); -#endif + perf_event_delayed_put(tsk); trace_sched_process_free(tsk); put_task_struct(tsk); } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 9819a69a61a1..eaf1c5de6dcc 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -5893,6 +5893,14 @@ again: } } +void perf_event_delayed_put(struct task_struct *task) +{ + int ctxn; + + for_each_task_context_nr(ctxn) + WARN_ON_ONCE(task->perf_event_ctxp[ctxn]); +} + /* * inherit a event from parent task to child task: */ -- cgit v1.2.3 From 3b8fad3e2f5f69bfd8e42d099ca8582fb2342edf Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Sep 2010 14:26:00 +0200 Subject: irq: Fix circular headers dependency asm-generic/hardirq.h needs asm/irq.h which might include linux/interrupt.h as in the sparc 32 case. At this point we need irq_cpustat generic definitions, but those are included later in asm-generic/hardirq.h. Then delay a bit the inclusion of irq.h from asm-generic/hardirq.h, it doesn't need to be included early. This fixes: include/linux/interrupt.h: In function '__raise_softirq_irqoff': include/linux/interrupt.h:414: error: implicit declaration of function 'local_softirq_pending' include/linux/interrupt.h:414: error: lvalue required as left operand of assignment Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Lai Jiangshan Cc: Koki Sanagi Cc: mathieu.desnoyers@efficios.com Cc: rostedt@goodmis.org Cc: nhorman@tuxdriver.com Cc: scott.a.mcmillan@intel.com Cc: eric.dumazet@gmail.com Cc: kaneshige.kenji@jp.fujitsu.com Cc: davem@davemloft.net Cc: izumi.taku@jp.fujitsu.com Cc: kosaki.motohiro@jp.fujitsu.com LKML-Reference: <20100908122557.GA5310@nowhere> Signed-off-by: Ingo Molnar --- include/asm-generic/hardirq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h index 62f59080e5cc..04d0a977cd43 100644 --- a/include/asm-generic/hardirq.h +++ b/include/asm-generic/hardirq.h @@ -3,13 +3,13 @@ #include #include -#include typedef struct { unsigned int __softirq_pending; } ____cacheline_aligned irq_cpustat_t; #include /* Standard mappings for irq_cpustat_t above */ +#include #ifndef ack_bad_irq static inline void ack_bad_irq(unsigned int irq) -- cgit v1.2.3 From 8613e4c2872a87cc309a42de2c7091744dc54d0e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 9 Sep 2010 21:54:22 -0700 Subject: Input: add support for large scancodes Several devices use a high number of bits for scancodes. One important group is the Remote Controllers. Some new protocols like RC-6 define a scancode space of 64 bits. The current EVIO[CS]GKEYCODE ioctls allow replace the scancode/keycode translation tables, but it is limited to up to 32 bits for scancode. Also, if userspace wants to clean the existing table, replacing it by a new one, it needs to run a loop calling the ioctls over the entire sparse scancode space. To solve those problems, this patch extends the ioctls to allow drivers handle scancodes up to 32 bytes long (the length could be extended in the future should such need arise) and allow userspace to query and set scancode to keycode mappings not only by scancode but also by index. Compatibility code were also added to handle the old format of EVIO[CS]GKEYCODE ioctls. Folded fixes by: - Dan Carpenter: locking fixes for the original implementation - Jarod Wilson: fix crash when setting keycode and wiring up get/set handlers in original implementation. - Dmitry Torokhov: rework to consolidate old and new scancode handling, provide options to act either by index or scancode. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Dan Carpenter Signed-off-by: Jarod Wilson Signed-off-by: Dmitry Torokhov --- drivers/char/keyboard.c | 31 ++++++-- drivers/input/evdev.c | 100 ++++++++++++++++++++----- drivers/input/input.c | 192 ++++++++++++++++++++++++++++++++++++------------ include/linux/input.h | 55 +++++++++++--- 4 files changed, 292 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c index a7ca75212bfe..e95d7876ca6b 100644 --- a/drivers/char/keyboard.c +++ b/drivers/char/keyboard.c @@ -175,8 +175,7 @@ EXPORT_SYMBOL_GPL(unregister_keyboard_notifier); */ struct getset_keycode_data { - unsigned int scancode; - unsigned int keycode; + struct input_keymap_entry ke; int error; }; @@ -184,32 +183,50 @@ static int getkeycode_helper(struct input_handle *handle, void *data) { struct getset_keycode_data *d = data; - d->error = input_get_keycode(handle->dev, d->scancode, &d->keycode); + d->error = input_get_keycode(handle->dev, &d->ke); return d->error == 0; /* stop as soon as we successfully get one */ } int getkeycode(unsigned int scancode) { - struct getset_keycode_data d = { scancode, 0, -ENODEV }; + struct getset_keycode_data d = { + .ke = { + .flags = 0, + .len = sizeof(scancode), + .keycode = 0, + }, + .error = -ENODEV, + }; + + memcpy(d.ke.scancode, &scancode, sizeof(scancode)); input_handler_for_each_handle(&kbd_handler, &d, getkeycode_helper); - return d.error ?: d.keycode; + return d.error ?: d.ke.keycode; } static int setkeycode_helper(struct input_handle *handle, void *data) { struct getset_keycode_data *d = data; - d->error = input_set_keycode(handle->dev, d->scancode, d->keycode); + d->error = input_set_keycode(handle->dev, &d->ke); return d->error == 0; /* stop as soon as we successfully set one */ } int setkeycode(unsigned int scancode, unsigned int keycode) { - struct getset_keycode_data d = { scancode, keycode, -ENODEV }; + struct getset_keycode_data d = { + .ke = { + .flags = 0, + .len = sizeof(scancode), + .keycode = keycode, + }, + .error = -ENODEV, + }; + + memcpy(d.ke.scancode, &scancode, sizeof(scancode)); input_handler_for_each_handle(&kbd_handler, &d, setkeycode_helper); diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index c908c5f83645..1ce9bf663206 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -534,6 +534,80 @@ static int handle_eviocgbit(struct input_dev *dev, } #undef OLD_KEY_MAX +static int evdev_handle_get_keycode(struct input_dev *dev, + void __user *p, size_t size) +{ + struct input_keymap_entry ke; + int error; + + memset(&ke, 0, sizeof(ke)); + + if (size == sizeof(unsigned int[2])) { + /* legacy case */ + int __user *ip = (int __user *)p; + + if (copy_from_user(ke.scancode, p, sizeof(unsigned int))) + return -EFAULT; + + ke.len = sizeof(unsigned int); + ke.flags = 0; + + error = input_get_keycode(dev, &ke); + if (error) + return error; + + if (put_user(ke.keycode, ip + 1)) + return -EFAULT; + + } else { + size = min(size, sizeof(ke)); + + if (copy_from_user(&ke, p, size)) + return -EFAULT; + + error = input_get_keycode(dev, &ke); + if (error) + return error; + + if (copy_to_user(p, &ke, size)) + return -EFAULT; + } + return 0; +} + +static int evdev_handle_set_keycode(struct input_dev *dev, + void __user *p, size_t size) +{ + struct input_keymap_entry ke; + + memset(&ke, 0, sizeof(ke)); + + if (size == sizeof(unsigned int[2])) { + /* legacy case */ + int __user *ip = (int __user *)p; + + if (copy_from_user(ke.scancode, p, sizeof(unsigned int))) + return -EFAULT; + + if (get_user(ke.keycode, ip + 1)) + return -EFAULT; + + ke.len = sizeof(unsigned int); + ke.flags = 0; + + } else { + size = min(size, sizeof(ke)); + + if (copy_from_user(&ke, p, size)) + return -EFAULT; + + if (ke.len > sizeof(ke.scancode)) + return -EINVAL; + } + + return input_set_keycode(dev, &ke); +} + static long evdev_do_ioctl(struct file *file, unsigned int cmd, void __user *p, int compat_mode) { @@ -580,25 +654,6 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, return 0; - case EVIOCGKEYCODE: - if (get_user(t, ip)) - return -EFAULT; - - error = input_get_keycode(dev, t, &v); - if (error) - return error; - - if (put_user(v, ip + 1)) - return -EFAULT; - - return 0; - - case EVIOCSKEYCODE: - if (get_user(t, ip) || get_user(v, ip + 1)) - return -EFAULT; - - return input_set_keycode(dev, t, v); - case EVIOCRMFF: return input_ff_erase(dev, (int)(unsigned long) p, file); @@ -620,7 +675,6 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, /* Now check variable-length commands */ #define EVIOC_MASK_SIZE(nr) ((nr) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) - switch (EVIOC_MASK_SIZE(cmd)) { case EVIOCGKEY(0): @@ -654,6 +708,12 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, return -EFAULT; return error; + + case EVIOC_MASK_SIZE(EVIOCGKEYCODE): + return evdev_handle_get_keycode(dev, p, size); + + case EVIOC_MASK_SIZE(EVIOCSKEYCODE): + return evdev_handle_set_keycode(dev, p, size); } /* Multi-number variable-length handlers */ diff --git a/drivers/input/input.c b/drivers/input/input.c index acb3c8095c65..832771e73663 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -634,78 +634,141 @@ static void input_disconnect_device(struct input_dev *dev) spin_unlock_irq(&dev->event_lock); } -static int input_fetch_keycode(struct input_dev *dev, int scancode) +/** + * input_scancode_to_scalar() - converts scancode in &struct input_keymap_entry + * @ke: keymap entry containing scancode to be converted. + * @scancode: pointer to the location where converted scancode should + * be stored. + * + * This function is used to convert scancode stored in &struct keymap_entry + * into scalar form understood by legacy keymap handling methods. These + * methods expect scancodes to be represented as 'unsigned int'. + */ +int input_scancode_to_scalar(const struct input_keymap_entry *ke, + unsigned int *scancode) +{ + switch (ke->len) { + case 1: + *scancode = *((u8 *)ke->scancode); + break; + + case 2: + *scancode = *((u16 *)ke->scancode); + break; + + case 4: + *scancode = *((u32 *)ke->scancode); + break; + + default: + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL(input_scancode_to_scalar); + +/* + * Those routines handle the default case where no [gs]etkeycode() is + * defined. In this case, an array indexed by the scancode is used. + */ + +static unsigned int input_fetch_keycode(struct input_dev *dev, + unsigned int index) { switch (dev->keycodesize) { - case 1: - return ((u8 *)dev->keycode)[scancode]; + case 1: + return ((u8 *)dev->keycode)[index]; - case 2: - return ((u16 *)dev->keycode)[scancode]; + case 2: + return ((u16 *)dev->keycode)[index]; - default: - return ((u32 *)dev->keycode)[scancode]; + default: + return ((u32 *)dev->keycode)[index]; } } static int input_default_getkeycode(struct input_dev *dev, - unsigned int scancode, - unsigned int *keycode) + struct input_keymap_entry *ke) { + unsigned int index; + int error; + if (!dev->keycodesize) return -EINVAL; - if (scancode >= dev->keycodemax) + if (ke->flags & INPUT_KEYMAP_BY_INDEX) + index = ke->index; + else { + error = input_scancode_to_scalar(ke, &index); + if (error) + return error; + } + + if (index >= dev->keycodemax) return -EINVAL; - *keycode = input_fetch_keycode(dev, scancode); + ke->keycode = input_fetch_keycode(dev, index); + ke->index = index; + ke->len = sizeof(index); + memcpy(ke->scancode, &index, sizeof(index)); return 0; } static int input_default_setkeycode(struct input_dev *dev, - unsigned int scancode, - unsigned int keycode) + const struct input_keymap_entry *ke, + unsigned int *old_keycode) { - int old_keycode; + unsigned int index; + int error; int i; - if (scancode >= dev->keycodemax) + if (!dev->keycodesize) return -EINVAL; - if (!dev->keycodesize) + if (ke->flags & INPUT_KEYMAP_BY_INDEX) { + index = ke->index; + } else { + error = input_scancode_to_scalar(ke, &index); + if (error) + return error; + } + + if (index >= dev->keycodemax) return -EINVAL; - if (dev->keycodesize < sizeof(keycode) && (keycode >> (dev->keycodesize * 8))) + if (dev->keycodesize < sizeof(dev->keycode) && + (ke->keycode >> (dev->keycodesize * 8))) return -EINVAL; switch (dev->keycodesize) { case 1: { u8 *k = (u8 *)dev->keycode; - old_keycode = k[scancode]; - k[scancode] = keycode; + *old_keycode = k[index]; + k[index] = ke->keycode; break; } case 2: { u16 *k = (u16 *)dev->keycode; - old_keycode = k[scancode]; - k[scancode] = keycode; + *old_keycode = k[index]; + k[index] = ke->keycode; break; } default: { u32 *k = (u32 *)dev->keycode; - old_keycode = k[scancode]; - k[scancode] = keycode; + *old_keycode = k[index]; + k[index] = ke->keycode; break; } } - __clear_bit(old_keycode, dev->keybit); - __set_bit(keycode, dev->keybit); + __clear_bit(*old_keycode, dev->keybit); + __set_bit(ke->keycode, dev->keybit); for (i = 0; i < dev->keycodemax; i++) { - if (input_fetch_keycode(dev, i) == old_keycode) { - __set_bit(old_keycode, dev->keybit); + if (input_fetch_keycode(dev, i) == *old_keycode) { + __set_bit(*old_keycode, dev->keybit); break; /* Setting the bit twice is useless, so break */ } } @@ -716,53 +779,86 @@ static int input_default_setkeycode(struct input_dev *dev, /** * input_get_keycode - retrieve keycode currently mapped to a given scancode * @dev: input device which keymap is being queried - * @scancode: scancode (or its equivalent for device in question) for which - * keycode is needed - * @keycode: result + * @ke: keymap entry * * This function should be called by anyone interested in retrieving current - * keymap. Presently keyboard and evdev handlers use it. + * keymap. Presently evdev handlers use it. */ -int input_get_keycode(struct input_dev *dev, - unsigned int scancode, unsigned int *keycode) +int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke) { unsigned long flags; int retval; spin_lock_irqsave(&dev->event_lock, flags); - retval = dev->getkeycode(dev, scancode, keycode); - spin_unlock_irqrestore(&dev->event_lock, flags); + if (dev->getkeycode) { + /* + * Support for legacy drivers, that don't implement the new + * ioctls + */ + u32 scancode = ke->index; + + memcpy(ke->scancode, &scancode, sizeof(scancode)); + ke->len = sizeof(scancode); + retval = dev->getkeycode(dev, scancode, &ke->keycode); + } else { + retval = dev->getkeycode_new(dev, ke); + } + + spin_unlock_irqrestore(&dev->event_lock, flags); return retval; } EXPORT_SYMBOL(input_get_keycode); /** - * input_get_keycode - assign new keycode to a given scancode + * input_set_keycode - attribute a keycode to a given scancode * @dev: input device which keymap is being updated - * @scancode: scancode (or its equivalent for device in question) - * @keycode: new keycode to be assigned to the scancode + * @ke: new keymap entry * * This function should be called by anyone needing to update current * keymap. Presently keyboard and evdev handlers use it. */ int input_set_keycode(struct input_dev *dev, - unsigned int scancode, unsigned int keycode) + const struct input_keymap_entry *ke) { unsigned long flags; unsigned int old_keycode; int retval; - if (keycode > KEY_MAX) + if (ke->keycode > KEY_MAX) return -EINVAL; spin_lock_irqsave(&dev->event_lock, flags); - retval = dev->getkeycode(dev, scancode, &old_keycode); - if (retval) - goto out; + if (dev->setkeycode) { + /* + * Support for legacy drivers, that don't implement the new + * ioctls + */ + unsigned int scancode; + + retval = input_scancode_to_scalar(ke, &scancode); + if (retval) + goto out; + + /* + * We need to know the old scancode, in order to generate a + * keyup effect, if the set operation happens successfully + */ + if (!dev->getkeycode) { + retval = -EINVAL; + goto out; + } + + retval = dev->getkeycode(dev, scancode, &old_keycode); + if (retval) + goto out; + + retval = dev->setkeycode(dev, scancode, ke->keycode); + } else { + retval = dev->setkeycode_new(dev, ke, &old_keycode); + } - retval = dev->setkeycode(dev, scancode, keycode); if (retval) goto out; @@ -1759,11 +1855,11 @@ int input_register_device(struct input_dev *dev) dev->rep[REP_PERIOD] = 33; } - if (!dev->getkeycode) - dev->getkeycode = input_default_getkeycode; + if (!dev->getkeycode && !dev->getkeycode_new) + dev->getkeycode_new = input_default_getkeycode; - if (!dev->setkeycode) - dev->setkeycode = input_default_setkeycode; + if (!dev->setkeycode && !dev->setkeycode_new) + dev->setkeycode_new = input_default_setkeycode; dev_set_name(&dev->dev, "input%ld", (unsigned long) atomic_inc_return(&input_no) - 1); diff --git a/include/linux/input.h b/include/linux/input.h index 789265123531..0057698fd975 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -34,7 +34,7 @@ struct input_event { * Protocol version. */ -#define EV_VERSION 0x010000 +#define EV_VERSION 0x010001 /* * IOCTLs (0x00 - 0x7f) @@ -56,12 +56,37 @@ struct input_absinfo { __s32 resolution; }; +/** + * struct input_keymap_entry - used by EVIOCGKEYCODE/EVIOCSKEYCODE ioctls + * @scancode: scancode represented in machine-endian form. + * @len: length of the scancode that resides in @scancode buffer. + * @index: index in the keymap, may be used instead of scancode + * @flags: allows to specify how kernel should handle the request. For + * example, setting INPUT_KEYMAP_BY_INDEX flag indicates that kernel + * should perform lookup in keymap by @index instead of @scancode + * @keycode: key code assigned to this scancode + * + * The structure is used to retrieve and modify keymap data. Users have + * option of performing lookup either by @scancode itself or by @index + * in keymap entry. EVIOCGKEYCODE will also return scancode or index + * (depending on which element was used to perform lookup). + */ +struct input_keymap_entry { +#define INPUT_KEYMAP_BY_INDEX (1 << 0) + __u8 flags; + __u8 len; + __u16 index; + __u32 keycode; + __u8 scancode[32]; +}; + #define EVIOCGVERSION _IOR('E', 0x01, int) /* get driver version */ #define EVIOCGID _IOR('E', 0x02, struct input_id) /* get device ID */ #define EVIOCGREP _IOR('E', 0x03, unsigned int[2]) /* get repeat settings */ #define EVIOCSREP _IOW('E', 0x03, unsigned int[2]) /* set repeat settings */ -#define EVIOCGKEYCODE _IOR('E', 0x04, unsigned int[2]) /* get keycode */ -#define EVIOCSKEYCODE _IOW('E', 0x04, unsigned int[2]) /* set keycode */ + +#define EVIOCGKEYCODE _IOR('E', 0x04, struct input_keymap_entry) /* get keycode */ +#define EVIOCSKEYCODE _IOW('E', 0x04, struct input_keymap_entry) /* set keycode */ #define EVIOCGNAME(len) _IOC(_IOC_READ, 'E', 0x06, len) /* get device name */ #define EVIOCGPHYS(len) _IOC(_IOC_READ, 'E', 0x07, len) /* get physical location */ @@ -73,8 +98,8 @@ struct input_absinfo { #define EVIOCGSW(len) _IOC(_IOC_READ, 'E', 0x1b, len) /* get all switch states */ #define EVIOCGBIT(ev,len) _IOC(_IOC_READ, 'E', 0x20 + ev, len) /* get event bits */ -#define EVIOCGABS(abs) _IOR('E', 0x40 + abs, struct input_absinfo) /* get abs value/limits */ -#define EVIOCSABS(abs) _IOW('E', 0xc0 + abs, struct input_absinfo) /* set abs value/limits */ +#define EVIOCGABS(abs) _IOR('E', 0x40 + abs, struct input_absinfo) /* get abs value/limits */ +#define EVIOCSABS(abs) _IOW('E', 0xc0 + abs, struct input_absinfo) /* set abs value/limits */ #define EVIOCSFF _IOC(_IOC_WRITE, 'E', 0x80, sizeof(struct ff_effect)) /* send a force effect to a force feedback device */ #define EVIOCRMFF _IOW('E', 0x81, int) /* Erase a force effect */ @@ -1088,13 +1113,13 @@ struct input_mt_slot { * @keycodemax: size of keycode table * @keycodesize: size of elements in keycode table * @keycode: map of scancodes to keycodes for this device + * @getkeycode: optional legacy method to retrieve current keymap. * @setkeycode: optional method to alter current keymap, used to implement * sparse keymaps. If not supplied default mechanism will be used. * The method is being called while holding event_lock and thus must * not sleep - * @getkeycode: optional method to retrieve current keymap. If not supplied - * default mechanism will be used. The method is being called while - * holding event_lock and thus must not sleep + * @getkeycode_new: transition method + * @setkeycode_new: transition method * @ff: force feedback structure associated with the device if device * supports force feedback effects * @repeat_key: stores key code of the last key pressed; used to implement @@ -1168,10 +1193,16 @@ struct input_dev { unsigned int keycodemax; unsigned int keycodesize; void *keycode; + int (*setkeycode)(struct input_dev *dev, unsigned int scancode, unsigned int keycode); int (*getkeycode)(struct input_dev *dev, unsigned int scancode, unsigned int *keycode); + int (*setkeycode_new)(struct input_dev *dev, + const struct input_keymap_entry *ke, + unsigned int *old_keycode); + int (*getkeycode_new)(struct input_dev *dev, + struct input_keymap_entry *ke); struct ff_device *ff; @@ -1478,10 +1509,12 @@ INPUT_GENERATE_ABS_ACCESSORS(fuzz, fuzz) INPUT_GENERATE_ABS_ACCESSORS(flat, flat) INPUT_GENERATE_ABS_ACCESSORS(res, resolution) -int input_get_keycode(struct input_dev *dev, - unsigned int scancode, unsigned int *keycode); +int input_scancode_to_scalar(const struct input_keymap_entry *ke, + unsigned int *scancode); + +int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke); int input_set_keycode(struct input_dev *dev, - unsigned int scancode, unsigned int keycode); + const struct input_keymap_entry *ke); extern struct class input_class; -- cgit v1.2.3 From 9f470095068e415658ccc6977cf4b3f5be418526 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 9 Sep 2010 21:59:11 -0700 Subject: Input: media/IR - switch to using new keycode interface Switch the code to use new style of getkeycode and setkeycode methods to allow retrieving and setting keycodes not only by their scancodes but also by index. Acked-by: Mauro Carvalho Chehab Signed-off-by: Dmitry Torokhov --- drivers/media/IR/ir-keytable.c | 393 +++++++++++++++++++++++++++-------------- include/media/rc-map.h | 2 +- 2 files changed, 263 insertions(+), 132 deletions(-) (limited to 'include') diff --git a/drivers/media/IR/ir-keytable.c b/drivers/media/IR/ir-keytable.c index 7e82a9df726b..5ca36a42f019 100644 --- a/drivers/media/IR/ir-keytable.c +++ b/drivers/media/IR/ir-keytable.c @@ -24,15 +24,57 @@ /* FIXME: IR_KEYPRESS_TIMEOUT should be protocol specific */ #define IR_KEYPRESS_TIMEOUT 250 +/** + * ir_create_table() - initializes a scancode table + * @rc_tab: the ir_scancode_table to initialize + * @name: name to assign to the table + * @ir_type: ir type to assign to the new table + * @size: initial size of the table + * @return: zero on success or a negative error code + * + * This routine will initialize the ir_scancode_table and will allocate + * memory to hold at least the specified number elements. + */ +static int ir_create_table(struct ir_scancode_table *rc_tab, + const char *name, u64 ir_type, size_t size) +{ + rc_tab->name = name; + rc_tab->ir_type = ir_type; + rc_tab->alloc = roundup_pow_of_two(size * sizeof(struct ir_scancode)); + rc_tab->size = rc_tab->alloc / sizeof(struct ir_scancode); + rc_tab->scan = kmalloc(rc_tab->alloc, GFP_KERNEL); + if (!rc_tab->scan) + return -ENOMEM; + + IR_dprintk(1, "Allocated space for %u keycode entries (%u bytes)\n", + rc_tab->size, rc_tab->alloc); + return 0; +} + +/** + * ir_free_table() - frees memory allocated by a scancode table + * @rc_tab: the table whose mappings need to be freed + * + * This routine will free memory alloctaed for key mappings used by given + * scancode table. + */ +static void ir_free_table(struct ir_scancode_table *rc_tab) +{ + rc_tab->size = 0; + kfree(rc_tab->scan); + rc_tab->scan = NULL; +} + /** * ir_resize_table() - resizes a scancode table if necessary * @rc_tab: the ir_scancode_table to resize + * @gfp_flags: gfp flags to use when allocating memory * @return: zero on success or a negative error code * * This routine will shrink the ir_scancode_table if it has lots of * unused entries and grow it if it is full. */ -static int ir_resize_table(struct ir_scancode_table *rc_tab) +static int ir_resize_table(struct ir_scancode_table *rc_tab, gfp_t gfp_flags) { unsigned int oldalloc = rc_tab->alloc; unsigned int newalloc = oldalloc; @@ -57,7 +99,7 @@ static int ir_resize_table(struct ir_scancode_table *rc_tab) if (newalloc == oldalloc) return 0; - newscan = kmalloc(newalloc, GFP_ATOMIC); + newscan = kmalloc(newalloc, gfp_flags); if (!newscan) { IR_dprintk(1, "Failed to kmalloc %u bytes\n", newalloc); return -ENOMEM; @@ -72,26 +114,78 @@ static int ir_resize_table(struct ir_scancode_table *rc_tab) } /** - * ir_do_setkeycode() - internal function to set a keycode in the - * scancode->keycode table + * ir_update_mapping() - set a keycode in the scancode->keycode table * @dev: the struct input_dev device descriptor - * @rc_tab: the struct ir_scancode_table to set the keycode in - * @scancode: the scancode for the ir command - * @keycode: the keycode for the ir command - * @resize: whether the keytable may be shrunk - * @return: -EINVAL if the keycode could not be inserted, otherwise zero. + * @rc_tab: scancode table to be adjusted + * @index: index of the mapping that needs to be updated + * @keycode: the desired keycode + * @return: previous keycode assigned to the mapping + * + * This routine is used to update scancode->keycopde mapping at given + * position. + */ +static unsigned int ir_update_mapping(struct input_dev *dev, + struct ir_scancode_table *rc_tab, + unsigned int index, + unsigned int new_keycode) +{ + int old_keycode = rc_tab->scan[index].keycode; + int i; + + /* Did the user wish to remove the mapping? */ + if (new_keycode == KEY_RESERVED || new_keycode == KEY_UNKNOWN) { + IR_dprintk(1, "#%d: Deleting scan 0x%04x\n", + index, rc_tab->scan[index].scancode); + rc_tab->len--; + memmove(&rc_tab->scan[index], &rc_tab->scan[index+ 1], + (rc_tab->len - index) * sizeof(struct ir_scancode)); + } else { + IR_dprintk(1, "#%d: %s scan 0x%04x with key 0x%04x\n", + index, + old_keycode == KEY_RESERVED ? "New" : "Replacing", + rc_tab->scan[index].scancode, new_keycode); + rc_tab->scan[index].keycode = new_keycode; + __set_bit(new_keycode, dev->keybit); + } + + if (old_keycode != KEY_RESERVED) { + /* A previous mapping was updated... */ + __clear_bit(old_keycode, dev->keybit); + /* ... but another scancode might use the same keycode */ + for (i = 0; i < rc_tab->len; i++) { + if (rc_tab->scan[i].keycode == old_keycode) { + __set_bit(old_keycode, dev->keybit); + break; + } + } + + /* Possibly shrink the keytable, failure is not a problem */ + ir_resize_table(rc_tab, GFP_ATOMIC); + } + + return old_keycode; +} + +/** + * ir_locate_scancode() - set a keycode in the scancode->keycode table + * @ir_dev: the struct ir_input_dev device descriptor + * @rc_tab: scancode table to be searched + * @scancode: the desired scancode + * @resize: controls whether we allowed to resize the table to + * accomodate not yet present scancodes + * @return: index of the mapping containing scancode in question + * or -1U in case of failure. * - * This routine is used internally to manipulate the scancode->keycode table. - * The caller has to hold @rc_tab->lock. + * This routine is used to locate given scancode in ir_scancode_table. + * If scancode is not yet present the routine will allocate a new slot + * for it. */ -static int ir_do_setkeycode(struct input_dev *dev, - struct ir_scancode_table *rc_tab, - unsigned scancode, unsigned keycode, - bool resize) +static unsigned int ir_establish_scancode(struct ir_input_dev *ir_dev, + struct ir_scancode_table *rc_tab, + unsigned int scancode, + bool resize) { unsigned int i; - int old_keycode = KEY_RESERVED; - struct ir_input_dev *ir_dev = input_get_drvdata(dev); /* * Unfortunately, some hardware-based IR decoders don't provide @@ -100,65 +194,34 @@ static int ir_do_setkeycode(struct input_dev *dev, * the provided IR with another one, it is needed to allow loading * IR tables from other remotes. So, */ - if (ir_dev->props && ir_dev->props->scanmask) { + if (ir_dev->props && ir_dev->props->scanmask) scancode &= ir_dev->props->scanmask; - } /* First check if we already have a mapping for this ir command */ for (i = 0; i < rc_tab->len; i++) { + if (rc_tab->scan[i].scancode == scancode) + return i; + /* Keytable is sorted from lowest to highest scancode */ - if (rc_tab->scan[i].scancode > scancode) + if (rc_tab->scan[i].scancode >= scancode) break; - else if (rc_tab->scan[i].scancode < scancode) - continue; - - old_keycode = rc_tab->scan[i].keycode; - rc_tab->scan[i].keycode = keycode; - - /* Did the user wish to remove the mapping? */ - if (keycode == KEY_RESERVED || keycode == KEY_UNKNOWN) { - IR_dprintk(1, "#%d: Deleting scan 0x%04x\n", - i, scancode); - rc_tab->len--; - memmove(&rc_tab->scan[i], &rc_tab->scan[i + 1], - (rc_tab->len - i) * sizeof(struct ir_scancode)); - } - - /* Possibly shrink the keytable, failure is not a problem */ - ir_resize_table(rc_tab); - break; } - if (old_keycode == KEY_RESERVED && keycode != KEY_RESERVED) { - /* No previous mapping found, we might need to grow the table */ - if (resize && ir_resize_table(rc_tab)) - return -ENOMEM; - - IR_dprintk(1, "#%d: New scan 0x%04x with key 0x%04x\n", - i, scancode, keycode); + /* No previous mapping found, we might need to grow the table */ + if (rc_tab->size == rc_tab->len) { + if (!resize || ir_resize_table(rc_tab, GFP_ATOMIC)) + return -1U; + } - /* i is the proper index to insert our new keycode */ + /* i is the proper index to insert our new keycode */ + if (i < rc_tab->len) memmove(&rc_tab->scan[i + 1], &rc_tab->scan[i], (rc_tab->len - i) * sizeof(struct ir_scancode)); - rc_tab->scan[i].scancode = scancode; - rc_tab->scan[i].keycode = keycode; - rc_tab->len++; - set_bit(keycode, dev->keybit); - } else { - IR_dprintk(1, "#%d: Replacing scan 0x%04x with key 0x%04x\n", - i, scancode, keycode); - /* A previous mapping was updated... */ - clear_bit(old_keycode, dev->keybit); - /* ...but another scancode might use the same keycode */ - for (i = 0; i < rc_tab->len; i++) { - if (rc_tab->scan[i].keycode == old_keycode) { - set_bit(old_keycode, dev->keybit); - break; - } - } - } + rc_tab->scan[i].scancode = scancode; + rc_tab->scan[i].keycode = KEY_RESERVED; + rc_tab->len++; - return 0; + return i; } /** @@ -171,17 +234,41 @@ static int ir_do_setkeycode(struct input_dev *dev, * This routine is used to handle evdev EVIOCSKEY ioctl. */ static int ir_setkeycode(struct input_dev *dev, - unsigned int scancode, unsigned int keycode) + const struct input_keymap_entry *ke, + unsigned int *old_keycode) { - int rc; - unsigned long flags; struct ir_input_dev *ir_dev = input_get_drvdata(dev); struct ir_scancode_table *rc_tab = &ir_dev->rc_tab; + unsigned int index; + unsigned int scancode; + int retval; + unsigned long flags; spin_lock_irqsave(&rc_tab->lock, flags); - rc = ir_do_setkeycode(dev, rc_tab, scancode, keycode, true); + + if (ke->flags & INPUT_KEYMAP_BY_INDEX) { + index = ke->index; + if (index >= rc_tab->len) { + retval = -EINVAL; + goto out; + } + } else { + retval = input_scancode_to_scalar(ke, &scancode); + if (retval) + goto out; + + index = ir_establish_scancode(ir_dev, rc_tab, scancode, true); + if (index >= rc_tab->len) { + retval = -ENOMEM; + goto out; + } + } + + *old_keycode = ir_update_mapping(dev, rc_tab, index, ke->keycode); + +out: spin_unlock_irqrestore(&rc_tab->lock, flags); - return rc; + return retval; } /** @@ -189,31 +276,72 @@ static int ir_setkeycode(struct input_dev *dev, * @dev: the struct input_dev device descriptor * @to: the struct ir_scancode_table to copy entries to * @from: the struct ir_scancode_table to copy entries from - * @return: -EINVAL if all keycodes could not be inserted, otherwise zero. + * @return: -ENOMEM if all keycodes could not be inserted, otherwise zero. * * This routine is used to handle table initialization. */ -static int ir_setkeytable(struct input_dev *dev, - struct ir_scancode_table *to, +static int ir_setkeytable(struct ir_input_dev *ir_dev, const struct ir_scancode_table *from) { - struct ir_input_dev *ir_dev = input_get_drvdata(dev); struct ir_scancode_table *rc_tab = &ir_dev->rc_tab; - unsigned long flags; - unsigned int i; - int rc = 0; + unsigned int i, index; + int rc; + + rc = ir_create_table(&ir_dev->rc_tab, + from->name, from->ir_type, from->size); + if (rc) + return rc; + + IR_dprintk(1, "Allocated space for %u keycode entries (%u bytes)\n", + rc_tab->size, rc_tab->alloc); - spin_lock_irqsave(&rc_tab->lock, flags); for (i = 0; i < from->size; i++) { - rc = ir_do_setkeycode(dev, to, from->scan[i].scancode, - from->scan[i].keycode, false); - if (rc) + index = ir_establish_scancode(ir_dev, rc_tab, + from->scan[i].scancode, false); + if (index >= rc_tab->len) { + rc = -ENOMEM; break; + } + + ir_update_mapping(ir_dev->input_dev, rc_tab, index, + from->scan[i].keycode); } - spin_unlock_irqrestore(&rc_tab->lock, flags); + + if (rc) + ir_free_table(rc_tab); + return rc; } +/** + * ir_lookup_by_scancode() - locate mapping by scancode + * @rc_tab: the &struct ir_scancode_table to search + * @scancode: scancode to look for in the table + * @return: index in the table, -1U if not found + * + * This routine performs binary search in RC keykeymap table for + * given scancode. + */ +static unsigned int ir_lookup_by_scancode(const struct ir_scancode_table *rc_tab, + unsigned int scancode) +{ + unsigned int start = 0; + unsigned int end = rc_tab->len - 1; + unsigned int mid; + + while (start <= end) { + mid = (start + end) / 2; + if (rc_tab->scan[mid].scancode < scancode) + start = mid + 1; + else if (rc_tab->scan[mid].scancode > scancode) + end = mid - 1; + else + return mid; + } + + return -1U; +} + /** * ir_getkeycode() - get a keycode from the scancode->keycode table * @dev: the struct input_dev device descriptor @@ -224,36 +352,46 @@ static int ir_setkeytable(struct input_dev *dev, * This routine is used to handle evdev EVIOCGKEY ioctl. */ static int ir_getkeycode(struct input_dev *dev, - unsigned int scancode, unsigned int *keycode) + struct input_keymap_entry *ke) { - int start, end, mid; - unsigned long flags; - int key = KEY_RESERVED; struct ir_input_dev *ir_dev = input_get_drvdata(dev); struct ir_scancode_table *rc_tab = &ir_dev->rc_tab; + struct ir_scancode *entry; + unsigned long flags; + unsigned int index; + unsigned int scancode; + int retval; spin_lock_irqsave(&rc_tab->lock, flags); - start = 0; - end = rc_tab->len - 1; - while (start <= end) { - mid = (start + end) / 2; - if (rc_tab->scan[mid].scancode < scancode) - start = mid + 1; - else if (rc_tab->scan[mid].scancode > scancode) - end = mid - 1; - else { - key = rc_tab->scan[mid].keycode; - break; - } + + if (ke->flags & INPUT_KEYMAP_BY_INDEX) { + index = ke->index; + } else { + retval = input_scancode_to_scalar(ke, &scancode); + if (retval) + goto out; + + index = ir_lookup_by_scancode(rc_tab, scancode); + } + + if (index >= rc_tab->len) { + if (!(ke->flags & INPUT_KEYMAP_BY_INDEX)) + IR_dprintk(1, "unknown key for scancode 0x%04x\n", + scancode); + retval = -EINVAL; + goto out; } - spin_unlock_irqrestore(&rc_tab->lock, flags); - if (key == KEY_RESERVED) - IR_dprintk(1, "unknown key for scancode 0x%04x\n", - scancode); + entry = &rc_tab->scan[index]; - *keycode = key; - return 0; + ke->index = index; + ke->keycode = entry->keycode; + ke->len = sizeof(entry->scancode); + memcpy(ke->scancode, &entry->scancode, sizeof(entry->scancode)); + +out: + spin_unlock_irqrestore(&rc_tab->lock, flags); + return retval; } /** @@ -268,12 +406,24 @@ static int ir_getkeycode(struct input_dev *dev, */ u32 ir_g_keycode_from_table(struct input_dev *dev, u32 scancode) { - int keycode; + struct ir_input_dev *ir_dev = input_get_drvdata(dev); + struct ir_scancode_table *rc_tab = &ir_dev->rc_tab; + unsigned int keycode; + unsigned int index; + unsigned long flags; + + spin_lock_irqsave(&rc_tab->lock, flags); + + index = ir_lookup_by_scancode(rc_tab, scancode); + keycode = index < rc_tab->len ? + rc_tab->scan[index].keycode : KEY_RESERVED; + + spin_unlock_irqrestore(&rc_tab->lock, flags); - ir_getkeycode(dev, scancode, &keycode); if (keycode != KEY_RESERVED) IR_dprintk(1, "%s: scancode 0x%04x keycode 0x%02x\n", dev->name, scancode, keycode); + return keycode; } EXPORT_SYMBOL_GPL(ir_g_keycode_from_table); @@ -453,8 +603,8 @@ int __ir_input_register(struct input_dev *input_dev, goto out_dev; } - input_dev->getkeycode = ir_getkeycode; - input_dev->setkeycode = ir_setkeycode; + input_dev->getkeycode_new = ir_getkeycode; + input_dev->setkeycode_new = ir_setkeycode; input_set_drvdata(input_dev, ir_dev); ir_dev->input_dev = input_dev; @@ -462,12 +612,6 @@ int __ir_input_register(struct input_dev *input_dev, spin_lock_init(&ir_dev->keylock); setup_timer(&ir_dev->timer_keyup, ir_timer_keyup, (unsigned long)ir_dev); - ir_dev->rc_tab.name = rc_tab->name; - ir_dev->rc_tab.ir_type = rc_tab->ir_type; - ir_dev->rc_tab.alloc = roundup_pow_of_two(rc_tab->size * - sizeof(struct ir_scancode)); - ir_dev->rc_tab.scan = kmalloc(ir_dev->rc_tab.alloc, GFP_KERNEL); - ir_dev->rc_tab.size = ir_dev->rc_tab.alloc / sizeof(struct ir_scancode); if (props) { ir_dev->props = props; if (props->open) @@ -476,23 +620,14 @@ int __ir_input_register(struct input_dev *input_dev, input_dev->close = ir_close; } - if (!ir_dev->rc_tab.scan) { - rc = -ENOMEM; - goto out_name; - } - - IR_dprintk(1, "Allocated space for %u keycode entries (%u bytes)\n", - ir_dev->rc_tab.size, ir_dev->rc_tab.alloc); - set_bit(EV_KEY, input_dev->evbit); set_bit(EV_REP, input_dev->evbit); set_bit(EV_MSC, input_dev->evbit); set_bit(MSC_SCAN, input_dev->mscbit); - if (ir_setkeytable(input_dev, &ir_dev->rc_tab, rc_tab)) { - rc = -ENOMEM; - goto out_table; - } + rc = ir_setkeytable(ir_dev, rc_tab); + if (rc) + goto out_name; rc = ir_register_class(input_dev); if (rc < 0) @@ -515,7 +650,7 @@ int __ir_input_register(struct input_dev *input_dev, out_event: ir_unregister_class(input_dev); out_table: - kfree(ir_dev->rc_tab.scan); + ir_free_table(&ir_dev->rc_tab); out_name: kfree(ir_dev->driver_name); out_dev: @@ -533,7 +668,6 @@ EXPORT_SYMBOL_GPL(__ir_input_register); void ir_input_unregister(struct input_dev *input_dev) { struct ir_input_dev *ir_dev = input_get_drvdata(input_dev); - struct ir_scancode_table *rc_tab; if (!ir_dev) return; @@ -545,10 +679,7 @@ void ir_input_unregister(struct input_dev *input_dev) if (ir_dev->props->driver_type == RC_DRIVER_IR_RAW) ir_raw_event_unregister(input_dev); - rc_tab = &ir_dev->rc_tab; - rc_tab->size = 0; - kfree(rc_tab->scan); - rc_tab->scan = NULL; + ir_free_table(&ir_dev->rc_tab); ir_unregister_class(input_dev); diff --git a/include/media/rc-map.h b/include/media/rc-map.h index a9c041d49662..9b201ec8dc7f 100644 --- a/include/media/rc-map.h +++ b/include/media/rc-map.h @@ -35,7 +35,7 @@ struct ir_scancode_table { unsigned int len; /* Used number of entries */ unsigned int alloc; /* Size of *scan in bytes */ u64 ir_type; - char *name; + const char *name; spinlock_t lock; }; -- cgit v1.2.3 From db7829c6cc32f3c0c9a324118d743acb1abff081 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 9 Sep 2010 18:17:26 +0200 Subject: x86, percpu: Optimize this_cpu_ptr Allow arches to implement __this_cpu_ptr, and provide an x86 version. Before: movq $foo, %rax movq %gs:this_cpu_off, %rdx addq %rdx, %rax After: movq $foo, %rax addq %gs:this_cpu_off, %rax The benefit is doing it in one less instruction and not clobbering a temporary register. tj: * Beefed up the comment a bit and renamed in-macro temp variable to match neighboring macros. * Folded fix for const pointer case found in linux-next. * Fixed sparse notation. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 14 ++++++++++++++ include/asm-generic/percpu.h | 9 +++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index cd28f9ad910d..f899e01a8ac9 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -47,6 +47,20 @@ #ifdef CONFIG_SMP #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x #define __my_cpu_offset percpu_read(this_cpu_off) + +/* + * Compared to the generic __my_cpu_offset version, the following + * saves one instruction and avoids clobbering a temp register. + */ +#define __this_cpu_ptr(ptr) \ +({ \ + unsigned long tcp_ptr__; \ + __verify_pcpu_ptr(ptr); \ + asm volatile("add " __percpu_arg(1) ", %0" \ + : "=r" (tcp_ptr__) \ + : "m" (this_cpu_off), "0" (ptr)); \ + (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \ +}) #else #define __percpu_arg(x) "%P" #x #endif diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 08923b684768..ec643115bb56 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -60,9 +60,14 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define __raw_get_cpu_var(var) \ (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) -#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#ifndef __this_cpu_ptr #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) - +#endif +#ifdef CONFIG_DEBUG_PREEMPT +#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#else +#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr) +#endif #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); -- cgit v1.2.3 From 677243d7494d09bfa782425f063a6013de53c35b Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 9 Sep 2010 18:17:26 +0200 Subject: percpu: Optimize __get_cpu_var() Redefine __get_cpu_var() using this_cpu_ptr() which can be arch-optimized. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- include/asm-generic/percpu.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index ec643115bb56..d17784ea37ff 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -55,10 +55,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; */ #define per_cpu(var, cpu) \ (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) -#define __get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&(var), my_cpu_offset)) -#define __raw_get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) #ifndef __this_cpu_ptr #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) @@ -69,6 +65,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define this_cpu_ptr(ptr) __this_cpu_ptr(ptr) #endif +#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) +#define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var))) + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); #endif -- cgit v1.2.3 From 0da2f50944976e890ccc9436ab88c0da87788d02 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: ide: remove unnecessary blk_queue_flushing() test in do_ide_request() Unplugging from a request function doesn't really help much (it's already in the request_fn) and soon block layer will be updated to mix barrier sequence with other commands, so there's no need to treat queue flushing any differently. ide was the only user of blk_queue_flushing(). Remove it. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Acked-by: David S. Miller Signed-off-by: Jens Axboe --- drivers/ide/ide-io.c | 13 ------------- include/linux/blkdev.h | 1 - 2 files changed, 14 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index a381be814070..999dac054bcc 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -441,19 +441,6 @@ void do_ide_request(struct request_queue *q) struct request *rq = NULL; ide_startstop_t startstop; - /* - * drive is doing pre-flush, ordered write, post-flush sequence. even - * though that is 3 requests, it must be seen as a single transaction. - * we must not preempt this drive until that is complete - */ - if (blk_queue_flushing(q)) - /* - * small race where queue could get replugged during - * the 3-request flush cycle, just yank the plug since - * we want it to finish asap - */ - blk_remove_plug(q); - spin_unlock_irq(q->queue_lock); /* HLD do_request() callback might sleep, make sure it's okay */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c54906f678f..015375c7d031 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -521,7 +521,6 @@ enum { #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) -#define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) -- cgit v1.2.3 From 6958f145459ca7ad9715024de97445addacb8510 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: kill QUEUE_ORDERED_BY_TAG Nobody is making meaningful use of ORDERED_BY_TAG now and queue draining for barrier requests will be removed soon which will render the advantage of tag ordering moot. Kill ORDERED_BY_TAG. The following users are affected. * brd: converted to ORDERED_DRAIN. * virtio_blk: ORDERED_TAG path was already marked deprecated. Removed. * xen-blkfront: ORDERED_TAG case dropped. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Cc: Nick Piggin Cc: Michael S. Tsirkin Cc: Jeremy Fitzhardinge Cc: Chris Wright Signed-off-by: Jens Axboe --- block/blk-barrier.c | 35 +++++++---------------------------- drivers/block/brd.c | 2 +- drivers/block/virtio_blk.c | 9 --------- drivers/block/xen-blkfront.c | 8 +++----- drivers/scsi/sd.c | 4 +--- include/linux/blkdev.h | 17 +---------------- 6 files changed, 13 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index f0faefca032f..c807e9ca3a68 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -26,10 +26,7 @@ int blk_queue_ordered(struct request_queue *q, unsigned ordered) if (ordered != QUEUE_ORDERED_NONE && ordered != QUEUE_ORDERED_DRAIN && ordered != QUEUE_ORDERED_DRAIN_FLUSH && - ordered != QUEUE_ORDERED_DRAIN_FUA && - ordered != QUEUE_ORDERED_TAG && - ordered != QUEUE_ORDERED_TAG_FLUSH && - ordered != QUEUE_ORDERED_TAG_FUA) { + ordered != QUEUE_ORDERED_DRAIN_FUA) { printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); return -EINVAL; } @@ -155,21 +152,9 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) * For an empty barrier, there's no actual BAR request, which * in turn makes POSTFLUSH unnecessary. Mask them off. */ - if (!blk_rq_sectors(rq)) { + if (!blk_rq_sectors(rq)) q->ordered &= ~(QUEUE_ORDERED_DO_BAR | QUEUE_ORDERED_DO_POSTFLUSH); - /* - * Empty barrier on a write-through device w/ ordered - * tag has no command to issue and without any command - * to issue, ordering by tag can't be used. Drain - * instead. - */ - if ((q->ordered & QUEUE_ORDERED_BY_TAG) && - !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) { - q->ordered &= ~QUEUE_ORDERED_BY_TAG; - q->ordered |= QUEUE_ORDERED_BY_DRAIN; - } - } /* stash away the original request */ blk_dequeue_request(rq); @@ -210,7 +195,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) } else skip |= QUEUE_ORDSEQ_PREFLUSH; - if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q)) + if (queue_in_flight(q)) rq = NULL; else skip |= QUEUE_ORDSEQ_DRAIN; @@ -257,16 +242,10 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) rq != &q->pre_flush_rq && rq != &q->post_flush_rq) return true; - if (q->ordered & QUEUE_ORDERED_BY_TAG) { - /* Ordered by tag. Blocking the next barrier is enough. */ - if (is_barrier && rq != &q->bar_rq) - *rqp = NULL; - } else { - /* Ordered by draining. Wait for turn. */ - WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); - if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) - *rqp = NULL; - } + /* Ordered by draining. Wait for turn. */ + WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); + if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) + *rqp = NULL; return true; } diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 1c7f63792ff8..47a41272d26b 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -482,7 +482,7 @@ static struct brd_device *brd_alloc(int i) if (!brd->brd_queue) goto out_free_dev; blk_queue_make_request(brd->brd_queue, brd_make_request); - blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG); + blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_DRAIN); blk_queue_max_hw_sectors(brd->brd_queue, 1024); blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 2aafafca2b13..79652809eee8 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -395,15 +395,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) * to implement write barrier support. */ blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); - } else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER)) { - /* - * If the BARRIER feature is supported the host expects us - * to order request by tags. This implies there is not - * volatile write cache on the host, and that the host - * never re-orders outstanding I/O. This feature is not - * useful for real life scenarious and deprecated. - */ - blk_queue_ordered(q, QUEUE_ORDERED_TAG); } else { /* * If the FLUSH feature is not supported we must assume that diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ac1b682edecb..50ec6f834996 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -424,8 +424,7 @@ static int xlvbd_barrier(struct blkfront_info *info) const char *barrier; switch (info->feature_barrier) { - case QUEUE_ORDERED_DRAIN: barrier = "enabled (drain)"; break; - case QUEUE_ORDERED_TAG: barrier = "enabled (tag)"; break; + case QUEUE_ORDERED_DRAIN: barrier = "enabled"; break; case QUEUE_ORDERED_NONE: barrier = "disabled"; break; default: return -EINVAL; } @@ -1078,8 +1077,7 @@ static void blkfront_connect(struct blkfront_info *info) * we're dealing with a very old backend which writes * synchronously; draining will do what needs to get done. * - * If there are barriers, then we can do full queued writes - * with tagged barriers. + * If there are barriers, then we use flush. * * If barriers are not supported, then there's no much we can * do, so just set ordering to NONE. @@ -1087,7 +1085,7 @@ static void blkfront_connect(struct blkfront_info *info) if (err) info->feature_barrier = QUEUE_ORDERED_DRAIN; else if (barrier) - info->feature_barrier = QUEUE_ORDERED_TAG; + info->feature_barrier = QUEUE_ORDERED_DRAIN_FLUSH; else info->feature_barrier = QUEUE_ORDERED_NONE; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 2714becc2eaf..cdfc51ab9cf2 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2151,9 +2151,7 @@ static int sd_revalidate_disk(struct gendisk *disk) /* * We now have all cache related info, determine how we deal - * with ordered requests. Note that as the current SCSI - * dispatch function can alter request order, we cannot use - * QUEUE_ORDERED_TAG_* even when ordered tag is supported. + * with ordered requests. */ if (sdkp->WCE) ordered = sdkp->DPOFUA diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 015375c7d031..7077bc0d6138 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -470,12 +470,7 @@ enum { * DRAIN : ordering by draining is enough * DRAIN_FLUSH : ordering by draining w/ pre and post flushes * DRAIN_FUA : ordering by draining w/ pre flush and FUA write - * TAG : ordering by tag is enough - * TAG_FLUSH : ordering by tag w/ pre and post flushes - * TAG_FUA : ordering by tag w/ pre flush and FUA write */ - QUEUE_ORDERED_BY_DRAIN = 0x01, - QUEUE_ORDERED_BY_TAG = 0x02, QUEUE_ORDERED_DO_PREFLUSH = 0x10, QUEUE_ORDERED_DO_BAR = 0x20, QUEUE_ORDERED_DO_POSTFLUSH = 0x40, @@ -483,8 +478,7 @@ enum { QUEUE_ORDERED_NONE = 0x00, - QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN | - QUEUE_ORDERED_DO_BAR, + QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR, QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | QUEUE_ORDERED_DO_PREFLUSH | QUEUE_ORDERED_DO_POSTFLUSH, @@ -492,15 +486,6 @@ enum { QUEUE_ORDERED_DO_PREFLUSH | QUEUE_ORDERED_DO_FUA, - QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG | - QUEUE_ORDERED_DO_BAR, - QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH, - QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_FUA, - /* * Ordered operation sequence */ -- cgit v1.2.3 From 4913efe456c987057e5d36a3f0a55422a9072cae Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: deprecate barrier and replace blk_queue_ordered() with blk_queue_flush() Barrier is deemed too heavy and will soon be replaced by FLUSH/FUA requests. Deprecate barrier. All REQ_HARDBARRIERs are failed with -EOPNOTSUPP and blk_queue_ordered() is replaced with simpler blk_queue_flush(). blk_queue_flush() takes combinations of REQ_FLUSH and FUA. If a device has write cache and can flush it, it should set REQ_FLUSH. If the device can handle FUA writes, it should also set REQ_FUA. All blk_queue_ordered() users are converted. * ORDERED_DRAIN is mapped to 0 which is the default value. * ORDERED_DRAIN_FLUSH is mapped to REQ_FLUSH. * ORDERED_DRAIN_FLUSH_FUA is mapped to REQ_FLUSH | REQ_FUA. Signed-off-by: Tejun Heo Acked-by: Boaz Harrosh Cc: Christoph Hellwig Cc: Nick Piggin Cc: Michael S. Tsirkin Cc: Jeremy Fitzhardinge Cc: Chris Wright Cc: FUJITA Tomonori Cc: Geert Uytterhoeven Cc: David S. Miller Cc: Alasdair G Kergon Cc: Pierre Ossman Cc: Stefan Weinhuber Signed-off-by: Jens Axboe --- block/blk-barrier.c | 29 ----------------------------- block/blk-core.c | 6 ++++-- block/blk-settings.c | 20 ++++++++++++++++++++ drivers/block/brd.c | 1 - drivers/block/loop.c | 2 +- drivers/block/osdblk.c | 2 +- drivers/block/ps3disk.c | 2 +- drivers/block/virtio_blk.c | 25 +++++++++---------------- drivers/block/xen-blkfront.c | 43 ++++++++++++------------------------------- drivers/ide/ide-disk.c | 13 ++++++------- drivers/md/dm.c | 2 +- drivers/mmc/card/queue.c | 1 - drivers/s390/block/dasd.c | 1 - drivers/scsi/sd.c | 16 ++++++++-------- include/linux/blkdev.h | 6 ++++-- 15 files changed, 67 insertions(+), 102 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index c807e9ca3a68..ed0aba5463ab 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -9,35 +9,6 @@ #include "blk.h" -/** - * blk_queue_ordered - does this queue support ordered writes - * @q: the request queue - * @ordered: one of QUEUE_ORDERED_* - * - * Description: - * For journalled file systems, doing ordered writes on a commit - * block instead of explicitly doing wait_on_buffer (which is bad - * for performance) can be a big win. Block drivers supporting this - * feature should call this function and indicate so. - * - **/ -int blk_queue_ordered(struct request_queue *q, unsigned ordered) -{ - if (ordered != QUEUE_ORDERED_NONE && - ordered != QUEUE_ORDERED_DRAIN && - ordered != QUEUE_ORDERED_DRAIN_FLUSH && - ordered != QUEUE_ORDERED_DRAIN_FUA) { - printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); - return -EINVAL; - } - - q->ordered = ordered; - q->next_ordered = ordered; - - return 0; -} -EXPORT_SYMBOL(blk_queue_ordered); - /* * Cache flushing for ordered writes handling */ diff --git a/block/blk-core.c b/block/blk-core.c index ee1a1e7e63cc..f06354183b29 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1203,11 +1203,13 @@ static int __make_request(struct request_queue *q, struct bio *bio) const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; int rw_flags; - if ((bio->bi_rw & REQ_HARDBARRIER) && - (q->next_ordered == QUEUE_ORDERED_NONE)) { + /* REQ_HARDBARRIER is no more */ + if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER, + "block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) { bio_endio(bio, -EOPNOTSUPP); return 0; } + /* * low level driver can indicate that it wants pages above a * certain limit bounced to low memory (ie for highmem, or even diff --git a/block/blk-settings.c b/block/blk-settings.c index a234f4bf1d6f..9b18afcfe925 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -794,6 +794,26 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask) } EXPORT_SYMBOL(blk_queue_update_dma_alignment); +/** + * blk_queue_flush - configure queue's cache flush capability + * @q: the request queue for the device + * @flush: 0, REQ_FLUSH or REQ_FLUSH | REQ_FUA + * + * Tell block layer cache flush capability of @q. If it supports + * flushing, REQ_FLUSH should be set. If it supports bypassing + * write cache for individual writes, REQ_FUA should be set. + */ +void blk_queue_flush(struct request_queue *q, unsigned int flush) +{ + WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA)); + + if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA))) + flush &= ~REQ_FUA; + + q->flush_flags = flush & (REQ_FLUSH | REQ_FUA); +} +EXPORT_SYMBOL_GPL(blk_queue_flush); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 47a41272d26b..fa33f97722ba 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -482,7 +482,6 @@ static struct brd_device *brd_alloc(int i) if (!brd->brd_queue) goto out_free_dev; blk_queue_make_request(brd->brd_queue, brd_make_request); - blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_DRAIN); blk_queue_max_hw_sectors(brd->brd_queue, 1024); blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c3a4a2e176da..953d1e12f4d4 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -832,7 +832,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->lo_queue->unplug_fn = loop_unplug; if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) - blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(lo->lo_queue, REQ_FLUSH); set_capacity(lo->lo_disk, size); bd_set_size(bdev, size << 9); diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 2284b4f05c62..72d62462433d 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -439,7 +439,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev) blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); blk_queue_prep_rq(q, blk_queue_start_tag); - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(q, REQ_FLUSH); disk->queue = q; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index e9da874d0419..4911f9e57bc7 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -468,7 +468,7 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev) blk_queue_dma_alignment(queue, dev->blk_size-1); blk_queue_logical_block_size(queue, dev->blk_size); - blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(queue, REQ_FLUSH); blk_queue_max_segments(queue, -1); blk_queue_max_segment_size(queue, dev->bounce_size); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 79652809eee8..d10b635b3946 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -388,22 +388,15 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) vblk->disk->driverfs_dev = &vdev->dev; index++; - if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) { - /* - * If the FLUSH feature is supported we do have support for - * flushing a volatile write cache on the host. Use that - * to implement write barrier support. - */ - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); - } else { - /* - * If the FLUSH feature is not supported we must assume that - * the host does not perform any kind of volatile write - * caching. We still need to drain the queue to provider - * proper barrier semantics. - */ - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN); - } + /* + * If the FLUSH feature is supported we do have support for + * flushing a volatile write cache on the host. Use that to + * implement write barrier support; otherwise, we must assume + * that the host does not perform any kind of volatile write + * caching. + */ + if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) + blk_queue_flush(q, REQ_FLUSH); /* If disk is read-only in the host, the guest should obey */ if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 50ec6f834996..0b1eea643262 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -95,7 +95,7 @@ struct blkfront_info struct gnttab_free_callback callback; struct blk_shadow shadow[BLK_RING_SIZE]; unsigned long shadow_free; - int feature_barrier; + unsigned int feature_flush; int is_ready; }; @@ -418,25 +418,12 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) } -static int xlvbd_barrier(struct blkfront_info *info) +static void xlvbd_flush(struct blkfront_info *info) { - int err; - const char *barrier; - - switch (info->feature_barrier) { - case QUEUE_ORDERED_DRAIN: barrier = "enabled"; break; - case QUEUE_ORDERED_NONE: barrier = "disabled"; break; - default: return -EINVAL; - } - - err = blk_queue_ordered(info->rq, info->feature_barrier); - - if (err) - return err; - + blk_queue_flush(info->rq, info->feature_flush); printk(KERN_INFO "blkfront: %s: barriers %s\n", - info->gd->disk_name, barrier); - return 0; + info->gd->disk_name, + info->feature_flush ? "enabled" : "disabled"); } @@ -515,7 +502,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, info->rq = gd->queue; info->gd = gd; - xlvbd_barrier(info); + xlvbd_flush(info); if (vdisk_info & VDISK_READONLY) set_disk_ro(gd, 1); @@ -661,8 +648,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", info->gd->disk_name); error = -EOPNOTSUPP; - info->feature_barrier = QUEUE_ORDERED_NONE; - xlvbd_barrier(info); + info->feature_flush = 0; + xlvbd_flush(info); } /* fall through */ case BLKIF_OP_READ: @@ -1075,19 +1062,13 @@ static void blkfront_connect(struct blkfront_info *info) /* * If there's no "feature-barrier" defined, then it means * we're dealing with a very old backend which writes - * synchronously; draining will do what needs to get done. + * synchronously; nothing to do. * * If there are barriers, then we use flush. - * - * If barriers are not supported, then there's no much we can - * do, so just set ordering to NONE. */ - if (err) - info->feature_barrier = QUEUE_ORDERED_DRAIN; - else if (barrier) - info->feature_barrier = QUEUE_ORDERED_DRAIN_FLUSH; - else - info->feature_barrier = QUEUE_ORDERED_NONE; + info->feature_flush = 0; + if (!err && barrier) + info->feature_flush = REQ_FLUSH; err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 7433e07de30e..7c5b01ce51d2 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -516,10 +516,10 @@ static int ide_do_setfeature(ide_drive_t *drive, u8 feature, u8 nsect) return ide_no_data_taskfile(drive, &cmd); } -static void update_ordered(ide_drive_t *drive) +static void update_flush(ide_drive_t *drive) { u16 *id = drive->id; - unsigned ordered = QUEUE_ORDERED_NONE; + unsigned flush = 0; if (drive->dev_flags & IDE_DFLAG_WCACHE) { unsigned long long capacity; @@ -543,13 +543,12 @@ static void update_ordered(ide_drive_t *drive) drive->name, barrier ? "" : "not "); if (barrier) { - ordered = QUEUE_ORDERED_DRAIN_FLUSH; + flush = REQ_FLUSH; blk_queue_prep_rq(drive->queue, idedisk_prep_fn); } - } else - ordered = QUEUE_ORDERED_DRAIN; + } - blk_queue_ordered(drive->queue, ordered); + blk_queue_flush(drive->queue, flush); } ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE); @@ -572,7 +571,7 @@ static int set_wcache(ide_drive_t *drive, int arg) } } - update_ordered(drive); + update_flush(drive); return err; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ac384b2a6a33..b1d92be8f990 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2245,7 +2245,7 @@ static int dm_init_request_based_queue(struct mapped_device *md) blk_queue_softirq_done(md->queue, dm_softirq_done); blk_queue_prep_rq(md->queue, dm_prep_fn); blk_queue_lld_busy(md->queue, dm_lld_busy); - blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(md->queue, REQ_FLUSH); elv_register_queue(md->queue); diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index e876678176be..9c0b42bfe089 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -128,7 +128,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock mq->req = NULL; blk_queue_prep_rq(mq->queue, mmc_prep_request); - blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue); if (mmc_can_erase(card)) { queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue); diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 8373ca0de8e0..9b106d83b0cd 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2197,7 +2197,6 @@ static void dasd_setup_queue(struct dasd_block *block) */ blk_queue_max_segment_size(block->request_queue, PAGE_SIZE); blk_queue_segment_boundary(block->request_queue, PAGE_SIZE - 1); - blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN); } /* diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cdfc51ab9cf2..63bd01ae534f 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2109,7 +2109,7 @@ static int sd_revalidate_disk(struct gendisk *disk) struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdp = sdkp->device; unsigned char *buffer; - unsigned ordered; + unsigned flush = 0; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_revalidate_disk\n")); @@ -2151,15 +2151,15 @@ static int sd_revalidate_disk(struct gendisk *disk) /* * We now have all cache related info, determine how we deal - * with ordered requests. + * with flush requests. */ - if (sdkp->WCE) - ordered = sdkp->DPOFUA - ? QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH; - else - ordered = QUEUE_ORDERED_DRAIN; + if (sdkp->WCE) { + flush |= REQ_FLUSH; + if (sdkp->DPOFUA) + flush |= REQ_FUA; + } - blk_queue_ordered(sdkp->disk->queue, ordered); + blk_queue_flush(sdkp->disk->queue, flush); set_capacity(disk, sdkp->capacity); kfree(buffer); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7077bc0d6138..e97911d4dec3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -355,8 +355,10 @@ struct request_queue struct blk_trace *blk_trace; #endif /* - * reserved for flush operations + * for flush operations */ + unsigned int flush_flags; + unsigned int ordered, next_ordered, ordseq; int orderr, ordcolor; struct request pre_flush_rq, bar_rq, post_flush_rq; @@ -865,8 +867,8 @@ extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); +extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern int blk_queue_ordered(struct request_queue *, unsigned); extern bool blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); -- cgit v1.2.3 From 9cbbdca44ae1a6f512ea1e2be11ced8bbb9d430a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: remove spurious uses of REQ_HARDBARRIER REQ_HARDBARRIER is deprecated. Remove spurious uses in the following users. Please note that other than osdblk, all other uses were already spurious before deprecation. * osdblk: osdblk_rq_fn() won't receive any request with REQ_HARDBARRIER set. Remove the test for it. * pktcdvd: use of REQ_HARDBARRIER in pkt_generic_packet() doesn't mean anything. Removed. * aic7xxx_old: Setting MSG_ORDERED_Q_TAG on REQ_HARDBARRIER is spurious. Removed. * sas_scsi_host: Setting TASK_ATTR_ORDERED on REQ_HARDBARRIER is spurious. Removed. * scsi_tcq: The ordered tag path wasn't being used anyway. Removed. Signed-off-by: Tejun Heo Acked-by: Boaz Harrosh Cc: James Bottomley Cc: Peter Osterlund Signed-off-by: Jens Axboe --- drivers/block/osdblk.c | 3 +-- drivers/block/pktcdvd.c | 1 - drivers/scsi/aic7xxx_old.c | 21 ++------------------- drivers/scsi/libsas/sas_scsi_host.c | 13 +------------ include/scsi/scsi_tcq.h | 6 +----- 5 files changed, 5 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 72d62462433d..87311ebac0db 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -310,8 +310,7 @@ static void osdblk_rq_fn(struct request_queue *q) break; /* filter out block requests we don't understand */ - if (rq->cmd_type != REQ_TYPE_FS && - !(rq->cmd_flags & REQ_HARDBARRIER)) { + if (rq->cmd_type != REQ_TYPE_FS) { blk_end_request_all(rq, 0); continue; } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index b1cbeb59bb76..0166ea136045 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -753,7 +753,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq->timeout = 60*HZ; rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->cmd_flags |= REQ_HARDBARRIER; if (cgc->quiet) rq->cmd_flags |= REQ_QUIET; diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c index 93984c9dfe14..e1cd6062776c 100644 --- a/drivers/scsi/aic7xxx_old.c +++ b/drivers/scsi/aic7xxx_old.c @@ -2850,12 +2850,6 @@ aic7xxx_done(struct aic7xxx_host *p, struct aic7xxx_scb *scb) aic_dev->r_total++; ptr = aic_dev->r_bins; } - if(cmd->device->simple_tags && cmd->request->cmd_flags & REQ_HARDBARRIER) - { - aic_dev->barrier_total++; - if(scb->tag_action == MSG_ORDERED_Q_TAG) - aic_dev->ordered_total++; - } x = scb->sg_length; x >>= 10; for(i=0; i<6; i++) @@ -10144,19 +10138,8 @@ static void aic7xxx_buildscb(struct aic7xxx_host *p, struct scsi_cmnd *cmd, /* We always force TEST_UNIT_READY to untagged */ if (cmd->cmnd[0] != TEST_UNIT_READY && sdptr->simple_tags) { - if (req->cmd_flags & REQ_HARDBARRIER) - { - if(sdptr->ordered_tags) - { - hscb->control |= MSG_ORDERED_Q_TAG; - scb->tag_action = MSG_ORDERED_Q_TAG; - } - } - else - { - hscb->control |= MSG_SIMPLE_Q_TAG; - scb->tag_action = MSG_SIMPLE_Q_TAG; - } + hscb->control |= MSG_SIMPLE_Q_TAG; + scb->tag_action = MSG_SIMPLE_Q_TAG; } } if ( !(aic_dev->dtr_pending) && diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index f0cfba9a1fc8..535085cd27ec 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -130,17 +130,6 @@ static void sas_scsi_task_done(struct sas_task *task) sc->scsi_done(sc); } -static enum task_attribute sas_scsi_get_task_attr(struct scsi_cmnd *cmd) -{ - enum task_attribute ta = TASK_ATTR_SIMPLE; - if (cmd->request && blk_rq_tagged(cmd->request)) { - if (cmd->device->ordered_tags && - (cmd->request->cmd_flags & REQ_HARDBARRIER)) - ta = TASK_ATTR_ORDERED; - } - return ta; -} - static struct sas_task *sas_create_task(struct scsi_cmnd *cmd, struct domain_device *dev, gfp_t gfp_flags) @@ -160,7 +149,7 @@ static struct sas_task *sas_create_task(struct scsi_cmnd *cmd, task->ssp_task.retry_count = 1; int_to_scsilun(cmd->device->lun, &lun); memcpy(task->ssp_task.LUN, &lun.scsi_lun, 8); - task->ssp_task.task_attr = sas_scsi_get_task_attr(cmd); + task->ssp_task.task_attr = TASK_ATTR_SIMPLE; memcpy(task->ssp_task.cdb, cmd->cmnd, 16); task->scatter = scsi_sglist(cmd); diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index 17231385cb37..d6e7994aa634 100644 --- a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -97,13 +97,9 @@ static inline void scsi_deactivate_tcq(struct scsi_device *sdev, int depth) static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg) { struct request *req = cmd->request; - struct scsi_device *sdev = cmd->device; if (blk_rq_tagged(req)) { - if (sdev->ordered_tags && req->cmd_flags & REQ_HARDBARRIER) - *msg++ = MSG_ORDERED_TAG; - else - *msg++ = MSG_SIMPLE_TAG; + *msg++ = MSG_SIMPLE_TAG; *msg++ = req->tag; return 2; } -- cgit v1.2.3 From dd831006d5be7f74c3fe7aef82380c51c3637960 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: misc cleanups in barrier code Make the following cleanups in preparation of barrier/flush update. * blk_do_ordered() declaration is moved from include/linux/blkdev.h to block/blk.h. * blk_do_ordered() now returns pointer to struct request, with %NULL meaning "try the next request" and ERR_PTR(-EAGAIN) "try again later". The third case will be dropped with further changes. * In the initialization of proxy barrier request, data direction is already set by init_request_from_bio(). Drop unnecessary explicit REQ_WRITE setting and move init_request_from_bio() above REQ_FUA flag setting. * add_request() is collapsed into __make_request(). These changes don't make any functional difference. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-barrier.c | 32 ++++++++++++++------------------ block/blk-core.c | 21 ++++----------------- block/blk.h | 7 +++++-- include/linux/blkdev.h | 1 - 4 files changed, 23 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index ed0aba5463ab..f1be85ba2bb5 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -110,9 +110,9 @@ static void queue_flush(struct request_queue *q, unsigned which) elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static inline bool start_ordered(struct request_queue *q, struct request **rqp) +static inline struct request *start_ordered(struct request_queue *q, + struct request *rq) { - struct request *rq = *rqp; unsigned skip = 0; q->orderr = 0; @@ -149,11 +149,9 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) /* initialize proxy request and queue it */ blk_rq_init(q, rq); - if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) - rq->cmd_flags |= REQ_WRITE; + init_request_from_bio(rq, q->orig_bar_rq->bio); if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; - init_request_from_bio(rq, q->orig_bar_rq->bio); rq->end_io = bar_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); @@ -171,27 +169,26 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) else skip |= QUEUE_ORDSEQ_DRAIN; - *rqp = rq; - /* * Complete skipped sequences. If whole sequence is complete, - * return false to tell elevator that this request is gone. + * return %NULL to tell elevator that this request is gone. */ - return !blk_ordered_complete_seq(q, skip, 0); + if (blk_ordered_complete_seq(q, skip, 0)) + rq = NULL; + return rq; } -bool blk_do_ordered(struct request_queue *q, struct request **rqp) +struct request *blk_do_ordered(struct request_queue *q, struct request *rq) { - struct request *rq = *rqp; const int is_barrier = rq->cmd_type == REQ_TYPE_FS && (rq->cmd_flags & REQ_HARDBARRIER); if (!q->ordseq) { if (!is_barrier) - return true; + return rq; if (q->next_ordered != QUEUE_ORDERED_NONE) - return start_ordered(q, rqp); + return start_ordered(q, rq); else { /* * Queue ordering not supported. Terminate @@ -199,8 +196,7 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) */ blk_dequeue_request(rq); __blk_end_request_all(rq, -EOPNOTSUPP); - *rqp = NULL; - return false; + return NULL; } } @@ -211,14 +207,14 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) /* Special requests are not subject to ordering rules. */ if (rq->cmd_type != REQ_TYPE_FS && rq != &q->pre_flush_rq && rq != &q->post_flush_rq) - return true; + return rq; /* Ordered by draining. Wait for turn. */ WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) - *rqp = NULL; + rq = ERR_PTR(-EAGAIN); - return true; + return rq; } static void bio_end_empty_barrier(struct bio *bio, int err) diff --git a/block/blk-core.c b/block/blk-core.c index f06354183b29..f8d37a8e2c55 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1037,22 +1037,6 @@ void blk_insert_request(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL(blk_insert_request); -/* - * add-request adds a request to the linked list. - * queue lock is held and interrupts disabled, as we muck with the - * request queue list. - */ -static inline void add_request(struct request_queue *q, struct request *req) -{ - drive_stat_acct(req, 1); - - /* - * elevator indicated where it wants this request to be - * inserted at elevator_merge time - */ - __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); -} - static void part_round_stats_single(int cpu, struct hd_struct *part, unsigned long now) { @@ -1316,7 +1300,10 @@ get_rq: req->cpu = blk_cpu_to_group(smp_processor_id()); if (queue_should_plug(q) && elv_queue_empty(q)) blk_plug_device(q); - add_request(q, req); + + /* insert the request into the elevator */ + drive_stat_acct(req, 1); + __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); out: if (unplug || !queue_should_plug(q)) __generic_unplug_device(q); diff --git a/block/blk.h b/block/blk.h index 6e7dc87141e4..874eb4ea8093 100644 --- a/block/blk.h +++ b/block/blk.h @@ -51,6 +51,8 @@ static inline void blk_clear_rq_complete(struct request *rq) */ #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) +struct request *blk_do_ordered(struct request_queue *q, struct request *rq); + static inline struct request *__elv_next_request(struct request_queue *q) { struct request *rq; @@ -58,8 +60,9 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); - if (blk_do_ordered(q, &rq)) - return rq; + rq = blk_do_ordered(q, rq); + if (rq) + return !IS_ERR(rq) ? rq : NULL; } if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e97911d4dec3..996549d71923 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -869,7 +869,6 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern bool blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); -- cgit v1.2.3 From 28e7d1845216538303bb95d679d8fd4de50e2f1a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: drop barrier ordering by queue draining Filesystems will take all the responsibilities for ordering requests around commit writes and will only indicate how the commit writes themselves should be handled by block layers. This patch drops barrier ordering by queue draining from block layer. Ordering by draining implementation was somewhat invasive to request handling. List of notable changes follow. * Each queue has 1 bit color which is flipped on each barrier issue. This is used to track whether a given request is issued before the current barrier or not. REQ_ORDERED_COLOR flag and coloring implementation in __elv_add_request() are removed. * Requests which shouldn't be processed yet for draining were stalled by returning -EAGAIN from blk_do_ordered() according to the test result between blk_ordered_req_seq() and blk_blk_ordered_cur_seq(). This logic is removed. * Draining completion logic in elv_completed_request() removed. * All barrier sequence requests were queued to request queue and then trckled to lower layer according to progress and thus maintaining request orders during requeue was necessary. This is replaced by queueing the next request in the barrier sequence only after the current one is complete from blk_ordered_complete_seq(), which removes the need for multiple proxy requests in struct request_queue and the request sorting logic in the ELEVATOR_INSERT_REQUEUE path of elv_insert(). * As barriers no longer have ordering constraints, there's no need to dump the whole elevator onto the dispatch queue on each barrier. Insert barriers at the front instead. * If other barrier requests come to the front of the dispatch queue while one is already in progress, they are stored in q->pending_barriers and restored to dispatch queue one-by-one after each barrier completion from blk_ordered_complete_seq(). Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-barrier.c | 220 ++++++++++++++++++---------------------------- block/blk-core.c | 11 ++- block/blk.h | 2 +- block/elevator.c | 79 ++--------------- include/linux/blk_types.h | 2 - include/linux/blkdev.h | 19 ++-- 6 files changed, 113 insertions(+), 220 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index f1be85ba2bb5..e8b2e5c091b1 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -9,6 +9,8 @@ #include "blk.h" +static struct request *queue_next_ordseq(struct request_queue *q); + /* * Cache flushing for ordered writes handling */ @@ -19,38 +21,10 @@ unsigned blk_ordered_cur_seq(struct request_queue *q) return 1 << ffz(q->ordseq); } -unsigned blk_ordered_req_seq(struct request *rq) -{ - struct request_queue *q = rq->q; - - BUG_ON(q->ordseq == 0); - - if (rq == &q->pre_flush_rq) - return QUEUE_ORDSEQ_PREFLUSH; - if (rq == &q->bar_rq) - return QUEUE_ORDSEQ_BAR; - if (rq == &q->post_flush_rq) - return QUEUE_ORDSEQ_POSTFLUSH; - - /* - * !fs requests don't need to follow barrier ordering. Always - * put them at the front. This fixes the following deadlock. - * - * http://thread.gmane.org/gmane.linux.kernel/537473 - */ - if (rq->cmd_type != REQ_TYPE_FS) - return QUEUE_ORDSEQ_DRAIN; - - if ((rq->cmd_flags & REQ_ORDERED_COLOR) == - (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) - return QUEUE_ORDSEQ_DRAIN; - else - return QUEUE_ORDSEQ_DONE; -} - -bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) +static struct request *blk_ordered_complete_seq(struct request_queue *q, + unsigned seq, int error) { - struct request *rq; + struct request *next_rq = NULL; if (error && !q->orderr) q->orderr = error; @@ -58,16 +32,22 @@ bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) BUG_ON(q->ordseq & seq); q->ordseq |= seq; - if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) - return false; - - /* - * Okay, sequence complete. - */ - q->ordseq = 0; - rq = q->orig_bar_rq; - __blk_end_request_all(rq, q->orderr); - return true; + if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) { + /* not complete yet, queue the next ordered sequence */ + next_rq = queue_next_ordseq(q); + } else { + /* complete this barrier request */ + __blk_end_request_all(q->orig_bar_rq, q->orderr); + q->orig_bar_rq = NULL; + q->ordseq = 0; + + /* dispatch the next barrier if there's one */ + if (!list_empty(&q->pending_barriers)) { + next_rq = list_entry_rq(q->pending_barriers.next); + list_move(&next_rq->queuelist, &q->queue_head); + } + } + return next_rq; } static void pre_flush_end_io(struct request *rq, int error) @@ -88,133 +68,105 @@ static void post_flush_end_io(struct request *rq, int error) blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); } -static void queue_flush(struct request_queue *q, unsigned which) +static void queue_flush(struct request_queue *q, struct request *rq, + rq_end_io_fn *end_io) { - struct request *rq; - rq_end_io_fn *end_io; - - if (which == QUEUE_ORDERED_DO_PREFLUSH) { - rq = &q->pre_flush_rq; - end_io = pre_flush_end_io; - } else { - rq = &q->post_flush_rq; - end_io = post_flush_end_io; - } - blk_rq_init(q, rq); rq->cmd_type = REQ_TYPE_FS; - rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH; + rq->cmd_flags = REQ_FLUSH; rq->rq_disk = q->orig_bar_rq->rq_disk; rq->end_io = end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static inline struct request *start_ordered(struct request_queue *q, - struct request *rq) +static struct request *queue_next_ordseq(struct request_queue *q) { - unsigned skip = 0; - - q->orderr = 0; - q->ordered = q->next_ordered; - q->ordseq |= QUEUE_ORDSEQ_STARTED; - - /* - * For an empty barrier, there's no actual BAR request, which - * in turn makes POSTFLUSH unnecessary. Mask them off. - */ - if (!blk_rq_sectors(rq)) - q->ordered &= ~(QUEUE_ORDERED_DO_BAR | - QUEUE_ORDERED_DO_POSTFLUSH); - - /* stash away the original request */ - blk_dequeue_request(rq); - q->orig_bar_rq = rq; - rq = NULL; - - /* - * Queue ordered sequence. As we stack them at the head, we - * need to queue in reverse order. Note that we rely on that - * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs - * request gets inbetween ordered sequence. - */ - if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) { - queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH); - rq = &q->post_flush_rq; - } else - skip |= QUEUE_ORDSEQ_POSTFLUSH; + struct request *rq = &q->bar_rq; - if (q->ordered & QUEUE_ORDERED_DO_BAR) { - rq = &q->bar_rq; + switch (blk_ordered_cur_seq(q)) { + case QUEUE_ORDSEQ_PREFLUSH: + queue_flush(q, rq, pre_flush_end_io); + break; + case QUEUE_ORDSEQ_BAR: /* initialize proxy request and queue it */ blk_rq_init(q, rq); init_request_from_bio(rq, q->orig_bar_rq->bio); + rq->cmd_flags &= ~REQ_HARDBARRIER; if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; rq->end_io = bar_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); - } else - skip |= QUEUE_ORDSEQ_BAR; + break; - if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) { - queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH); - rq = &q->pre_flush_rq; - } else - skip |= QUEUE_ORDSEQ_PREFLUSH; + case QUEUE_ORDSEQ_POSTFLUSH: + queue_flush(q, rq, post_flush_end_io); + break; - if (queue_in_flight(q)) - rq = NULL; - else - skip |= QUEUE_ORDSEQ_DRAIN; - - /* - * Complete skipped sequences. If whole sequence is complete, - * return %NULL to tell elevator that this request is gone. - */ - if (blk_ordered_complete_seq(q, skip, 0)) - rq = NULL; + default: + BUG(); + } return rq; } struct request *blk_do_ordered(struct request_queue *q, struct request *rq) { - const int is_barrier = rq->cmd_type == REQ_TYPE_FS && - (rq->cmd_flags & REQ_HARDBARRIER); - - if (!q->ordseq) { - if (!is_barrier) - return rq; - - if (q->next_ordered != QUEUE_ORDERED_NONE) - return start_ordered(q, rq); - else { - /* - * Queue ordering not supported. Terminate - * with prejudice. - */ - blk_dequeue_request(rq); - __blk_end_request_all(rq, -EOPNOTSUPP); - return NULL; - } + unsigned skip = 0; + + if (!(rq->cmd_flags & REQ_HARDBARRIER)) + return rq; + + if (q->ordseq) { + /* + * Barrier is already in progress and they can't be + * processed in parallel. Queue for later processing. + */ + list_move_tail(&rq->queuelist, &q->pending_barriers); + return NULL; + } + + if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) { + /* + * Queue ordering not supported. Terminate + * with prejudice. + */ + blk_dequeue_request(rq); + __blk_end_request_all(rq, -EOPNOTSUPP); + return NULL; } /* - * Ordered sequence in progress + * Start a new ordered sequence */ + q->orderr = 0; + q->ordered = q->next_ordered; + q->ordseq |= QUEUE_ORDSEQ_STARTED; - /* Special requests are not subject to ordering rules. */ - if (rq->cmd_type != REQ_TYPE_FS && - rq != &q->pre_flush_rq && rq != &q->post_flush_rq) - return rq; + /* + * For an empty barrier, there's no actual BAR request, which + * in turn makes POSTFLUSH unnecessary. Mask them off. + */ + if (!blk_rq_sectors(rq)) + q->ordered &= ~(QUEUE_ORDERED_DO_BAR | + QUEUE_ORDERED_DO_POSTFLUSH); - /* Ordered by draining. Wait for turn. */ - WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); - if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) - rq = ERR_PTR(-EAGAIN); + /* stash away the original request */ + blk_dequeue_request(rq); + q->orig_bar_rq = rq; - return rq; + if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) + skip |= QUEUE_ORDSEQ_PREFLUSH; + + if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) + skip |= QUEUE_ORDSEQ_BAR; + + if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) + skip |= QUEUE_ORDSEQ_POSTFLUSH; + + /* complete skipped sequences and return the first sequence */ + return blk_ordered_complete_seq(q, skip, 0); } static void bio_end_empty_barrier(struct bio *bio, int err) diff --git a/block/blk-core.c b/block/blk-core.c index f8d37a8e2c55..d316662682c8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -520,6 +520,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) init_timer(&q->unplug_timer); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); INIT_LIST_HEAD(&q->timeout_list); + INIT_LIST_HEAD(&q->pending_barriers); INIT_WORK(&q->unplug_work, blk_unplug_work); kobject_init(&q->kobj, &blk_queue_ktype); @@ -1185,6 +1186,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) const bool sync = (bio->bi_rw & REQ_SYNC); const bool unplug = (bio->bi_rw & REQ_UNPLUG); const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; + int where = ELEVATOR_INSERT_SORT; int rw_flags; /* REQ_HARDBARRIER is no more */ @@ -1203,7 +1205,12 @@ static int __make_request(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q)) + if (bio->bi_rw & REQ_HARDBARRIER) { + where = ELEVATOR_INSERT_FRONT; + goto get_rq; + } + + if (elv_queue_empty(q)) goto get_rq; el_ret = elv_merge(q, &req, bio); @@ -1303,7 +1310,7 @@ get_rq: /* insert the request into the elevator */ drive_stat_acct(req, 1); - __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); + __elv_add_request(q, req, where, 0); out: if (unplug || !queue_should_plug(q)) __generic_unplug_device(q); diff --git a/block/blk.h b/block/blk.h index 874eb4ea8093..08081e4b294e 100644 --- a/block/blk.h +++ b/block/blk.h @@ -62,7 +62,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) rq = list_entry_rq(q->queue_head.next); rq = blk_do_ordered(q, rq); if (rq) - return !IS_ERR(rq) ? rq : NULL; + return rq; } if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) diff --git a/block/elevator.c b/block/elevator.c index ec585c9554d3..241c69c45c5f 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -617,8 +617,6 @@ void elv_quiesce_end(struct request_queue *q) void elv_insert(struct request_queue *q, struct request *rq, int where) { - struct list_head *pos; - unsigned ordseq; int unplug_it = 1; trace_block_rq_insert(q, rq); @@ -626,9 +624,16 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) rq->q = q; switch (where) { + case ELEVATOR_INSERT_REQUEUE: + /* + * Most requeues happen because of a busy condition, + * don't force unplug of the queue for that case. + * Clear unplug_it and fall through. + */ + unplug_it = 0; + case ELEVATOR_INSERT_FRONT: rq->cmd_flags |= REQ_SOFTBARRIER; - list_add(&rq->queuelist, &q->queue_head); break; @@ -668,36 +673,6 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) q->elevator->ops->elevator_add_req_fn(q, rq); break; - case ELEVATOR_INSERT_REQUEUE: - /* - * If ordered flush isn't in progress, we do front - * insertion; otherwise, requests should be requeued - * in ordseq order. - */ - rq->cmd_flags |= REQ_SOFTBARRIER; - - /* - * Most requeues happen because of a busy condition, - * don't force unplug of the queue for that case. - */ - unplug_it = 0; - - if (q->ordseq == 0) { - list_add(&rq->queuelist, &q->queue_head); - break; - } - - ordseq = blk_ordered_req_seq(rq); - - list_for_each(pos, &q->queue_head) { - struct request *pos_rq = list_entry_rq(pos); - if (ordseq <= blk_ordered_req_seq(pos_rq)) - break; - } - - list_add_tail(&rq->queuelist, pos); - break; - default: printk(KERN_ERR "%s: bad insertion point %d\n", __func__, where); @@ -716,26 +691,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) void __elv_add_request(struct request_queue *q, struct request *rq, int where, int plug) { - if (q->ordcolor) - rq->cmd_flags |= REQ_ORDERED_COLOR; - if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { - /* - * toggle ordered color - */ - if (rq->cmd_flags & REQ_HARDBARRIER) - q->ordcolor ^= 1; - - /* - * barriers implicitly indicate back insertion - */ - if (where == ELEVATOR_INSERT_SORT) - where = ELEVATOR_INSERT_BACK; - - /* - * this request is scheduling boundary, update - * end_sector - */ + /* barriers are scheduling boundary, update end_sector */ if (rq->cmd_type == REQ_TYPE_FS || (rq->cmd_flags & REQ_DISCARD)) { q->end_sector = rq_end_sector(rq); @@ -855,24 +812,6 @@ void elv_completed_request(struct request_queue *q, struct request *rq) e->ops->elevator_completed_req_fn) e->ops->elevator_completed_req_fn(q, rq); } - - /* - * Check if the queue is waiting for fs requests to be - * drained for flush sequence. - */ - if (unlikely(q->ordseq)) { - struct request *next = NULL; - - if (!list_empty(&q->queue_head)) - next = list_entry_rq(q->queue_head.next); - - if (!queue_in_flight(q) && - blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && - (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) { - blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); - __blk_run_queue(q); - } - } } #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ca83a97c9715..9192282b4259 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -143,7 +143,6 @@ enum rq_flag_bits { __REQ_FAILED, /* set if the request failed */ __REQ_QUIET, /* don't worry about errors */ __REQ_PREEMPT, /* set for "ide_preempt" requests */ - __REQ_ORDERED_COLOR, /* is before or after barrier */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_COPY_USER, /* contains copies of user pages */ __REQ_INTEGRITY, /* integrity metadata has been remapped */ @@ -184,7 +183,6 @@ enum rq_flag_bits { #define REQ_FAILED (1 << __REQ_FAILED) #define REQ_QUIET (1 << __REQ_QUIET) #define REQ_PREEMPT (1 << __REQ_PREEMPT) -#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) #define REQ_ALLOCED (1 << __REQ_ALLOCED) #define REQ_COPY_USER (1 << __REQ_COPY_USER) #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 996549d71923..20a3710a481b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -360,9 +360,10 @@ struct request_queue unsigned int flush_flags; unsigned int ordered, next_ordered, ordseq; - int orderr, ordcolor; - struct request pre_flush_rq, bar_rq, post_flush_rq; + int orderr; + struct request bar_rq; struct request *orig_bar_rq; + struct list_head pending_barriers; struct mutex sysfs_lock; @@ -491,12 +492,11 @@ enum { /* * Ordered operation sequence */ - QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ - QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ - QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ - QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ - QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ - QUEUE_ORDSEQ_DONE = 0x20, + QUEUE_ORDSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_ORDSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_ORDSEQ_BAR = (1 << 2), /* barrier write in progress */ + QUEUE_ORDSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_ORDSEQ_DONE = (1 << 4), }; #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) @@ -869,9 +869,6 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern unsigned blk_ordered_cur_seq(struct request_queue *); -extern unsigned blk_ordered_req_seq(struct request *); -extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); -- cgit v1.2.3 From dd4c133f387c48f526022860ad70354637a80f4c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: rename barrier/ordered to flush With ordering requirements dropped, barrier and ordered are misnomers. Now all block layer does is sequencing FLUSH and FUA. Rename them to flush. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 21 ++++++----- block/blk-flush.c | 98 +++++++++++++++++++++++++------------------------- block/blk.h | 4 +-- include/linux/blkdev.h | 24 ++++++------- 4 files changed, 72 insertions(+), 75 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index d316662682c8..8870ae40179d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -136,7 +136,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, { struct request_queue *q = rq->q; - if (&q->bar_rq != rq) { + if (&q->flush_rq != rq) { if (error) clear_bit(BIO_UPTODATE, &bio->bi_flags); else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) @@ -160,13 +160,12 @@ static void req_bio_endio(struct request *rq, struct bio *bio, if (bio->bi_size == 0) bio_endio(bio, error); } else { - /* - * Okay, this is the barrier request in progress, just - * record the error; + * Okay, this is the sequenced flush request in + * progress, just record the error; */ - if (error && !q->orderr) - q->orderr = error; + if (error && !q->flush_err) + q->flush_err = error; } } @@ -520,7 +519,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) init_timer(&q->unplug_timer); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); INIT_LIST_HEAD(&q->timeout_list); - INIT_LIST_HEAD(&q->pending_barriers); + INIT_LIST_HEAD(&q->pending_flushes); INIT_WORK(&q->unplug_work, blk_unplug_work); kobject_init(&q->kobj, &blk_queue_ktype); @@ -1764,11 +1763,11 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) static void blk_account_io_done(struct request *req) { /* - * Account IO completion. bar_rq isn't accounted as a normal - * IO on queueing nor completion. Accounting the containing - * request is enough. + * Account IO completion. flush_rq isn't accounted as a + * normal IO on queueing nor completion. Accounting the + * containing request is enough. */ - if (blk_do_io_stat(req) && req != &req->q->bar_rq) { + if (blk_do_io_stat(req) && req != &req->q->flush_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); struct hd_struct *part; diff --git a/block/blk-flush.c b/block/blk-flush.c index e8b2e5c091b1..dd873225da97 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -9,41 +9,38 @@ #include "blk.h" -static struct request *queue_next_ordseq(struct request_queue *q); +static struct request *queue_next_fseq(struct request_queue *q); -/* - * Cache flushing for ordered writes handling - */ -unsigned blk_ordered_cur_seq(struct request_queue *q) +unsigned blk_flush_cur_seq(struct request_queue *q) { - if (!q->ordseq) + if (!q->flush_seq) return 0; - return 1 << ffz(q->ordseq); + return 1 << ffz(q->flush_seq); } -static struct request *blk_ordered_complete_seq(struct request_queue *q, - unsigned seq, int error) +static struct request *blk_flush_complete_seq(struct request_queue *q, + unsigned seq, int error) { struct request *next_rq = NULL; - if (error && !q->orderr) - q->orderr = error; + if (error && !q->flush_err) + q->flush_err = error; - BUG_ON(q->ordseq & seq); - q->ordseq |= seq; + BUG_ON(q->flush_seq & seq); + q->flush_seq |= seq; - if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) { - /* not complete yet, queue the next ordered sequence */ - next_rq = queue_next_ordseq(q); + if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) { + /* not complete yet, queue the next flush sequence */ + next_rq = queue_next_fseq(q); } else { - /* complete this barrier request */ - __blk_end_request_all(q->orig_bar_rq, q->orderr); - q->orig_bar_rq = NULL; - q->ordseq = 0; - - /* dispatch the next barrier if there's one */ - if (!list_empty(&q->pending_barriers)) { - next_rq = list_entry_rq(q->pending_barriers.next); + /* complete this flush request */ + __blk_end_request_all(q->orig_flush_rq, q->flush_err); + q->orig_flush_rq = NULL; + q->flush_seq = 0; + + /* dispatch the next flush if there's one */ + if (!list_empty(&q->pending_flushes)) { + next_rq = list_entry_rq(q->pending_flushes.next); list_move(&next_rq->queuelist, &q->queue_head); } } @@ -53,19 +50,19 @@ static struct request *blk_ordered_complete_seq(struct request_queue *q, static void pre_flush_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); + blk_flush_complete_seq(rq->q, QUEUE_FSEQ_PREFLUSH, error); } -static void bar_end_io(struct request *rq, int error) +static void flush_data_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); + blk_flush_complete_seq(rq->q, QUEUE_FSEQ_DATA, error); } static void post_flush_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); + blk_flush_complete_seq(rq->q, QUEUE_FSEQ_POSTFLUSH, error); } static void queue_flush(struct request_queue *q, struct request *rq, @@ -74,34 +71,34 @@ static void queue_flush(struct request_queue *q, struct request *rq, blk_rq_init(q, rq); rq->cmd_type = REQ_TYPE_FS; rq->cmd_flags = REQ_FLUSH; - rq->rq_disk = q->orig_bar_rq->rq_disk; + rq->rq_disk = q->orig_flush_rq->rq_disk; rq->end_io = end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static struct request *queue_next_ordseq(struct request_queue *q) +static struct request *queue_next_fseq(struct request_queue *q) { - struct request *rq = &q->bar_rq; + struct request *rq = &q->flush_rq; - switch (blk_ordered_cur_seq(q)) { - case QUEUE_ORDSEQ_PREFLUSH: + switch (blk_flush_cur_seq(q)) { + case QUEUE_FSEQ_PREFLUSH: queue_flush(q, rq, pre_flush_end_io); break; - case QUEUE_ORDSEQ_BAR: + case QUEUE_FSEQ_DATA: /* initialize proxy request and queue it */ blk_rq_init(q, rq); - init_request_from_bio(rq, q->orig_bar_rq->bio); + init_request_from_bio(rq, q->orig_flush_rq->bio); rq->cmd_flags &= ~REQ_HARDBARRIER; if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; - rq->end_io = bar_end_io; + rq->end_io = flush_data_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); break; - case QUEUE_ORDSEQ_POSTFLUSH: + case QUEUE_FSEQ_POSTFLUSH: queue_flush(q, rq, post_flush_end_io); break; @@ -111,19 +108,20 @@ static struct request *queue_next_ordseq(struct request_queue *q) return rq; } -struct request *blk_do_ordered(struct request_queue *q, struct request *rq) +struct request *blk_do_flush(struct request_queue *q, struct request *rq) { unsigned skip = 0; if (!(rq->cmd_flags & REQ_HARDBARRIER)) return rq; - if (q->ordseq) { + if (q->flush_seq) { /* - * Barrier is already in progress and they can't be - * processed in parallel. Queue for later processing. + * Sequenced flush is already in progress and they + * can't be processed in parallel. Queue for later + * processing. */ - list_move_tail(&rq->queuelist, &q->pending_barriers); + list_move_tail(&rq->queuelist, &q->pending_flushes); return NULL; } @@ -138,11 +136,11 @@ struct request *blk_do_ordered(struct request_queue *q, struct request *rq) } /* - * Start a new ordered sequence + * Start a new flush sequence */ - q->orderr = 0; + q->flush_err = 0; q->ordered = q->next_ordered; - q->ordseq |= QUEUE_ORDSEQ_STARTED; + q->flush_seq |= QUEUE_FSEQ_STARTED; /* * For an empty barrier, there's no actual BAR request, which @@ -154,19 +152,19 @@ struct request *blk_do_ordered(struct request_queue *q, struct request *rq) /* stash away the original request */ blk_dequeue_request(rq); - q->orig_bar_rq = rq; + q->orig_flush_rq = rq; if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) - skip |= QUEUE_ORDSEQ_PREFLUSH; + skip |= QUEUE_FSEQ_PREFLUSH; if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) - skip |= QUEUE_ORDSEQ_BAR; + skip |= QUEUE_FSEQ_DATA; if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) - skip |= QUEUE_ORDSEQ_POSTFLUSH; + skip |= QUEUE_FSEQ_POSTFLUSH; /* complete skipped sequences and return the first sequence */ - return blk_ordered_complete_seq(q, skip, 0); + return blk_flush_complete_seq(q, skip, 0); } static void bio_end_empty_barrier(struct bio *bio, int err) diff --git a/block/blk.h b/block/blk.h index 08081e4b294e..24b92bd78f37 100644 --- a/block/blk.h +++ b/block/blk.h @@ -51,7 +51,7 @@ static inline void blk_clear_rq_complete(struct request *rq) */ #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) -struct request *blk_do_ordered(struct request_queue *q, struct request *rq); +struct request *blk_do_flush(struct request_queue *q, struct request *rq); static inline struct request *__elv_next_request(struct request_queue *q) { @@ -60,7 +60,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); - rq = blk_do_ordered(q, rq); + rq = blk_do_flush(q, rq); if (rq) return rq; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 20a3710a481b..1cd83ec077db 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -357,13 +357,13 @@ struct request_queue /* * for flush operations */ + unsigned int ordered, next_ordered; unsigned int flush_flags; - - unsigned int ordered, next_ordered, ordseq; - int orderr; - struct request bar_rq; - struct request *orig_bar_rq; - struct list_head pending_barriers; + unsigned int flush_seq; + int flush_err; + struct request flush_rq; + struct request *orig_flush_rq; + struct list_head pending_flushes; struct mutex sysfs_lock; @@ -490,13 +490,13 @@ enum { QUEUE_ORDERED_DO_FUA, /* - * Ordered operation sequence + * FLUSH/FUA sequences. */ - QUEUE_ORDSEQ_STARTED = (1 << 0), /* flushing in progress */ - QUEUE_ORDSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ - QUEUE_ORDSEQ_BAR = (1 << 2), /* barrier write in progress */ - QUEUE_ORDSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ - QUEUE_ORDSEQ_DONE = (1 << 4), + QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ + QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_FSEQ_DONE = (1 << 4), }; #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) -- cgit v1.2.3 From 4fed947cb311e5aa51781d316cefca836352f6ce Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: implement REQ_FLUSH/FUA based interface for FLUSH/FUA requests Now that the backend conversion is complete, export sequenced FLUSH/FUA capability through REQ_FLUSH/FUA flags. REQ_FLUSH means the device cache should be flushed before executing the request. REQ_FUA means that the data in the request should be on non-volatile media on completion. Block layer will choose the correct way of implementing the semantics and execute it. The request may be passed to the device directly if the device can handle it; otherwise, it will be sequenced using one or more proxy requests. Devices will never see REQ_FLUSH and/or FUA which it doesn't support. Also, unlike the original REQ_HARDBARRIER, REQ_FLUSH/FUA requests are never failed with -EOPNOTSUPP. If the underlying device doesn't support FLUSH/FUA, the block layer simply make those noop. IOW, it no longer distinguishes between writeback cache which doesn't support cache flush and writethrough/no cache. Devices which have WB cache w/o flush are very difficult to come by these days and there's nothing much we can do anyway, so it doesn't make sense to require everyone to implement -EOPNOTSUPP handling. This will simplify filesystems and block drivers as they can drop -EOPNOTSUPP retry logic for barriers. * QUEUE_ORDERED_* are removed and QUEUE_FSEQ_* are moved into blk-flush.c. * REQ_FLUSH w/o data can also be directly passed to drivers without sequencing but some drivers assume that zero length requests don't have rq->bio which isn't true for these requests requiring the use of proxy requests. * REQ_COMMON_MASK now includes REQ_FLUSH | REQ_FUA so that they are copied from bio to request. * WRITE_BARRIER is marked deprecated and WRITE_FLUSH, WRITE_FUA and WRITE_FLUSH_FUA are added. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-flush.c | 85 ++++++++++++++++++++++++--------------------- block/blk.h | 3 ++ include/linux/blk_types.h | 2 +- include/linux/blkdev.h | 38 ++------------------ include/linux/buffer_head.h | 2 +- include/linux/fs.h | 19 ++++++---- 7 files changed, 67 insertions(+), 84 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 8870ae40179d..18455c4f618a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1204,7 +1204,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - if (bio->bi_rw & REQ_HARDBARRIER) { + if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { where = ELEVATOR_INSERT_FRONT; goto get_rq; } diff --git a/block/blk-flush.c b/block/blk-flush.c index dd873225da97..452c552e9ead 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -1,5 +1,5 @@ /* - * Functions related to barrier IO handling + * Functions to sequence FLUSH and FUA writes. */ #include #include @@ -9,6 +9,15 @@ #include "blk.h" +/* FLUSH/FUA sequences */ +enum { + QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ + QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_FSEQ_DONE = (1 << 4), +}; + static struct request *queue_next_fseq(struct request_queue *q); unsigned blk_flush_cur_seq(struct request_queue *q) @@ -79,6 +88,7 @@ static void queue_flush(struct request_queue *q, struct request *rq, static struct request *queue_next_fseq(struct request_queue *q) { + struct request *orig_rq = q->orig_flush_rq; struct request *rq = &q->flush_rq; switch (blk_flush_cur_seq(q)) { @@ -87,12 +97,11 @@ static struct request *queue_next_fseq(struct request_queue *q) break; case QUEUE_FSEQ_DATA: - /* initialize proxy request and queue it */ + /* initialize proxy request, inherit FLUSH/FUA and queue it */ blk_rq_init(q, rq); - init_request_from_bio(rq, q->orig_flush_rq->bio); - rq->cmd_flags &= ~REQ_HARDBARRIER; - if (q->ordered & QUEUE_ORDERED_DO_FUA) - rq->cmd_flags |= REQ_FUA; + init_request_from_bio(rq, orig_rq->bio); + rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); + rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); rq->end_io = flush_data_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); @@ -110,60 +119,58 @@ static struct request *queue_next_fseq(struct request_queue *q) struct request *blk_do_flush(struct request_queue *q, struct request *rq) { + unsigned int fflags = q->flush_flags; /* may change, cache it */ + bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; + bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); + bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); unsigned skip = 0; - if (!(rq->cmd_flags & REQ_HARDBARRIER)) + /* + * Special case. If there's data but flush is not necessary, + * the request can be issued directly. + * + * Flush w/o data should be able to be issued directly too but + * currently some drivers assume that rq->bio contains + * non-zero data if it isn't NULL and empty FLUSH requests + * getting here usually have bio's without data. + */ + if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { + rq->cmd_flags &= ~REQ_FLUSH; + if (!has_fua) + rq->cmd_flags &= ~REQ_FUA; return rq; + } + /* + * Sequenced flushes can't be processed in parallel. If + * another one is already in progress, queue for later + * processing. + */ if (q->flush_seq) { - /* - * Sequenced flush is already in progress and they - * can't be processed in parallel. Queue for later - * processing. - */ list_move_tail(&rq->queuelist, &q->pending_flushes); return NULL; } - if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) { - /* - * Queue ordering not supported. Terminate - * with prejudice. - */ - blk_dequeue_request(rq); - __blk_end_request_all(rq, -EOPNOTSUPP); - return NULL; - } - /* * Start a new flush sequence */ q->flush_err = 0; - q->ordered = q->next_ordered; q->flush_seq |= QUEUE_FSEQ_STARTED; - /* - * For an empty barrier, there's no actual BAR request, which - * in turn makes POSTFLUSH unnecessary. Mask them off. - */ - if (!blk_rq_sectors(rq)) - q->ordered &= ~(QUEUE_ORDERED_DO_BAR | - QUEUE_ORDERED_DO_POSTFLUSH); - - /* stash away the original request */ + /* adjust FLUSH/FUA of the original request and stash it away */ + rq->cmd_flags &= ~REQ_FLUSH; + if (!has_fua) + rq->cmd_flags &= ~REQ_FUA; blk_dequeue_request(rq); q->orig_flush_rq = rq; - if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) + /* skip unneded sequences and return the first one */ + if (!do_preflush) skip |= QUEUE_FSEQ_PREFLUSH; - - if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) + if (!blk_rq_sectors(rq)) skip |= QUEUE_FSEQ_DATA; - - if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) + if (!do_postflush) skip |= QUEUE_FSEQ_POSTFLUSH; - - /* complete skipped sequences and return the first sequence */ return blk_flush_complete_seq(q, skip, 0); } diff --git a/block/blk.h b/block/blk.h index 24b92bd78f37..a09c18b19116 100644 --- a/block/blk.h +++ b/block/blk.h @@ -60,6 +60,9 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); + if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) || + rq == &q->flush_rq) + return rq; rq = blk_do_flush(q, rq); if (rq) return rq; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 9192282b4259..179799479e6f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -167,7 +167,7 @@ enum rq_flag_bits { (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ - REQ_META| REQ_DISCARD | REQ_NOIDLE) + REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) #define REQ_UNPLUG (1 << __REQ_UNPLUG) #define REQ_RAHEAD (1 << __REQ_RAHEAD) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1cd83ec077db..8ef705f800ab 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -357,7 +357,6 @@ struct request_queue /* * for flush operations */ - unsigned int ordered, next_ordered; unsigned int flush_flags; unsigned int flush_seq; int flush_err; @@ -465,40 +464,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) __clear_bit(flag, &q->queue_flags); } -enum { - /* - * Hardbarrier is supported with one of the following methods. - * - * NONE : hardbarrier unsupported - * DRAIN : ordering by draining is enough - * DRAIN_FLUSH : ordering by draining w/ pre and post flushes - * DRAIN_FUA : ordering by draining w/ pre flush and FUA write - */ - QUEUE_ORDERED_DO_PREFLUSH = 0x10, - QUEUE_ORDERED_DO_BAR = 0x20, - QUEUE_ORDERED_DO_POSTFLUSH = 0x40, - QUEUE_ORDERED_DO_FUA = 0x80, - - QUEUE_ORDERED_NONE = 0x00, - - QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR, - QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH, - QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_FUA, - - /* - * FLUSH/FUA sequences. - */ - QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ - QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ - QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ - QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ - QUEUE_FSEQ_DONE = (1 << 4), -}; - #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) @@ -578,7 +543,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync) * it already be started by driver. */ #define RQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) + (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \ + REQ_FLUSH | REQ_FUA) #define rq_mergeable(rq) \ (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ (((rq)->cmd_flags & REQ_DISCARD) || \ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index ec94c12f21da..fc999f583fda 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,7 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Eopnotsupp, /* operation not supported (barrier) */ + BH_Eopnotsupp, /* DEPRECATED: operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b614758..352c48627381 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -135,12 +135,13 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all - * previously submitted writes must be safely on storage - * before this one is started. Also guarantees that when - * this write is complete, it itself is also safely on - * storage. Prevents reordering of writes on both sides - * of this IO. + * WRITE_BARRIER DEPRECATED. Always fails. Use FLUSH/FUA instead. + * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. + * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on + * non-volatile media on completion. + * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded + * by a cache flush and data is guaranteed to be on + * non-volatile media on completion. * */ #define RW_MASK REQ_WRITE @@ -158,6 +159,12 @@ struct inodes_stat_t { #define WRITE_META (WRITE | REQ_META) #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_HARDBARRIER) +#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FLUSH) +#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FUA) +#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FLUSH | REQ_FUA) /* * These aren't really reads or writes, they pass down information about -- cgit v1.2.3 From 3a2edd0d6ddbd5fa3b389ea6db811285415ce6c8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:18 +0200 Subject: block: make __blk_rq_prep_clone() copy most command flags Currently __blk_rq_prep_clone() copies only REQ_WRITE and REQ_DISCARD. There's no reason to omit other command flags and REQ_FUA needs to be copied to implement FUA support in request-based dm. REQ_COMMON_MASK which specifies flags to be copied from bio to request already identifies all the command flags. Define REQ_CLONE_MASK to be the same as REQ_COMMON_MASK for clarity and make __blk_rq_prep_clone() copy all flags in the mask. Signed-off-by: Tejun Heo Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 4 +--- include/linux/blk_types.h | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 495bdc4a23da..2a5b19204546 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2505,9 +2505,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); static void __blk_rq_prep_clone(struct request *dst, struct request *src) { dst->cpu = src->cpu; - dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE); - if (src->cmd_flags & REQ_DISCARD) - dst->cmd_flags |= REQ_DISCARD; + dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; dst->cmd_type = src->cmd_type; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 179799479e6f..36edadf5b41a 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -168,6 +168,7 @@ enum rq_flag_bits { #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) +#define REQ_CLONE_MASK REQ_COMMON_MASK #define REQ_UNPLUG (1 << __REQ_UNPLUG) #define REQ_RAHEAD (1 << __REQ_RAHEAD) -- cgit v1.2.3 From 2cf6d26a354ab6362e301b5a323832b02867df47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:10 -0400 Subject: block: pass gfp_mask and flags to sb_issue_discard We'll need to get rid of the BLKDEV_IFL_BARRIER flag, and to facilitate that and to make the interface less confusing pass all flags explicitly. Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/ext4/mballoc.c | 3 ++- fs/fat/fatent.c | 4 +++- include/linux/blkdev.h | 11 +++++------ 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4b4ad4b7ce57..df44b345f662 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2566,7 +2566,8 @@ static inline void ext4_issue_discard(struct super_block *sb, discard_block = block + ext4_group_first_block_no(sb, block_group); trace_ext4_discard_blocks(sb, (unsigned long long) discard_block, count); - ret = sb_issue_discard(sb, discard_block, count); + ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, + BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); if (ret == EOPNOTSUPP) { ext4_warning(sb, "discard not supported, disabling"); clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 81184d3b75a3..3a56a82f5658 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -577,7 +577,9 @@ int fat_free_clusters(struct inode *inode, int cluster) sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), - nr_clus * sbi->sec_per_clus); + nr_clus * sbi->sec_per_clus, + GFP_NOFS, + BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); first_cl = cluster; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8ef705f800ab..6b305eb4a343 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -881,13 +881,12 @@ extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); -static inline int sb_issue_discard(struct super_block *sb, - sector_t block, sector_t nr_blocks) +static inline int sb_issue_discard(struct super_block *sb, sector_t block, + sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { - block <<= (sb->s_blocksize_bits - 9); - nr_blocks <<= (sb->s_blocksize_bits - 9); - return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, - BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); + return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9), + nr_blocks << (sb->s_blocksize_bits - 9), + gfp_mask, flags); } extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); -- cgit v1.2.3 From 31725e65c7214b52b607eba705fc4f306be4d5a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:21 -0400 Subject: block: remove the WRITE_BARRIER flag It's unused now. Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 352c48627381..d6add69bc170 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -135,7 +135,6 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * WRITE_BARRIER DEPRECATED. Always fails. Use FLUSH/FUA instead. * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on * non-volatile media on completion. @@ -157,8 +156,6 @@ struct inodes_stat_t { #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) #define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC) #define WRITE_META (WRITE | REQ_META) -#define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ - REQ_HARDBARRIER) #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_FLUSH) #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ -- cgit v1.2.3 From 8c5553678237b7121355108e03c36086037d8975 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:22 -0400 Subject: block: remove the BLKDEV_IFL_BARRIER flag Remove support for barriers on discards, which is unused now. Also remove the DISCARD_NOBARRIER I/O type in favour of just setting the rw flags up locally in blkdev_issue_discard. tj: Also remove DISCARD_SECURE and use REQ_SECURE directly. Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-lib.c | 18 ++---------------- include/linux/blkdev.h | 2 -- include/linux/fs.h | 8 -------- 3 files changed, 2 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/block/blk-lib.c b/block/blk-lib.c index c392029a104e..fe2e6ed0f510 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -39,8 +39,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, { DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q = bdev_get_queue(bdev); - int type = flags & BLKDEV_IFL_BARRIER ? - DISCARD_BARRIER : DISCARD_NOBARRIER; + int type = REQ_WRITE | REQ_DISCARD; unsigned int max_discard_sectors; struct bio *bio; int ret = 0; @@ -65,7 +64,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, if (flags & BLKDEV_IFL_SECURE) { if (!blk_queue_secdiscard(q)) return -EOPNOTSUPP; - type |= DISCARD_SECURE; + type |= REQ_SECURE; } while (nr_sects && !ret) { @@ -162,12 +161,6 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, bb.wait = &wait; bb.end_io = NULL; - if (flags & BLKDEV_IFL_BARRIER) { - /* issue async barrier before the data */ - ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0); - if (ret) - return ret; - } submit: ret = 0; while (nr_sects != 0) { @@ -199,13 +192,6 @@ submit: issued++; submit_bio(WRITE, bio); } - /* - * When all data bios are in flight. Send final barrier if requeted. - */ - if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER) - ret = blkdev_issue_flush(bdev, gfp_mask, NULL, - flags & BLKDEV_IFL_WAIT); - if (flags & BLKDEV_IFL_WAIT) /* Wait for bios in-flight */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6b305eb4a343..cfcb3a610605 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -869,11 +869,9 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, } enum{ BLKDEV_WAIT, /* wait for completion */ - BLKDEV_BARRIER, /* issue request with barrier */ BLKDEV_SECURE, /* secure discard */ }; #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) -#define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER) #define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE) extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, unsigned long); diff --git a/include/linux/fs.h b/include/linux/fs.h index d6add69bc170..6b0f6e9993a3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -163,14 +163,6 @@ struct inodes_stat_t { #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_FLUSH | REQ_FUA) -/* - * These aren't really reads or writes, they pass down information about - * parts of device that are now unused by the file system. - */ -#define DISCARD_NOBARRIER (WRITE | REQ_DISCARD) -#define DISCARD_BARRIER (WRITE | REQ_DISCARD | REQ_HARDBARRIER) -#define DISCARD_SECURE (DISCARD_NOBARRIER | REQ_SECURE) - #define SEL_IN 1 #define SEL_OUT 2 #define SEL_EX 4 -- cgit v1.2.3 From 0edd55faea7c8081bc826234b917501738a6218f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:23 -0400 Subject: block: remove the BH_Eopnotsupp flag This flag was only set for barrier buffers, which we don't submit anymore. Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/buffer.c | 7 +------ fs/fat/misc.c | 5 +---- include/linux/buffer_head.h | 2 -- 3 files changed, 2 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/fs/buffer.c b/fs/buffer.c index 3e7dca279d1c..7f0b9b083f77 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -156,7 +156,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) if (uptodate) { set_buffer_uptodate(bh); } else { - if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) { + if (!quiet_error(bh)) { buffer_io_error(bh); printk(KERN_WARNING "lost page write due to " "I/O error on %s\n", @@ -2891,7 +2891,6 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) if (err == -EOPNOTSUPP) { set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - set_bit(BH_Eopnotsupp, &bh->b_state); } if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) @@ -3031,10 +3030,6 @@ int __sync_dirty_buffer(struct buffer_head *bh, int rw) bh->b_end_io = end_buffer_write_sync; ret = submit_bh(rw, bh); wait_on_buffer(bh); - if (buffer_eopnotsupp(bh)) { - clear_buffer_eopnotsupp(bh); - ret = -EOPNOTSUPP; - } if (!ret && !buffer_uptodate(bh)) ret = -EIO; } else { diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1736f2356388..970e682ea754 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -255,10 +255,7 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) for (i = 0; i < nr_bhs; i++) { wait_on_buffer(bhs[i]); - if (buffer_eopnotsupp(bhs[i])) { - clear_buffer_eopnotsupp(bhs[i]); - err = -EOPNOTSUPP; - } else if (!err && !buffer_uptodate(bhs[i])) + if (!err && !buffer_uptodate(bhs[i])) err = -EIO; } return err; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index fc999f583fda..dd1b25b2641c 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,6 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Eopnotsupp, /* DEPRECATED: operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ @@ -124,7 +123,6 @@ BUFFER_FNS(Async_Write, async_write) BUFFER_FNS(Delay, delay) BUFFER_FNS(Boundary, boundary) BUFFER_FNS(Write_EIO, write_io_error) -BUFFER_FNS(Eopnotsupp, eopnotsupp) BUFFER_FNS(Unwritten, unwritten) #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) -- cgit v1.2.3 From 30ca22c70e3ef0a96ff84de69cd7e8561b416cb2 Mon Sep 17 00:00:00 2001 From: "Patrick J. LoPresti" Date: Thu, 22 Jul 2010 15:03:41 -0700 Subject: ext3/ext4: Factor out disk addressability check As part of adding support for OCFS2 to mount huge volumes, we need to check that the sector_t and page cache of the system are capable of addressing the entire volume. An identical check already appears in ext3 and ext4. This patch moves the addressability check into its own function in fs/libfs.c and modifies ext3 and ext4 to invoke it. [Edited to -EINVAL instead of BUG_ON() for bad blocksize_bits -- Joel] Signed-off-by: Patrick LoPresti Cc: linux-ext4@vger.kernel.org Acked-by: Andreas Dilger Signed-off-by: Joel Becker --- fs/ext3/super.c | 4 ++-- fs/ext4/super.c | 8 +++----- fs/libfs.c | 29 +++++++++++++++++++++++++++++ include/linux/fs.h | 2 ++ 4 files changed, 36 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 5dbf4dba03c4..a367dd044280 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1849,8 +1849,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount; } - if (le32_to_cpu(es->s_blocks_count) > - (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { + if (generic_check_addressable(sb->s_blocksize_bits, + le32_to_cpu(es->s_blocks_count))) { ext3_msg(sb, KERN_ERR, "error: filesystem is too large to mount safely"); if (sizeof(sector_t) < 8) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 26147746c272..7f47c366bf15 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2831,15 +2831,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * Test whether we have more sectors than will fit in sector_t, * and whether the max offset is addressable by the page cache. */ - if ((ext4_blocks_count(es) > - (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || - (ext4_blocks_count(es) > - (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { + ret = generic_check_addressable(sb->s_blocksize_bits, + ext4_blocks_count(es)); + if (ret) { ext4_msg(sb, KERN_ERR, "filesystem" " too large to mount safely on this system"); if (sizeof(sector_t) < 8) ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); - ret = -EFBIG; goto failed_mount; } diff --git a/fs/libfs.c b/fs/libfs.c index 0a9da95317f7..8debe7b33769 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -913,6 +913,35 @@ int generic_file_fsync(struct file *file, int datasync) } EXPORT_SYMBOL(generic_file_fsync); +/** + * generic_check_addressable - Check addressability of file system + * @blocksize_bits: log of file system block size + * @num_blocks: number of blocks in file system + * + * Determine whether a file system with @num_blocks blocks (and a + * block size of 2**@blocksize_bits) is addressable by the sector_t + * and page cache of the system. Return 0 if so and -EFBIG otherwise. + */ +int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) +{ + u64 last_fs_block = num_blocks - 1; + + if (unlikely(num_blocks == 0)) + return 0; + + if ((blocksize_bits < 9) || (blocksize_bits > PAGE_CACHE_SHIFT)) + return -EINVAL; + + if ((last_fs_block > + (sector_t)(~0ULL) >> (blocksize_bits - 9)) || + (last_fs_block > + (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - blocksize_bits))) { + return -EFBIG; + } + return 0; +} +EXPORT_SYMBOL(generic_check_addressable); + /* * No-op implementation of ->fsync for in-memory filesystems. */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b614758..1a759f40ab9e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2374,6 +2374,8 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, extern int generic_file_fsync(struct file *, int); +extern int generic_check_addressable(unsigned, u64); + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *); -- cgit v1.2.3 From c8bf1336824ebd698d37b71763e1c43190f2229a Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 10 Sep 2010 20:07:38 +0200 Subject: Consolidate min_not_zero We have several users of min_not_zero, each of them using their own definition. Move the define to kernel.h. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 5 ----- drivers/block/drbd/drbd_receiver.c | 1 - drivers/md/dm-snap.c | 2 -- drivers/md/dm-table.c | 5 ----- include/linux/kernel.h | 10 ++++++++++ 5 files changed, 10 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index a234f4bf1d6f..8d592b559bd3 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -455,11 +455,6 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt) } EXPORT_SYMBOL(blk_queue_io_opt); -/* - * Returns the minimum that is _not_ zero, unless both are zero. - */ -#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) - /** * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers * @t: the stacking driver (top) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 081522d3c742..484ecbb6b772 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2972,7 +2972,6 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) * we still need to figure out whether we accept that. */ mdev->p_size = p_size; -#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) if (get_ldev(mdev)) { warn_if_differ_considerably(mdev, "lower level device sizes", p_size, drbd_get_max_capacity(mdev->ldev)); diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 5974d3094d97..f30f6e8d594e 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -706,8 +706,6 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) return 0; } -#define min_not_zero(l, r) (((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r))) - /* * Return a minimum chunk size of all snapshots that have the specified origin. * Return zero if the origin has no snapshots. diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index f9fc07d7a4b9..90267f8d64ee 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -486,11 +486,6 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti, return 0; } -/* - * Returns the minimum that is _not_ zero, unless both are zero. - */ -#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) - int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b0a35e6bc69..f5df2f4acb0d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -640,6 +640,16 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } (void) (&_max1 == &_max2); \ _max1 > _max2 ? _max1 : _max2; }) +/** + * min_not_zero - return the minimum that is _not_ zero, unless both are zero + * @x: value1 + * @y: value2 + */ +#define min_not_zero(x, y) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) + /** * clamp - return a value clamped to a given range with strict typechecking * @val: current value -- cgit v1.2.3 From 13f05c8d8e98bbdce89158bfdb2e380940695a88 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 10 Sep 2010 20:50:10 +0200 Subject: block/scsi: Provide a limit on the number of integrity segments Some controllers have a hardware limit on the number of protection information scatter-gather list segments they can handle. Introduce a max_integrity_segments limit in the block layer and provide a new scsi_host_template setting that allows HBA drivers to provide a value suitable for the hardware. Add support for honoring the integrity segment limit when merging both bios and requests. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-integrity.c | 93 ++++++++++++++++++++++++++++++++++++----------- block/blk-merge.c | 23 +++++++----- block/blk-settings.c | 3 ++ block/blk-sysfs.c | 11 ++++++ block/blk.h | 8 ---- drivers/scsi/hosts.c | 1 + drivers/scsi/scsi_lib.c | 26 +++++++++---- drivers/scsi/scsi_sysfs.c | 2 + include/linux/bio.h | 4 ++ include/linux/blkdev.h | 33 +++++++++++++++-- include/scsi/scsi.h | 6 +++ include/scsi/scsi_host.h | 7 ++++ 12 files changed, 167 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index edce1ef7933d..885cbb59967e 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -32,24 +32,37 @@ static struct kmem_cache *integrity_cachep; /** * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements - * @rq: request with integrity metadata attached + * @q: request queue + * @bio: bio with integrity metadata attached * * Description: Returns the number of elements required in a - * scatterlist corresponding to the integrity metadata in a request. + * scatterlist corresponding to the integrity metadata in a bio. */ -int blk_rq_count_integrity_sg(struct request *rq) +int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio) { - struct bio_vec *iv, *ivprv; - struct req_iterator iter; - unsigned int segments; + struct bio_vec *iv, *ivprv = NULL; + unsigned int segments = 0; + unsigned int seg_size = 0; + unsigned int i = 0; - ivprv = NULL; - segments = 0; + bio_for_each_integrity_vec(iv, bio, i) { - rq_for_each_integrity_segment(iv, rq, iter) { + if (ivprv) { + if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv)) + goto new_segment; + + if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv)) + goto new_segment; + + if (seg_size + iv->bv_len > queue_max_segment_size(q)) + goto new_segment; - if (!ivprv || !BIOVEC_PHYS_MERGEABLE(ivprv, iv)) + seg_size += iv->bv_len; + } else { +new_segment: segments++; + seg_size = iv->bv_len; + } ivprv = iv; } @@ -60,30 +73,34 @@ EXPORT_SYMBOL(blk_rq_count_integrity_sg); /** * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist - * @rq: request with integrity metadata attached + * @q: request queue + * @bio: bio with integrity metadata attached * @sglist: target scatterlist * * Description: Map the integrity vectors in request into a * scatterlist. The scatterlist must be big enough to hold all * elements. I.e. sized using blk_rq_count_integrity_sg(). */ -int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist) +int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio, + struct scatterlist *sglist) { - struct bio_vec *iv, *ivprv; - struct req_iterator iter; - struct scatterlist *sg; - unsigned int segments; + struct bio_vec *iv, *ivprv = NULL; + struct scatterlist *sg = NULL; + unsigned int segments = 0; + unsigned int i = 0; - ivprv = NULL; - sg = NULL; - segments = 0; - - rq_for_each_integrity_segment(iv, rq, iter) { + bio_for_each_integrity_vec(iv, bio, i) { if (ivprv) { if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv)) goto new_segment; + if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv)) + goto new_segment; + + if (sg->length + iv->bv_len > queue_max_segment_size(q)) + goto new_segment; + sg->length += iv->bv_len; } else { new_segment: @@ -162,6 +179,40 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) } EXPORT_SYMBOL(blk_integrity_compare); +int blk_integrity_merge_rq(struct request_queue *q, struct request *req, + struct request *next) +{ + if (blk_integrity_rq(req) != blk_integrity_rq(next)) + return -1; + + if (req->nr_integrity_segments + next->nr_integrity_segments > + q->limits.max_integrity_segments) + return -1; + + return 0; +} +EXPORT_SYMBOL(blk_integrity_merge_rq); + +int blk_integrity_merge_bio(struct request_queue *q, struct request *req, + struct bio *bio) +{ + int nr_integrity_segs; + struct bio *next = bio->bi_next; + + bio->bi_next = NULL; + nr_integrity_segs = blk_rq_count_integrity_sg(q, bio); + bio->bi_next = next; + + if (req->nr_integrity_segments + nr_integrity_segs > + q->limits.max_integrity_segments) + return -1; + + req->nr_integrity_segments += nr_integrity_segs; + + return 0; +} +EXPORT_SYMBOL(blk_integrity_merge_bio); + struct integrity_sysfs_entry { struct attribute attr; ssize_t (*show)(struct blk_integrity *, char *); diff --git a/block/blk-merge.c b/block/blk-merge.c index 3b0cd4249671..6a725461654d 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -205,12 +205,11 @@ static inline int ll_new_hw_segment(struct request_queue *q, { int nr_phys_segs = bio_phys_segments(q, bio); - if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) { - req->cmd_flags |= REQ_NOMERGE; - if (req == q->last_merge) - q->last_merge = NULL; - return 0; - } + if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) + goto no_merge; + + if (bio_integrity(bio) && blk_integrity_merge_bio(q, req, bio)) + goto no_merge; /* * This will form the start of a new hw segment. Bump both @@ -218,6 +217,12 @@ static inline int ll_new_hw_segment(struct request_queue *q, */ req->nr_phys_segments += nr_phys_segs; return 1; + +no_merge: + req->cmd_flags |= REQ_NOMERGE; + if (req == q->last_merge) + q->last_merge = NULL; + return 0; } int ll_back_merge_fn(struct request_queue *q, struct request *req, @@ -301,6 +306,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (total_phys_segments > queue_max_segments(q)) return 0; + if (blk_integrity_rq(req) && blk_integrity_merge_rq(q, req, next)) + return 0; + /* Merge is OK... */ req->nr_phys_segments = total_phys_segments; return 1; @@ -372,9 +380,6 @@ static int attempt_merge(struct request_queue *q, struct request *req, || next->special) return 0; - if (blk_integrity_rq(req) != blk_integrity_rq(next)) - return 0; - /* * If we are allowed to merge, then append bio list * from next to rq and release next. merge_requests_fn diff --git a/block/blk-settings.c b/block/blk-settings.c index 8d592b559bd3..f8f2ddf20613 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -111,6 +111,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy); void blk_set_default_limits(struct queue_limits *lim) { lim->max_segments = BLK_MAX_SEGMENTS; + lim->max_integrity_segments = 0; lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; lim->max_sectors = BLK_DEF_MAX_SECTORS; @@ -509,6 +510,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->seg_boundary_mask); t->max_segments = min_not_zero(t->max_segments, b->max_segments); + t->max_integrity_segments = min_not_zero(t->max_integrity_segments, + b->max_integrity_segments); t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 001ab18078f5..b014f7739e98 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -112,6 +112,11 @@ static ssize_t queue_max_segments_show(struct request_queue *q, char *page) return queue_var_show(queue_max_segments(q), (page)); } +static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *page) +{ + return queue_var_show(q->limits.max_integrity_segments, (page)); +} + static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page) { if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) @@ -288,6 +293,11 @@ static struct queue_sysfs_entry queue_max_segments_entry = { .show = queue_max_segments_show, }; +static struct queue_sysfs_entry queue_max_integrity_segments_entry = { + .attr = {.name = "max_integrity_segments", .mode = S_IRUGO }, + .show = queue_max_integrity_segments_show, +}; + static struct queue_sysfs_entry queue_max_segment_size_entry = { .attr = {.name = "max_segment_size", .mode = S_IRUGO }, .show = queue_max_segment_size_show, @@ -375,6 +385,7 @@ static struct attribute *default_attrs[] = { &queue_max_hw_sectors_entry.attr, &queue_max_sectors_entry.attr, &queue_max_segments_entry.attr, + &queue_max_integrity_segments_entry.attr, &queue_max_segment_size_entry.attr, &queue_iosched_entry.attr, &queue_hw_sector_size_entry.attr, diff --git a/block/blk.h b/block/blk.h index 6e7dc87141e4..6738831ba447 100644 --- a/block/blk.h +++ b/block/blk.h @@ -132,14 +132,6 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) return q->nr_congestion_off; } -#if defined(CONFIG_BLK_DEV_INTEGRITY) - -#define rq_for_each_integrity_segment(bvl, _rq, _iter) \ - __rq_for_each_bio(_iter.bio, _rq) \ - bip_for_each_vec(bvl, _iter.bio->bi_integrity, _iter.i) - -#endif /* BLK_DEV_INTEGRITY */ - static inline int blk_cpu_to_group(int cpu) { #ifdef CONFIG_SCHED_MC diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 8a8f803439e1..10478153641b 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -376,6 +376,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) shost->this_id = sht->this_id; shost->can_queue = sht->can_queue; shost->sg_tablesize = sht->sg_tablesize; + shost->sg_prot_tablesize = sht->sg_prot_tablesize; shost->cmd_per_lun = sht->cmd_per_lun; shost->unchecked_isa_dma = sht->unchecked_isa_dma; shost->use_clustering = sht->use_clustering; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 9ade720422c6..861c0b937ac9 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -968,11 +968,13 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb, */ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask) { - int error = scsi_init_sgtable(cmd->request, &cmd->sdb, gfp_mask); + struct request *rq = cmd->request; + + int error = scsi_init_sgtable(rq, &cmd->sdb, gfp_mask); if (error) goto err_exit; - if (blk_bidi_rq(cmd->request)) { + if (blk_bidi_rq(rq)) { struct scsi_data_buffer *bidi_sdb = kmem_cache_zalloc( scsi_sdb_cache, GFP_ATOMIC); if (!bidi_sdb) { @@ -980,28 +982,28 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask) goto err_exit; } - cmd->request->next_rq->special = bidi_sdb; - error = scsi_init_sgtable(cmd->request->next_rq, bidi_sdb, - GFP_ATOMIC); + rq->next_rq->special = bidi_sdb; + error = scsi_init_sgtable(rq->next_rq, bidi_sdb, GFP_ATOMIC); if (error) goto err_exit; } - if (blk_integrity_rq(cmd->request)) { + if (blk_integrity_rq(rq)) { struct scsi_data_buffer *prot_sdb = cmd->prot_sdb; int ivecs, count; BUG_ON(prot_sdb == NULL); - ivecs = blk_rq_count_integrity_sg(cmd->request); + ivecs = blk_rq_count_integrity_sg(rq->q, rq->bio); if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) { error = BLKPREP_DEFER; goto err_exit; } - count = blk_rq_map_integrity_sg(cmd->request, + count = blk_rq_map_integrity_sg(rq->q, rq->bio, prot_sdb->table.sgl); BUG_ON(unlikely(count > ivecs)); + BUG_ON(unlikely(count > queue_max_integrity_segments(rq->q))); cmd->prot_sdb = prot_sdb; cmd->prot_sdb->table.nents = count; @@ -1625,6 +1627,14 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, blk_queue_max_segments(q, min_t(unsigned short, shost->sg_tablesize, SCSI_MAX_SG_CHAIN_SEGMENTS)); + if (scsi_host_prot_dma(shost)) { + shost->sg_prot_tablesize = + min_not_zero(shost->sg_prot_tablesize, + (unsigned short)SCSI_MAX_PROT_SG_SEGMENTS); + BUG_ON(shost->sg_prot_tablesize < shost->sg_tablesize); + blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize); + } + blk_queue_max_hw_sectors(q, shost->max_sectors); blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); blk_queue_segment_boundary(q, shost->dma_boundary); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index c3f67373a4f8..20ad59dff730 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -251,6 +251,7 @@ shost_rd_attr(host_busy, "%hu\n"); shost_rd_attr(cmd_per_lun, "%hd\n"); shost_rd_attr(can_queue, "%hd\n"); shost_rd_attr(sg_tablesize, "%hu\n"); +shost_rd_attr(sg_prot_tablesize, "%hu\n"); shost_rd_attr(unchecked_isa_dma, "%d\n"); shost_rd_attr(prot_capabilities, "%u\n"); shost_rd_attr(prot_guard_type, "%hd\n"); @@ -262,6 +263,7 @@ static struct attribute *scsi_sysfs_shost_attrs[] = { &dev_attr_cmd_per_lun.attr, &dev_attr_can_queue.attr, &dev_attr_sg_tablesize.attr, + &dev_attr_sg_prot_tablesize.attr, &dev_attr_unchecked_isa_dma.attr, &dev_attr_proc_name.attr, &dev_attr_scan.attr, diff --git a/include/linux/bio.h b/include/linux/bio.h index 5274103434ad..2c3fd7421607 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -496,6 +496,10 @@ static inline struct bio *bio_list_get(struct bio_list *bl) #define bip_for_each_vec(bvl, bip, i) \ __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) +#define bio_for_each_integrity_vec(_bvl, _bio, _iter) \ + for_each_bio(_bio) \ + bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) + #define bio_integrity(bio) (bio->bi_integrity != NULL) extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c54906f678f..7e661106270a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -124,6 +124,9 @@ struct request { * physical address coalescing is performed. */ unsigned short nr_phys_segments; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + unsigned short nr_integrity_segments; +#endif unsigned short ioprio; @@ -243,6 +246,7 @@ struct queue_limits { unsigned short logical_block_size; unsigned short max_segments; + unsigned short max_integrity_segments; unsigned char misaligned; unsigned char discard_misaligned; @@ -1213,8 +1217,13 @@ struct blk_integrity { extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); extern void blk_integrity_unregister(struct gendisk *); extern int blk_integrity_compare(struct gendisk *, struct gendisk *); -extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); -extern int blk_rq_count_integrity_sg(struct request *); +extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, + struct scatterlist *); +extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); +extern int blk_integrity_merge_rq(struct request_queue *, struct request *, + struct request *); +extern int blk_integrity_merge_bio(struct request_queue *, struct request *, + struct bio *); static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) @@ -1235,16 +1244,32 @@ static inline int blk_integrity_rq(struct request *rq) return bio_integrity(rq->bio); } +static inline void blk_queue_max_integrity_segments(struct request_queue *q, + unsigned int segs) +{ + q->limits.max_integrity_segments = segs; +} + +static inline unsigned short +queue_max_integrity_segments(struct request_queue *q) +{ + return q->limits.max_integrity_segments; +} + #else /* CONFIG_BLK_DEV_INTEGRITY */ #define blk_integrity_rq(rq) (0) -#define blk_rq_count_integrity_sg(a) (0) -#define blk_rq_map_integrity_sg(a, b) (0) +#define blk_rq_count_integrity_sg(a, b) (0) +#define blk_rq_map_integrity_sg(a, b, c) (0) #define bdev_get_integrity(a) (0) #define blk_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) #define blk_integrity_unregister(a) do { } while (0); +#define blk_queue_max_integrity_segments(a, b) do { } while (0); +#define queue_max_integrity_segments(a) (0) +#define blk_integrity_merge_rq(a, b, c) (0) +#define blk_integrity_merge_bio(a, b, c) (0) #endif /* CONFIG_BLK_DEV_INTEGRITY */ diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 8fcb6e0e9e72..d63533a4a59e 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -31,6 +31,12 @@ struct scsi_cmnd; #define SCSI_MAX_SG_CHAIN_SEGMENTS SCSI_MAX_SG_SEGMENTS #endif +/* + * DIX-capable adapters effectively support infinite chaining for the + * protection information scatterlist + */ +#define SCSI_MAX_PROT_SG_SEGMENTS 0xFFFF + /* * Special value for scanning to specify scanning or rescanning of all * possible channels, (target) ids, or luns on a given shost. diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index b7bdecb7b76e..d0a6a845f204 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -388,6 +388,7 @@ struct scsi_host_template { * of scatter-gather. */ unsigned short sg_tablesize; + unsigned short sg_prot_tablesize; /* * Set this if the host adapter has limitations beside segment count. @@ -599,6 +600,7 @@ struct Scsi_Host { int can_queue; short cmd_per_lun; short unsigned int sg_tablesize; + short unsigned int sg_prot_tablesize; short unsigned int max_sectors; unsigned long dma_boundary; /* @@ -823,6 +825,11 @@ static inline unsigned int scsi_host_get_prot(struct Scsi_Host *shost) return shost->prot_capabilities; } +static inline int scsi_host_prot_dma(struct Scsi_Host *shost) +{ + return shost->prot_capabilities >= SHOST_DIX_TYPE0_PROTECTION; +} + static inline unsigned int scsi_host_dif_capable(struct Scsi_Host *shost, unsigned int target_type) { static unsigned char cap[] = { 0, -- cgit v1.2.3 From 637bbdc5b83615ef9f45f50399d1c7f27473c713 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Mon, 13 Sep 2010 20:19:03 +0800 Subject: sched: Remove unused PF_ALIGNWARN flag PF_ALIGNWARN is not implemented and it is for 486 as the comment. It is not likely someone will implement this flag feature. So here remove this flag and leave the valuable 0x00000001 for future use. Signed-off-by: Dave Young Cc: Peter Zijlstra Cc: Linus Torvalds LKML-Reference: <20100913121903.GB22238@darkstar> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b51c53c285b8..cdf56693ecbf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1682,8 +1682,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * /* * Per process flags */ -#define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */ - /* Not implemented yet, only for 486*/ #define PF_STARTING 0x00000002 /* being created */ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ -- cgit v1.2.3 From ceee42714cf382e9bb9ab71b846ad49497b29d6c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 13 Sep 2010 23:53:55 -0700 Subject: Input: serio_driver - mark id_table and description as const Memory pointed to by these fields is not supposed to change. Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/serio.h b/include/linux/serio.h index b5552568178d..a31c95a3171e 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -55,9 +55,9 @@ struct serio { struct serio_driver { void *private; - char *description; + const char *description; - struct serio_device_id *id_table; + const struct serio_device_id *id_table; bool manual_bind; void (*write_wakeup)(struct serio *); -- cgit v1.2.3 From 7fc49c498c18795d35864bee433caf419bd013b2 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 13 Sep 2010 23:53:55 -0700 Subject: Input: serio_driver - drop private pointer Nobody uses it anymore. Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/serio.h b/include/linux/serio.h index a31c95a3171e..111ad501b054 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -54,7 +54,6 @@ struct serio { #define to_serio_port(d) container_of(d, struct serio, dev) struct serio_driver { - void *private; const char *description; const struct serio_device_id *id_table; -- cgit v1.2.3 From 37b0bb112b7e3ffa5015c4305a934e861b4e2e53 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 13 Sep 2010 23:53:55 -0700 Subject: Input: gameport_driver - mark description as const pointer Memory pointed to by the pointer should not change. Signed-off-by: Dmitry Torokhov --- include/linux/gameport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 361d1cc288d0..632d1265fbe0 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -55,7 +55,7 @@ struct gameport { struct gameport_driver { void *private; - char *description; + const char *description; int (*connect)(struct gameport *, struct gameport_driver *drv); int (*reconnect)(struct gameport *); -- cgit v1.2.3 From 528487081aad32da85bf99802bdb7af32f4922b9 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 13 Sep 2010 23:53:55 -0700 Subject: Input: gameport_driver - drop private pointer Nobody uses it anymore. Signed-off-by: Dmitry Torokhov --- include/linux/gameport.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 632d1265fbe0..b65a6f472775 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -53,8 +53,6 @@ struct gameport { #define to_gameport_port(d) container_of(d, struct gameport, dev) struct gameport_driver { - - void *private; const char *description; int (*connect)(struct gameport *, struct gameport_driver *drv); -- cgit v1.2.3 From 44432407d9f5e4b2e56c7eccb65d98cad4bba191 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 3 Sep 2010 07:20:04 +0000 Subject: fbdev: sh_mobile_lcdcfb: Support multiple video modes in platform data This is a preparation for HDMI hotplug support. This patch just moves all platform defined video modes for the sh_mobile_lcdcfb driver to separate arrays and switches all users to use element 0 of that array, so, this patch doesn't introduce any functional changes and as such should not cause any regressions. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ap4evb.c | 98 ++++++++++++++++++++--------------- arch/sh/boards/mach-ap325rxa/setup.c | 29 ++++++----- arch/sh/boards/mach-ecovec24/setup.c | 60 ++++++++++++--------- arch/sh/boards/mach-kfr2r09/setup.c | 29 ++++++----- arch/sh/boards/mach-migor/setup.c | 58 +++++++++++---------- arch/sh/boards/mach-se/7724/setup.c | 54 ++++++++++++------- drivers/video/sh_mipi_dsi.c | 32 +++++++----- drivers/video/sh_mobile_hdmi.c | 3 +- drivers/video/sh_mobile_lcdcfb.c | 10 ++-- include/video/sh_mobile_lcdc.h | 3 +- 10 files changed, 219 insertions(+), 157 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 31e5b28ab9b4..4e883e0fb01b 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -375,10 +375,40 @@ static struct platform_device usb1_host_device = { .resource = usb1_host_resources, }; +const static struct fb_videomode ap4evb_lcdc_modes[] = { + { +#ifdef CONFIG_AP4EVB_QHD + .name = "R63302(QHD)", + .xres = 544, + .yres = 961, + .left_margin = 72, + .right_margin = 600, + .hsync_len = 16, + .upper_margin = 8, + .lower_margin = 8, + .vsync_len = 2, + .sync = FB_SYNC_VERT_HIGH_ACT | FB_SYNC_HOR_HIGH_ACT, +#else + .name = "WVGA Panel", + .xres = 800, + .yres = 480, + .left_margin = 220, + .right_margin = 110, + .hsync_len = 70, + .upper_margin = 20, + .lower_margin = 5, + .vsync_len = 5, + .sync = 0, +#endif + }, +}; + static struct sh_mobile_lcdc_info lcdc_info = { .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, + .lcd_cfg = ap4evb_lcdc_modes, + .num_cfg = ARRAY_SIZE(ap4evb_lcdc_modes), } }; @@ -569,6 +599,31 @@ static struct platform_device fsi_device = { }, }; +const static struct fb_videomode ap4evb_hdmi_modes[] = { + { + .name = "HDMI 720p", + .xres = 1280, + .yres = 720, + + /* + * If left and right margins are not multiples of 8, + * LDHAJR will be adjusted accordingly by the LCDC + * driver. Until we start using EDID, these values + * might have to be adjusted for different monitors. + */ + .left_margin = 200, + .right_margin = 88, + .hsync_len = 48, + + .upper_margin = 20, + .lower_margin = 5, + .vsync_len = 5, + + .pixclock = 13468, + .sync = FB_SYNC_VERT_HIGH_ACT | FB_SYNC_HOR_HIGH_ACT, + }, +}; + static struct sh_mobile_lcdc_info sh_mobile_lcdc1_info = { .clock_source = LCDC_CLK_EXTERNAL, .ch[0] = { @@ -577,26 +632,8 @@ static struct sh_mobile_lcdc_info sh_mobile_lcdc1_info = { .interface_type = RGB24, .clock_divider = 1, .flags = LCDC_FLAGS_DWPOL, - .lcd_cfg = { - .name = "HDMI", - /* So far only 720p is supported */ - .xres = 1280, - .yres = 720, - /* - * If left and right margins are not multiples of 8, - * LDHAJR will be adjusted accordingly by the LCDC - * driver. Until we start using EDID, these values - * might have to be adjusted for different monitors. - */ - .left_margin = 200, - .right_margin = 88, - .hsync_len = 48, - .upper_margin = 20, - .lower_margin = 5, - .vsync_len = 5, - .pixclock = 13468, - .sync = FB_SYNC_VERT_HIGH_ACT | FB_SYNC_HOR_HIGH_ACT, - }, + .lcd_cfg = ap4evb_hdmi_modes, + .num_cfg = ARRAY_SIZE(ap4evb_hdmi_modes), } }; @@ -960,17 +997,6 @@ static void __init ap4evb_init(void) lcdc_info.ch[0].interface_type = RGB24; lcdc_info.ch[0].clock_divider = 1; lcdc_info.ch[0].flags = LCDC_FLAGS_DWPOL; - lcdc_info.ch[0].lcd_cfg.name = "R63302(QHD)"; - lcdc_info.ch[0].lcd_cfg.xres = 544; - lcdc_info.ch[0].lcd_cfg.yres = 961; - lcdc_info.ch[0].lcd_cfg.left_margin = 72; - lcdc_info.ch[0].lcd_cfg.right_margin = 600; - lcdc_info.ch[0].lcd_cfg.hsync_len = 16; - lcdc_info.ch[0].lcd_cfg.upper_margin = 8; - lcdc_info.ch[0].lcd_cfg.lower_margin = 8; - lcdc_info.ch[0].lcd_cfg.vsync_len = 2; - lcdc_info.ch[0].lcd_cfg.sync = FB_SYNC_VERT_HIGH_ACT | - FB_SYNC_HOR_HIGH_ACT; lcdc_info.ch[0].lcd_size_cfg.width = 44; lcdc_info.ch[0].lcd_size_cfg.height = 79; @@ -1013,16 +1039,6 @@ static void __init ap4evb_init(void) lcdc_info.ch[0].interface_type = RGB18; lcdc_info.ch[0].clock_divider = 2; lcdc_info.ch[0].flags = 0; - lcdc_info.ch[0].lcd_cfg.name = "WVGA Panel"; - lcdc_info.ch[0].lcd_cfg.xres = 800; - lcdc_info.ch[0].lcd_cfg.yres = 480; - lcdc_info.ch[0].lcd_cfg.left_margin = 220; - lcdc_info.ch[0].lcd_cfg.right_margin = 110; - lcdc_info.ch[0].lcd_cfg.hsync_len = 70; - lcdc_info.ch[0].lcd_cfg.upper_margin = 20; - lcdc_info.ch[0].lcd_cfg.lower_margin = 5; - lcdc_info.ch[0].lcd_cfg.vsync_len = 5; - lcdc_info.ch[0].lcd_cfg.sync = 0; lcdc_info.ch[0].lcd_size_cfg.width = 152; lcdc_info.ch[0].lcd_size_cfg.height = 91; diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c index 3da116f47f01..00553233a7c5 100644 --- a/arch/sh/boards/mach-ap325rxa/setup.c +++ b/arch/sh/boards/mach-ap325rxa/setup.c @@ -176,6 +176,21 @@ static void ap320_wvga_power_off(void *board_data) __raw_writew(0, FPGA_LCDREG); } +const static struct fb_videomode ap325rxa_lcdc_modes[] = { + { + .name = "LB070WV1", + .xres = 800, + .yres = 480, + .left_margin = 32, + .right_margin = 160, + .hsync_len = 8, + .upper_margin = 63, + .lower_margin = 80, + .vsync_len = 1, + .sync = 0, /* hsync and vsync are active low */ + }, +}; + static struct sh_mobile_lcdc_info lcdc_info = { .clock_source = LCDC_CLK_EXTERNAL, .ch[0] = { @@ -183,18 +198,8 @@ static struct sh_mobile_lcdc_info lcdc_info = { .bpp = 16, .interface_type = RGB18, .clock_divider = 1, - .lcd_cfg = { - .name = "LB070WV1", - .xres = 800, - .yres = 480, - .left_margin = 32, - .right_margin = 160, - .hsync_len = 8, - .upper_margin = 63, - .lower_margin = 80, - .vsync_len = 1, - .sync = 0, /* hsync and vsync are active low */ - }, + .lcd_cfg = ap325rxa_lcdc_modes, + .num_cfg = ARRAY_SIZE(ap325rxa_lcdc_modes), .lcd_size_cfg = { /* 7.0 inch */ .width = 152, .height = 91, diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index 1d7b495a7db4..feadf5dada77 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -231,14 +231,41 @@ static struct platform_device usb1_common_device = { }; /* LCDC */ +const static struct fb_videomode ecovec_lcd_modes[] = { + { + .name = "Panel", + .xres = 800, + .yres = 480, + .left_margin = 220, + .right_margin = 110, + .hsync_len = 70, + .upper_margin = 20, + .lower_margin = 5, + .vsync_len = 5, + .sync = 0, /* hsync and vsync are active low */ + }, +}; + +const static struct fb_videomode ecovec_dvi_modes[] = { + { + .name = "DVI", + .xres = 1280, + .yres = 720, + .left_margin = 220, + .right_margin = 110, + .hsync_len = 40, + .upper_margin = 20, + .lower_margin = 5, + .vsync_len = 5, + .sync = 0, /* hsync and vsync are active low */ + }, +}; + static struct sh_mobile_lcdc_info lcdc_info = { .ch[0] = { .interface_type = RGB18, .chan = LCDC_CHAN_MAINLCD, .bpp = 16, - .lcd_cfg = { - .sync = 0, /* hsync and vsync are active low */ - }, .lcd_size_cfg = { /* 7.0 inch */ .width = 152, .height = 91, @@ -1079,33 +1106,18 @@ static int __init arch_setup(void) if (gpio_get_value(GPIO_PTE6)) { /* DVI */ lcdc_info.clock_source = LCDC_CLK_EXTERNAL; - lcdc_info.ch[0].clock_divider = 1, - lcdc_info.ch[0].lcd_cfg.name = "DVI"; - lcdc_info.ch[0].lcd_cfg.xres = 1280; - lcdc_info.ch[0].lcd_cfg.yres = 720; - lcdc_info.ch[0].lcd_cfg.left_margin = 220; - lcdc_info.ch[0].lcd_cfg.right_margin = 110; - lcdc_info.ch[0].lcd_cfg.hsync_len = 40; - lcdc_info.ch[0].lcd_cfg.upper_margin = 20; - lcdc_info.ch[0].lcd_cfg.lower_margin = 5; - lcdc_info.ch[0].lcd_cfg.vsync_len = 5; + lcdc_info.ch[0].clock_divider = 1; + lcdc_info.ch[0].lcd_cfg = ecovec_dvi_modes; + lcdc_info.ch[0].num_cfg = ARRAY_SIZE(ecovec_dvi_modes); gpio_set_value(GPIO_PTA2, 1); gpio_set_value(GPIO_PTU1, 1); } else { /* Panel */ - lcdc_info.clock_source = LCDC_CLK_PERIPHERAL; - lcdc_info.ch[0].clock_divider = 2, - lcdc_info.ch[0].lcd_cfg.name = "Panel"; - lcdc_info.ch[0].lcd_cfg.xres = 800; - lcdc_info.ch[0].lcd_cfg.yres = 480; - lcdc_info.ch[0].lcd_cfg.left_margin = 220; - lcdc_info.ch[0].lcd_cfg.right_margin = 110; - lcdc_info.ch[0].lcd_cfg.hsync_len = 70; - lcdc_info.ch[0].lcd_cfg.upper_margin = 20; - lcdc_info.ch[0].lcd_cfg.lower_margin = 5; - lcdc_info.ch[0].lcd_cfg.vsync_len = 5; + lcdc_info.ch[0].clock_divider = 2; + lcdc_info.ch[0].lcd_cfg = ecovec_lcd_modes; + lcdc_info.ch[0].num_cfg = ARRAY_SIZE(ecovec_lcd_modes); gpio_set_value(GPIO_PTR1, 1); diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c index 68994a163f6c..87d4b90e368c 100644 --- a/arch/sh/boards/mach-kfr2r09/setup.c +++ b/arch/sh/boards/mach-kfr2r09/setup.c @@ -126,6 +126,21 @@ static struct platform_device kfr2r09_sh_keysc_device = { }, }; +const static struct fb_videomode kfr2r09_lcdc_modes[] = { + { + .name = "TX07D34VM0AAA", + .xres = 240, + .yres = 400, + .left_margin = 0, + .right_margin = 16, + .hsync_len = 8, + .upper_margin = 0, + .lower_margin = 1, + .vsync_len = 1, + .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, + }, +}; + static struct sh_mobile_lcdc_info kfr2r09_sh_lcdc_info = { .clock_source = LCDC_CLK_BUS, .ch[0] = { @@ -134,18 +149,8 @@ static struct sh_mobile_lcdc_info kfr2r09_sh_lcdc_info = { .interface_type = SYS18, .clock_divider = 6, .flags = LCDC_FLAGS_DWPOL, - .lcd_cfg = { - .name = "TX07D34VM0AAA", - .xres = 240, - .yres = 400, - .left_margin = 0, - .right_margin = 16, - .hsync_len = 8, - .upper_margin = 0, - .lower_margin = 1, - .vsync_len = 1, - .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, - }, + .lcd_cfg = kfr2r09_lcdc_modes, + .num_cfg = ARRAY_SIZE(kfr2r09_lcdc_modes), .lcd_size_cfg = { .width = 35, .height = 58, diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c index 662debe4ead2..9204cbb87147 100644 --- a/arch/sh/boards/mach-migor/setup.c +++ b/arch/sh/boards/mach-migor/setup.c @@ -213,51 +213,55 @@ static struct platform_device migor_nand_flash_device = { } }; +const static struct fb_videomode migor_lcd_modes[] = { + { +#if defined(CONFIG_SH_MIGOR_RTA_WVGA) + .name = "LB070WV1", + .xres = 800, + .yres = 480, + .left_margin = 64, + .right_margin = 16, + .hsync_len = 120, + .sync = 0, +#elif defined(CONFIG_SH_MIGOR_QVGA) + .name = "PH240320T", + .xres = 320, + .yres = 240, + .left_margin = 0, + .right_margin = 16, + .hsync_len = 8, + .sync = FB_SYNC_HOR_HIGH_ACT, +#endif + .upper_margin = 1, + .lower_margin = 17, + .vsync_len = 2, + }, +}; + static struct sh_mobile_lcdc_info sh_mobile_lcdc_info = { -#ifdef CONFIG_SH_MIGOR_RTA_WVGA +#if defined(CONFIG_SH_MIGOR_RTA_WVGA) .clock_source = LCDC_CLK_BUS, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, .interface_type = RGB16, .clock_divider = 2, - .lcd_cfg = { - .name = "LB070WV1", - .xres = 800, - .yres = 480, - .left_margin = 64, - .right_margin = 16, - .hsync_len = 120, - .upper_margin = 1, - .lower_margin = 17, - .vsync_len = 2, - .sync = 0, - }, + .lcd_cfg = migor_lcd_modes, + .num_cfg = ARRAY_SIZE(migor_lcd_modes), .lcd_size_cfg = { /* 7.0 inch */ .width = 152, .height = 91, }, } -#endif -#ifdef CONFIG_SH_MIGOR_QVGA +#elif defined(CONFIG_SH_MIGOR_QVGA) .clock_source = LCDC_CLK_PERIPHERAL, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, .interface_type = SYS16A, .clock_divider = 10, - .lcd_cfg = { - .name = "PH240320T", - .xres = 320, - .yres = 240, - .left_margin = 0, - .right_margin = 16, - .hsync_len = 8, - .upper_margin = 1, - .lower_margin = 17, - .vsync_len = 2, - .sync = FB_SYNC_HOR_HIGH_ACT, - }, + .lcd_cfg = migor_lcd_modes, + .num_cfg = ARRAY_SIZE(migor_lcd_modes), .lcd_size_cfg = { /* 2.4 inch */ .width = 49, .height = 37, diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index 552ebd9ba82b..3099c36759ad 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -144,16 +144,42 @@ static struct platform_device nor_flash_device = { }; /* LCDC */ +const static struct fb_videomode lcdc_720p_modes[] = { + { + .name = "LB070WV1", + .sync = 0, /* hsync and vsync are active low */ + .xres = 1280; + .yres = 720; + .left_margin = 220; + .right_margin = 110; + .hsync_len = 40; + .upper_margin = 20; + .lower_margin = 5; + .vsync_len = 5; + }, +}; + +const static struct fb_videomode lcdc_vga_modes[] = { + { + .name = "LB070WV1", + .sync = 0, /* hsync and vsync are active low */ + .xres = 640; + .yres = 480; + .left_margin = 105; + .right_margin = 50; + .hsync_len = 96; + .upper_margin = 33; + .lower_margin = 10; + .vsync_len = 2; + }, +}; + static struct sh_mobile_lcdc_info lcdc_info = { .clock_source = LCDC_CLK_EXTERNAL, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, .clock_divider = 1, - .lcd_cfg = { - .name = "LB070WV1", - .sync = 0, /* hsync and vsync are active low */ - }, .lcd_size_cfg = { /* 7.0 inch */ .width = 152, .height = 91, @@ -909,24 +935,12 @@ static int __init devices_setup(void) if (sw & SW41_B) { /* 720p */ - lcdc_info.ch[0].lcd_cfg.xres = 1280; - lcdc_info.ch[0].lcd_cfg.yres = 720; - lcdc_info.ch[0].lcd_cfg.left_margin = 220; - lcdc_info.ch[0].lcd_cfg.right_margin = 110; - lcdc_info.ch[0].lcd_cfg.hsync_len = 40; - lcdc_info.ch[0].lcd_cfg.upper_margin = 20; - lcdc_info.ch[0].lcd_cfg.lower_margin = 5; - lcdc_info.ch[0].lcd_cfg.vsync_len = 5; + lcdc_info.ch[0].lcd_cfg = lcdc_720p_modes; + lcdc_info.ch[0].num_cfg = ARRAY_SIZE(lcdc_720p_modes); } else { /* VGA */ - lcdc_info.ch[0].lcd_cfg.xres = 640; - lcdc_info.ch[0].lcd_cfg.yres = 480; - lcdc_info.ch[0].lcd_cfg.left_margin = 105; - lcdc_info.ch[0].lcd_cfg.right_margin = 50; - lcdc_info.ch[0].lcd_cfg.hsync_len = 96; - lcdc_info.ch[0].lcd_cfg.upper_margin = 33; - lcdc_info.ch[0].lcd_cfg.lower_margin = 10; - lcdc_info.ch[0].lcd_cfg.vsync_len = 2; + lcdc_info.ch[0].lcd_cfg = lcdc_vga_modes; + lcdc_info.ch[0].num_cfg = ARRAY_SIZE(lcdc_vga_modes); } if (sw & SW41_A) { diff --git a/drivers/video/sh_mipi_dsi.c b/drivers/video/sh_mipi_dsi.c index 5699ce0c1780..3f3d431033ca 100644 --- a/drivers/video/sh_mipi_dsi.c +++ b/drivers/video/sh_mipi_dsi.c @@ -123,83 +123,87 @@ static int __init sh_mipi_setup(struct sh_mipi *mipi, u32 linelength; bool yuv; - /* Select data format */ + /* + * Select data format. MIPI DSI is not hot-pluggable, so, we just use + * the default videomode. If this ever becomes a problem, We'll have to + * move this to mipi_display_on() above and use info->var.xres + */ switch (pdata->data_format) { case MIPI_RGB888: pctype = 0; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_24; pixfmt = MIPI_DCS_PIXEL_FMT_24BIT; - linelength = ch->lcd_cfg.xres * 3; + linelength = ch->lcd_cfg[0].xres * 3; yuv = false; break; case MIPI_RGB565: pctype = 1; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_16; pixfmt = MIPI_DCS_PIXEL_FMT_16BIT; - linelength = ch->lcd_cfg.xres * 2; + linelength = ch->lcd_cfg[0].xres * 2; yuv = false; break; case MIPI_RGB666_LP: pctype = 2; datatype = MIPI_DSI_PIXEL_STREAM_3BYTE_18; pixfmt = MIPI_DCS_PIXEL_FMT_24BIT; - linelength = ch->lcd_cfg.xres * 3; + linelength = ch->lcd_cfg[0].xres * 3; yuv = false; break; case MIPI_RGB666: pctype = 3; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_18; pixfmt = MIPI_DCS_PIXEL_FMT_18BIT; - linelength = (ch->lcd_cfg.xres * 18 + 7) / 8; + linelength = (ch->lcd_cfg[0].xres * 18 + 7) / 8; yuv = false; break; case MIPI_BGR888: pctype = 8; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_24; pixfmt = MIPI_DCS_PIXEL_FMT_24BIT; - linelength = ch->lcd_cfg.xres * 3; + linelength = ch->lcd_cfg[0].xres * 3; yuv = false; break; case MIPI_BGR565: pctype = 9; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_16; pixfmt = MIPI_DCS_PIXEL_FMT_16BIT; - linelength = ch->lcd_cfg.xres * 2; + linelength = ch->lcd_cfg[0].xres * 2; yuv = false; break; case MIPI_BGR666_LP: pctype = 0xa; datatype = MIPI_DSI_PIXEL_STREAM_3BYTE_18; pixfmt = MIPI_DCS_PIXEL_FMT_24BIT; - linelength = ch->lcd_cfg.xres * 3; + linelength = ch->lcd_cfg[0].xres * 3; yuv = false; break; case MIPI_BGR666: pctype = 0xb; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_18; pixfmt = MIPI_DCS_PIXEL_FMT_18BIT; - linelength = (ch->lcd_cfg.xres * 18 + 7) / 8; + linelength = (ch->lcd_cfg[0].xres * 18 + 7) / 8; yuv = false; break; case MIPI_YUYV: pctype = 4; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR16; pixfmt = MIPI_DCS_PIXEL_FMT_16BIT; - linelength = ch->lcd_cfg.xres * 2; + linelength = ch->lcd_cfg[0].xres * 2; yuv = true; break; case MIPI_UYVY: pctype = 5; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR16; pixfmt = MIPI_DCS_PIXEL_FMT_16BIT; - linelength = ch->lcd_cfg.xres * 2; + linelength = ch->lcd_cfg[0].xres * 2; yuv = true; break; case MIPI_YUV420_L: pctype = 6; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR12; pixfmt = MIPI_DCS_PIXEL_FMT_12BIT; - linelength = (ch->lcd_cfg.xres * 12 + 7) / 8; + linelength = (ch->lcd_cfg[0].xres * 12 + 7) / 8; yuv = true; break; case MIPI_YUV420: @@ -207,7 +211,7 @@ static int __init sh_mipi_setup(struct sh_mipi *mipi, datatype = MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR12; pixfmt = MIPI_DCS_PIXEL_FMT_12BIT; /* Length of U/V line */ - linelength = (ch->lcd_cfg.xres + 1) / 2; + linelength = (ch->lcd_cfg[0].xres + 1) / 2; yuv = true; break; default: @@ -281,7 +285,7 @@ static int __init sh_mipi_setup(struct sh_mipi *mipi, iowrite32(0x00e00000, base + 0x8024); /* VMCTR2 */ /* * 0x660 = 1632 bytes per line (RGB24, 544 pixels: see - * sh_mobile_lcdc_info.ch[0].lcd_cfg.xres), HSALEN = 1 - default + * sh_mobile_lcdc_info.ch[0].lcd_cfg[0].xres), HSALEN = 1 - default * (unused, since VMCTR2[HSABM] = 0) */ iowrite32(1 | (linelength << 16), base + 0x8028); /* VMLEN1 */ diff --git a/drivers/video/sh_mobile_hdmi.c b/drivers/video/sh_mobile_hdmi.c index a8117c33e75f..6608429c0fbb 100644 --- a/drivers/video/sh_mobile_hdmi.c +++ b/drivers/video/sh_mobile_hdmi.c @@ -777,7 +777,8 @@ static int __init sh_hdmi_probe(struct platform_device *pdev) goto egetclk; } - rate = PICOS2KHZ(pdata->lcd_chan->lcd_cfg.pixclock) * 1000; + /* TODO: reconfigure the clock on monitor plug in */ + rate = PICOS2KHZ(pdata->lcd_chan->lcd_cfg[0].pixclock) * 1000; rate = clk_round_rate(hdmi->hdmi_clk, rate); if (rate < 0) { diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c index e4f6012fe4e3..ce3ed4bc991f 100644 --- a/drivers/video/sh_mobile_lcdcfb.c +++ b/drivers/video/sh_mobile_lcdcfb.c @@ -989,8 +989,8 @@ static int sh_mobile_lcdc_notify(struct notifier_block *nb, int ret; /* Can we handle this display? */ - if (var->xres > ch->cfg.lcd_cfg.xres || - var->yres > ch->cfg.lcd_cfg.yres) + if (var->xres > ch->cfg.lcd_cfg[0].xres || + var->yres > ch->cfg.lcd_cfg[0].yres) return -ENOMEM; /* Add to the modelist */ @@ -1115,7 +1115,7 @@ static int __devinit sh_mobile_lcdc_probe(struct platform_device *pdev) info = ch->info; var = &info->var; - lcd_cfg = &cfg->lcd_cfg; + lcd_cfg = &cfg->lcd_cfg[0]; info->fbops = &sh_mobile_lcdc_ops; fb_videomode_to_var(var, lcd_cfg); /* Default Y virtual resolution is 2x panel size */ @@ -1187,8 +1187,8 @@ static int __devinit sh_mobile_lcdc_probe(struct platform_device *pdev) pdev->name, (ch->cfg.chan == LCDC_CHAN_MAINLCD) ? "mainlcd" : "sublcd", - (int) ch->cfg.lcd_cfg.xres, - (int) ch->cfg.lcd_cfg.yres, + (int) ch->cfg.lcd_cfg[0].xres, + (int) ch->cfg.lcd_cfg[0].yres, ch->cfg.bpp); /* deferred io mode: disable clock to save power */ diff --git a/include/video/sh_mobile_lcdc.h b/include/video/sh_mobile_lcdc.h index 55d700e8566e..19c69d788f3b 100644 --- a/include/video/sh_mobile_lcdc.h +++ b/include/video/sh_mobile_lcdc.h @@ -70,7 +70,8 @@ struct sh_mobile_lcdc_chan_cfg { int interface_type; /* selects RGBn or SYSn I/F, see above */ int clock_divider; unsigned long flags; /* LCDC_FLAGS_... */ - struct fb_videomode lcd_cfg; + const struct fb_videomode *lcd_cfg; + int num_cfg; struct sh_mobile_lcdc_lcd_size_cfg lcd_size_cfg; struct sh_mobile_lcdc_board_cfg board_cfg; struct sh_mobile_lcdc_sys_bus_cfg sys_bus_cfg; /* only for SYSn I/F */ -- cgit v1.2.3 From 6de9edd5bde0cdfea12e9948690e53ec669c3018 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 3 Sep 2010 07:20:23 +0000 Subject: fbdev: sh_mobile_hdmi: implement locking The SH-Mobile HDMI driver runs in several contexts: ISR, delayed work-queue, task context, when called from the sh_mobile_lcdc framebuffer driver. This creates ample race possibilities. Even though most these races are purely theoretical, it is better to close them. To trace fb_info validity we install a notification callback in the HDMI driver, and the only way for it to get to driver internal data is by using struct sh_mobile_lcdc_chan, therefore it had to be extracted into a separate common header. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- drivers/video/sh_mobile_hdmi.c | 103 +++++++++++++++++++++++++++++++++------ drivers/video/sh_mobile_lcdcfb.c | 46 ++++++----------- drivers/video/sh_mobile_lcdcfb.h | 37 ++++++++++++++ include/video/sh_mobile_lcdc.h | 2 + 4 files changed, 141 insertions(+), 47 deletions(-) create mode 100644 drivers/video/sh_mobile_lcdcfb.h (limited to 'include') diff --git a/drivers/video/sh_mobile_hdmi.c b/drivers/video/sh_mobile_hdmi.c index a8cf9a510f30..56e44fd0a3af 100644 --- a/drivers/video/sh_mobile_hdmi.c +++ b/drivers/video/sh_mobile_hdmi.c @@ -26,6 +26,8 @@ #include