Diffstat (limited to 'drivers')
 84 files changed, 9543 insertions(+), 2445 deletions(-)
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 6cc0db1bf522..3f129b45451a 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -684,7 +684,7 @@ again:
 
 	err = xenbus_switch_state(dev, XenbusStateConnected);
 	if (err)
-		xenbus_dev_fatal(dev, err, "switching to Connected state",
+		xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
 				 dev->nodename);
 
 	return;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f245c588ae95..ce7914c4c044 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -262,6 +262,7 @@ enum intel_pch {
 };
 
 #define QUIRK_PIPEA_FORCE (1<<0)
+#define QUIRK_LVDS_SSC_DISABLE (1<<1)
 
 struct intel_fbdev;
 
@@ -1194,7 +1195,9 @@ void i915_gem_free_all_phys_object(struct drm_device *dev);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
 uint32_t
-i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj);
+i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
+				    uint32_t size,
+				    int tiling_mode);
 
 /* i915_gem_gtt.c */
 void i915_gem_restore_gtt_mappings(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5c0d1247f453..a087e1bf0c2f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1374,25 +1374,24 @@ i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj)
 }
 
 static uint32_t
-i915_gem_get_gtt_size(struct drm_i915_gem_object *obj)
+i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
 {
-	struct drm_device *dev = obj->base.dev;
-	uint32_t size;
+	uint32_t gtt_size;
 
 	if (INTEL_INFO(dev)->gen >= 4 ||
-	    obj->tiling_mode == I915_TILING_NONE)
-		return obj->base.size;
+	    tiling_mode == I915_TILING_NONE)
+		return size;
 
 	/* Previous chips need a power-of-two fence region when tiling */
 	if (INTEL_INFO(dev)->gen == 3)
-		size = 1024*1024;
+		gtt_size = 1024*1024;
 	else
-		size = 512*1024;
+		gtt_size = 512*1024;
 
-	while (size < obj->base.size)
-		size <<= 1;
+	while (gtt_size < size)
+		gtt_size <<= 1;
 
-	return size;
+	return gtt_size;
 }
 
 /**
@@ -1403,59 +1402,52 @@ i915_gem_get_gtt_size(struct drm_i915_gem_object *obj)
  * potential fence register mapping.
  */
 static uint32_t
-i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj)
+i915_gem_get_gtt_alignment(struct drm_device *dev,
+			   uint32_t size,
+			   int tiling_mode)
 {
-	struct drm_device *dev = obj->base.dev;
-
 	/*
 	 * Minimum alignment is 4k (GTT page size), but might be greater
 	 * if a fence register is needed for the object.
 	 */
 	if (INTEL_INFO(dev)->gen >= 4 ||
-	    obj->tiling_mode == I915_TILING_NONE)
+	    tiling_mode == I915_TILING_NONE)
 		return 4096;
 
 	/*
 	 * Previous chips need to be aligned to the size of the smallest
 	 * fence register that can contain the object.
 	 */
-	return i915_gem_get_gtt_size(obj);
+	return i915_gem_get_gtt_size(dev, size, tiling_mode);
 }
 
 /**
  * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
  *					 unfenced object
- * @obj: object to check
+ * @dev: the device
+ * @size: size of the object
+ * @tiling_mode: tiling mode of the object
  *
  * Return the required GTT alignment for an object, only taking into account
  * unfenced tiled surface requirements.
 */
 uint32_t
-i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj)
+i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
+				    uint32_t size,
+				    int tiling_mode)
 {
-	struct drm_device *dev = obj->base.dev;
-	int tile_height;
-
 	/*
 	 * Minimum alignment is 4k (GTT page size) for sane hw.
 	 */
 	if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
-	    obj->tiling_mode == I915_TILING_NONE)
+	    tiling_mode == I915_TILING_NONE)
 		return 4096;
 
-	/*
-	 * Older chips need unfenced tiled buffers to be aligned to the left
-	 * edge of an even tile row (where tile rows are counted as if the bo is
-	 * placed in a fenced gtt region).
+	/* Previous hardware however needs to be aligned to a power-of-two
+	 * tile height. The simplest method for determining this is to reuse
+	 * the power-of-two object size.
 	 */
-	if (IS_GEN2(dev))
-		tile_height = 16;
-	else if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
-		tile_height = 32;
-	else
-		tile_height = 8;
-
-	return tile_height * obj->stride * 2;
+	return i915_gem_get_gtt_size(dev, size, tiling_mode);
 }
 
 int
@@ -2744,9 +2736,16 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 		return -EINVAL;
 	}
 
-	fence_size = i915_gem_get_gtt_size(obj);
-	fence_alignment = i915_gem_get_gtt_alignment(obj);
-	unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(obj);
+	fence_size = i915_gem_get_gtt_size(dev,
+					   obj->base.size,
+					   obj->tiling_mode);
+	fence_alignment = i915_gem_get_gtt_alignment(dev,
+						     obj->base.size,
+						     obj->tiling_mode);
+	unfenced_alignment =
+		i915_gem_get_unfenced_gtt_alignment(dev,
+						    obj->base.size,
+						    obj->tiling_mode);
 
 	if (alignment == 0)
 		alignment = map_and_fenceable ? fence_alignment :
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 82d70fd9e933..99c4faa59d8f 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -348,7 +348,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 		/* Rebind if we need a change of alignment */
 		if (!obj->map_and_fenceable) {
 			u32 unfenced_alignment =
-				i915_gem_get_unfenced_gtt_alignment(obj);
+				i915_gem_get_unfenced_gtt_alignment(dev,
+								    obj->base.size,
+								    args->tiling_mode);
 			if (obj->gtt_offset & (unfenced_alignment - 1))
 				ret = i915_gem_object_unbind(obj);
 		}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 21b6f93fe919..0f1c799afea1 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4305,7 +4305,8 @@ static void intel_update_watermarks(struct drm_device *dev)
 
 static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv)
 {
-	return dev_priv->lvds_use_ssc && i915_panel_use_ssc;
+	return dev_priv->lvds_use_ssc && i915_panel_use_ssc
+		&& !(dev_priv->quirks & QUIRK_LVDS_SSC_DISABLE);
 }
 
 static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
@@ -7810,6 +7811,15 @@ static void quirk_pipea_force (struct drm_device *dev)
 	DRM_DEBUG_DRIVER("applying pipe a force quirk\n");
 }
 
+/*
+ * Some machines (Lenovo U160) do not work with SSC on LVDS for some reason
+ */
+static void quirk_ssc_force_disable(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	dev_priv->quirks |= QUIRK_LVDS_SSC_DISABLE;
+}
+
 struct intel_quirk {
 	int device;
 	int subsystem_vendor;
@@ -7838,6 +7848,9 @@ struct intel_quirk intel_quirks[] = {
 	/* 855 & before need to leave pipe A & dpll A up */
 	{ 0x3582, PCI_ANY_ID, PCI_ANY_ID, quirk_pipea_force },
 	{ 0x2562, PCI_ANY_ID, PCI_ANY_ID, quirk_pipea_force },
+
+	/* Lenovo U160 cannot use SSC on LVDS */
+	{ 0x0046, 0x17aa, 0x3920, quirk_ssc_force_disable },
 };
 
 static void intel_init_quirks(struct drm_device *dev)
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index efa202499e37..2535933c49f8 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -117,7 +117,7 @@ static __init int map_switcher(void)
 
 	/*
 	 * Now the Switcher is mapped at the right address, we can't fail!
-	 * Copy in the compiled-in Switcher code (from <arch>_switcher.S).
+	 * Copy in the compiled-in Switcher code (from x86/switcher_32.S).
 	 */
 	memcpy(switcher_vma->addr, start_switcher_text,
 	       end_switcher_text - start_switcher_text);
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index daaf86631647..28433a155d67 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -375,11 +375,9 @@ static bool direct_trap(unsigned int num)
 	/*
 	 * The Host needs to see page faults (for shadow paging and to save the
 	 * fault address), general protection faults (in/out emulation) and
-	 * device not available (TS handling), invalid opcode fault (kvm hcall),
-	 * and of course, the hypercall trap.
+	 * device not available (TS handling) and of course, the hypercall trap.
 	 */
-	return num != 14 && num != 13 && num != 7 &&
-	       num != 6 && num != LGUEST_TRAP_ENTRY;
+	return num != 14 && num != 13 && num != 7 && num != LGUEST_TRAP_ENTRY;
 }
 /*:*/
 
@@ -429,8 +427,8 @@ void pin_stack_pages(struct lg_cpu *cpu)
 
 /*
  * Direct traps also mean that we need to know whenever the Guest wants to use
- * a different kernel stack, so we can change the IDT entries to use that
- * stack. The IDT entries expect a virtual address, so unlike most addresses
+ * a different kernel stack, so we can change the guest TSS to use that
+ * stack. The TSS entries expect a virtual address, so unlike most addresses
  * the Guest gives us, the "esp" (stack pointer) value here is virtual, not
  * physical.
 *
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 9136411fadd5..295df06e6590 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -59,6 +59,8 @@ struct lg_cpu {
 
 	struct lguest_pages *last_pages;
 
+	/* Initialization mode: linear map everything. */
+	bool linear_pages;
 	int cpu_pgd; /* Which pgd this cpu is currently using */
 
 	/* If a hypercall was asked for, this points to the arguments. */
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 69c84a1d88ea..5289ffa2e500 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -109,6 +109,17 @@ static u32 lg_get_features(struct virtio_device *vdev)
 }
 
 /*
+ * To notify on reset or feature finalization, we (ab)use the NOTIFY
+ * hypercall, with the descriptor address of the device.
+ */
+static void status_notify(struct virtio_device *vdev)
+{
+	unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices;
+
+	hcall(LHCALL_NOTIFY, (max_pfn << PAGE_SHIFT) + offset, 0, 0, 0);
+}
+
+/*
 * The virtio core takes the features the Host offers, and copies the ones
 * supported by the driver into the vdev->features array. Once that's all
 * sorted out, this routine is called so we can tell the Host which features we
@@ -135,6 +146,9 @@ static void lg_finalize_features(struct virtio_device *vdev)
 		if (test_bit(i, vdev->features))
 			out_features[i / 8] |= (1 << (i % 8));
 	}
+
+	/* Tell Host we've finished with this device's feature negotiation */
+	status_notify(vdev);
 }
 
 /* Once they've found a field, getting a copy of it is easy.
 */
@@ -168,28 +182,21 @@ static u8 lg_get_status(struct virtio_device *vdev)
 	return to_lgdev(vdev)->desc->status;
 }
 
-/*
- * To notify on status updates, we (ab)use the NOTIFY hypercall, with the
- * descriptor address of the device. A zero status means "reset".
- */
-static void set_status(struct virtio_device *vdev, u8 status)
-{
-	unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices;
-
-	/* We set the status. */
-	to_lgdev(vdev)->desc->status = status;
-	hcall(LHCALL_NOTIFY, (max_pfn << PAGE_SHIFT) + offset, 0, 0, 0);
-}
-
 static void lg_set_status(struct virtio_device *vdev, u8 status)
 {
 	BUG_ON(!status);
-	set_status(vdev, status);
+	to_lgdev(vdev)->desc->status = status;
+
+	/* Tell Host immediately if we failed. */
+	if (status & VIRTIO_CONFIG_S_FAILED)
+		status_notify(vdev);
 }
 
 static void lg_reset(struct virtio_device *vdev)
 {
-	set_status(vdev, 0);
+	/* 0 status means "reset" */
+	to_lgdev(vdev)->desc->status = 0;
+	status_notify(vdev);
 }
 
 /*
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index 948c547b8e9e..f97e625241ad 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -1,8 +1,10 @@
-/*P:200 This contains all the /dev/lguest code, whereby the userspace launcher
- * controls and communicates with the Guest. For example, the first write will
- * tell us the Guest's memory layout and entry point. A read will run the
- * Guest until something happens, such as a signal or the Guest doing a NOTIFY
- * out to the Launcher.
+/*P:200 This contains all the /dev/lguest code, whereby the userspace
+ * launcher controls and communicates with the Guest. For example,
+ * the first write will tell us the Guest's memory layout and entry
+ * point. A read will run the Guest until something happens, such as
+ * a signal or the Guest doing a NOTIFY out to the Launcher. There is
+ * also a way for the Launcher to attach eventfds to particular NOTIFY
+ * values instead of returning from the read() call.
:*/
 #include <linux/uaccess.h>
 #include <linux/miscdevice.h>
@@ -357,8 +359,8 @@ static int initialize(struct file *file, const unsigned long __user *input)
 		goto free_eventfds;
 
 	/*
-	 * Initialize the Guest's shadow page tables, using the toplevel
-	 * address the Launcher gave us. This allocates memory, so can fail.
+	 * Initialize the Guest's shadow page tables. This allocates
+	 * memory, so can fail.
 	 */
 	err = init_guest_pagetable(lg);
 	if (err)
@@ -516,6 +518,7 @@ static const struct file_operations lguest_fops = {
 	.read	 = read,
 	.llseek  = default_llseek,
 };
+/*:*/
 
 /*
 * This is a textbook example of a "misc" character device. Populate a "struct
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index d21578ee95de..3b62be160a6e 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -17,7 +17,6 @@
 #include <linux/percpu.h>
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
-#include <asm/bootparam.h>
 #include "lg.h"
 
 /*M:008
@@ -156,7 +155,7 @@ static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
 }
 
 /*
- * These functions are just like the above two, except they access the Guest
+ * These functions are just like the above, except they access the Guest
 * page tables. Hence they return a Guest address.
 */
 static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr)
@@ -196,7 +195,7 @@ static unsigned long gpte_addr(struct lg_cpu *cpu,
 #endif
 /*:*/
 
-/*M:014
+/*M:007
 * get_pfn is slow: we could probably try to grab batches of pages here as
 * an optimization (ie. pre-faulting).
:*/
@@ -325,10 +324,15 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 #endif
 
 	/* First step: get the top-level Guest page table entry. */
-	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
-	/* Toplevel not present? We can't map it in. */
-	if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
-		return false;
+	if (unlikely(cpu->linear_pages)) {
+		/* Faking up a linear mapping. */
+		gpgd = __pgd(CHECK_GPGD_MASK);
+	} else {
+		gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
+		/* Toplevel not present? We can't map it in. */
+		if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
+			return false;
+	}
 
 	/* Now look at the matching shadow entry. */
 	spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
@@ -353,10 +357,15 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 	}
 
 #ifdef CONFIG_X86_PAE
-	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
-	/* Middle level not present? We can't map it in. */
-	if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
-		return false;
+	if (unlikely(cpu->linear_pages)) {
+		/* Faking up a linear mapping. */
+		gpmd = __pmd(_PAGE_TABLE);
+	} else {
+		gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
+		/* Middle level not present? We can't map it in. */
+		if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
+			return false;
+	}
 
 	/* Now look at the matching shadow entry. */
 	spmd = spmd_addr(cpu, *spgd, vaddr);
@@ -397,8 +406,13 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 	gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
 #endif
 
-	/* Read the actual PTE value. */
-	gpte = lgread(cpu, gpte_ptr, pte_t);
+	if (unlikely(cpu->linear_pages)) {
+		/* Linear? Make up a PTE which points to same page. */
+		gpte = __pte((vaddr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
+	} else {
+		/* Read the actual PTE value. */
+		gpte = lgread(cpu, gpte_ptr, pte_t);
+	}
 
 	/* If this page isn't in the Guest page tables, we can't page it in. */
 	if (!(pte_flags(gpte) & _PAGE_PRESENT))
@@ -454,7 +468,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 	 * Finally, we write the Guest PTE entry back: we've set the
 	 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags.
 	 */
-	lgwrite(cpu, gpte_ptr, pte_t, gpte);
+	if (likely(!cpu->linear_pages))
+		lgwrite(cpu, gpte_ptr, pte_t, gpte);
 
 	/*
 	 * The fault is fixed, the page table is populated, the mapping
@@ -612,6 +627,11 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
 #ifdef CONFIG_X86_PAE
 	pmd_t gpmd;
 #endif
+
+	/* Still not set up? Just map 1:1. */
+	if (unlikely(cpu->linear_pages))
+		return vaddr;
+
 	/* First step: get the top-level Guest page table entry. */
 	gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
 	/* Toplevel not present? We can't map it in. */
@@ -708,32 +728,6 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
 	return next;
 }
 
-/*H:430
- * (iv) Switching page tables
- *
- * Now we've seen all the page table setting and manipulation, let's see
- * what happens when the Guest changes page tables (ie. changes the top-level
- * pgdir). This occurs on almost every context switch.
- */
-void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
-{
-	int newpgdir, repin = 0;
-
-	/* Look to see if we have this one already. */
-	newpgdir = find_pgdir(cpu->lg, pgtable);
-	/*
-	 * If not, we allocate or mug an existing one: if it's a fresh one,
-	 * repin gets set to 1.
-	 */
-	if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs))
-		newpgdir = new_pgdir(cpu, pgtable, &repin);
-	/* Change the current pgd index to the new one. */
-	cpu->cpu_pgd = newpgdir;
-	/* If it was completely blank, we map in the Guest kernel stack */
-	if (repin)
-		pin_stack_pages(cpu);
-}
-
 /*H:470
 * Finally, a routine which throws away everything: all PGD entries in all
 * the shadow page tables, including the Guest's kernel mappings. This is used
@@ -780,6 +774,44 @@ void guest_pagetable_clear_all(struct lg_cpu *cpu)
 	/* We need the Guest kernel stack mapped again. */
 	pin_stack_pages(cpu);
 }
+
+/*H:430
+ * (iv) Switching page tables
+ *
+ * Now we've seen all the page table setting and manipulation, let's see
+ * what happens when the Guest changes page tables (ie. changes the top-level
+ * pgdir). This occurs on almost every context switch.
+ */
+void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
+{
+	int newpgdir, repin = 0;
+
+	/*
+	 * The very first time they call this, we're actually running without
+	 * any page tables; we've been making it up. Throw them away now.
+	 */
+	if (unlikely(cpu->linear_pages)) {
+		release_all_pagetables(cpu->lg);
+		cpu->linear_pages = false;
+		/* Force allocation of a new pgdir. */
+		newpgdir = ARRAY_SIZE(cpu->lg->pgdirs);
+	} else {
+		/* Look to see if we have this one already. */
+		newpgdir = find_pgdir(cpu->lg, pgtable);
+	}
+
+	/*
+	 * If not, we allocate or mug an existing one: if it's a fresh one,
+	 * repin gets set to 1.
+	 */
+	if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs))
+		newpgdir = new_pgdir(cpu, pgtable, &repin);
+	/* Change the current pgd index to the new one. */
+	cpu->cpu_pgd = newpgdir;
+	/* If it was completely blank, we map in the Guest kernel stack */
+	if (repin)
+		pin_stack_pages(cpu);
+}
 /*:*/
 
 /*M:009
@@ -919,168 +951,26 @@ void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx)
 }
 #endif
 
-/*H:505
- * To get through boot, we construct simple identity page mappings (which
- * set virtual == physical) and linear mappings which will get the Guest far
- * enough into the boot to create its own. The linear mapping means we
- * simplify the Guest boot, but it makes assumptions about their PAGE_OFFSET,
- * as you'll see.
- *
- * We lay them out of the way, just below the initrd (which is why we need to
- * know its size here).
- */
-static unsigned long setup_pagetables(struct lguest *lg,
-				      unsigned long mem,
-				      unsigned long initrd_size)
-{
-	pgd_t __user *pgdir;
-	pte_t __user *linear;
-	unsigned long mem_base = (unsigned long)lg->mem_base;
-	unsigned int mapped_pages, i, linear_pages;
-#ifdef CONFIG_X86_PAE
-	pmd_t __user *pmds;
-	unsigned int j;
-	pgd_t pgd;
-	pmd_t pmd;
-#else
-	unsigned int phys_linear;
-#endif
-
-	/*
-	 * We have mapped_pages frames to map, so we need linear_pages page
-	 * tables to map them.
-	 */
-	mapped_pages = mem / PAGE_SIZE;
-	linear_pages = (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE;
-
-	/* We put the toplevel page directory page at the top of memory. */
-	pgdir = (pgd_t *)(mem + mem_base - initrd_size - PAGE_SIZE);
-
-	/* Now we use the next linear_pages pages as pte pages */
-	linear = (void *)pgdir - linear_pages * PAGE_SIZE;
-
-#ifdef CONFIG_X86_PAE
-	/*
-	 * And the single mid page goes below that. We only use one, but
-	 * that's enough to map 1G, which definitely gets us through boot.
-	 */
-	pmds = (void *)linear - PAGE_SIZE;
-#endif
-	/*
-	 * Linear mapping is easy: put every page's address into the
-	 * mapping in order.
-	 */
-	for (i = 0; i < mapped_pages; i++) {
-		pte_t pte;
-		pte = pfn_pte(i, __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER));
-		if (copy_to_user(&linear[i], &pte, sizeof(pte)) != 0)
-			return -EFAULT;
-	}
-
-#ifdef CONFIG_X86_PAE
-	/*
-	 * Make the Guest PMD entries point to the corresponding place in the
-	 * linear mapping (up to one page worth of PMD).
-	 */
-	for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD;
-	     i += PTRS_PER_PTE, j++) {
-		pmd = pfn_pmd(((unsigned long)&linear[i] - mem_base)/PAGE_SIZE,
-			      __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
-
-		if (copy_to_user(&pmds[j], &pmd, sizeof(pmd)) != 0)
-			return -EFAULT;
-	}
-
-	/* One PGD entry, pointing to that PMD page. */
-	pgd = __pgd(((unsigned long)pmds - mem_base) | _PAGE_PRESENT);
-	/* Copy it in as the first PGD entry (ie. addresses 0-1G). */
-	if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0)
-		return -EFAULT;
-	/*
-	 * And the other PGD entry to make the linear mapping at PAGE_OFFSET
-	 */
-	if (copy_to_user(&pgdir[KERNEL_PGD_BOUNDARY], &pgd, sizeof(pgd)))
-		return -EFAULT;
-#else
-	/*
-	 * The top level points to the linear page table pages above.
-	 * We setup the identity and linear mappings here.
-	 */
-	phys_linear = (unsigned long)linear - mem_base;
-	for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) {
-		pgd_t pgd;
-		/*
-		 * Create a PGD entry which points to the right part of the
-		 * linear PTE pages.
-		 */
-		pgd = __pgd((phys_linear + i * sizeof(pte_t)) |
-			    (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
-
-		/*
-		 * Copy it into the PGD page at 0 and PAGE_OFFSET.
-		 */
-		if (copy_to_user(&pgdir[i / PTRS_PER_PTE], &pgd, sizeof(pgd))
-		    || copy_to_user(&pgdir[pgd_index(PAGE_OFFSET)
-					   + i / PTRS_PER_PTE],
-				    &pgd, sizeof(pgd)))
-			return -EFAULT;
-	}
-#endif
-
-	/*
-	 * We return the top level (guest-physical) address: we remember where
-	 * this is to write it into lguest_data when the Guest initializes.
-	 */
-	return (unsigned long)pgdir - mem_base;
-}
-
 /*H:500
 * (vii) Setting up the page tables initially.
 *
- * When a Guest is first created, the Launcher tells us where the toplevel of
- * its first page table is. We set some things up here:
+ * When a Guest is first created, we initialize a shadow page table which
+ * we will populate on future faults. The Guest doesn't have any actual
+ * pagetables yet, so we set linear_pages to tell demand_page() to fake it
+ * for the moment.
 */
 int init_guest_pagetable(struct lguest *lg)
 {
-	u64 mem;
-	u32 initrd_size;
-	struct boot_params __user *boot = (struct boot_params *)lg->mem_base;
-#ifdef CONFIG_X86_PAE
-	pgd_t *pgd;
-	pmd_t *pmd_table;
-#endif
-	/*
-	 * Get the Guest memory size and the ramdisk size from the boot header
-	 * located at lg->mem_base (Guest address 0).
-	 */
-	if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem))
-	    || get_user(initrd_size, &boot->hdr.ramdisk_size))
-		return -EFAULT;
+	struct lg_cpu *cpu = &lg->cpus[0];
+	int allocated = 0;
 
-	/*
-	 * We start on the first shadow page table, and give it a blank PGD
-	 * page.
-	 */
-	lg->pgdirs[0].gpgdir = setup_pagetables(lg, mem, initrd_size);
-	if (IS_ERR_VALUE(lg->pgdirs[0].gpgdir))
-		return lg->pgdirs[0].gpgdir;
-	lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
-	if (!lg->pgdirs[0].pgdir)
+	/* lg (and lg->cpus[]) starts zeroed: this allocates a new pgdir */
+	cpu->cpu_pgd = new_pgdir(cpu, 0, &allocated);
+	if (!allocated)
 		return -ENOMEM;
-#ifdef CONFIG_X86_PAE
-	/* For PAE, we also create the initial mid-level. */
-	pgd = lg->pgdirs[0].pgdir;
-	pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL);
-	if (!pmd_table)
-		return -ENOMEM;
-
-	set_pgd(pgd + SWITCHER_PGD_INDEX,
-		__pgd(__pa(pmd_table) | _PAGE_PRESENT));
-#endif
-
-	/* This is the current page table. */
-	lg->cpus[0].cpu_pgd = 0;
+	/* We start with a linear mapping until the Guest initializes. */
+	cpu->linear_pages = true;
 
 	return 0;
 }
@@ -1095,10 +985,10 @@ void page_table_guest_data_init(struct lg_cpu *cpu)
 	 * of virtual addresses used by the Switcher.
 	 */
 	    || put_user(RESERVE_MEM * 1024 * 1024,
-			&cpu->lg->lguest_data->reserve_mem)
-	    || put_user(cpu->lg->pgdirs[0].gpgdir,
-			&cpu->lg->lguest_data->pgdir))
+			&cpu->lg->lguest_data->reserve_mem)) {
 		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
+		return;
+	}
 
 	/*
 	 * In flush_user_mappings() we loop from 0 to
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 9f1659c3d1f3..65af42f2d593 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -269,10 +269,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 static int emulate_insn(struct lg_cpu *cpu)
 {
 	u8 insn;
-	unsigned int insnlen = 0, in = 0, shift = 0;
+	unsigned int insnlen = 0, in = 0, small_operand = 0;
 	/*
 	 * The eip contains the *virtual* address of the Guest's instruction:
-	 * guest_pa just subtracts the Guest's page_offset.
+	 * walk the Guest's page tables to find the "physical" address.
 	 */
 	unsigned long physaddr = guest_pa(cpu, cpu->regs->eip);
 
@@ -300,11 +300,10 @@ static int emulate_insn(struct lg_cpu *cpu)
 	}
 
 	/*
-	 * 0x66 is an "operand prefix". It means it's using the upper 16 bits
-	 * of the eax register.
+	 * 0x66 is an "operand prefix". It means a 16, not 32 bit in/out.
 	 */
 	if (insn == 0x66) {
-		shift = 16;
+		small_operand = 1;
 		/* The instruction is 1 byte so far, read the next byte. */
 		insnlen = 1;
 		insn = lgread(cpu, physaddr + insnlen, u8);
@@ -340,11 +339,14 @@ static int emulate_insn(struct lg_cpu *cpu)
 	 * traditionally means "there's nothing there".
 	 */
 	if (in) {
-		/* Lower bit tells is whether it's a 16 or 32 bit access */
-		if (insn & 0x1)
-			cpu->regs->eax = 0xFFFFFFFF;
-		else
-			cpu->regs->eax |= (0xFFFF << shift);
+		/* The lower bit tells us whether it's a 32/16 bit access */
+		if (insn & 0x1) {
+			if (small_operand)
+				cpu->regs->eax |= 0xFFFF;
+			else
+				cpu->regs->eax = 0xFFFFFFFF;
+		} else
+			cpu->regs->eax |= 0xFF;
 	}
 	/* Finally, we've "done" the instruction, so move past it. */
 	cpu->regs->eip += insnlen;
@@ -352,69 +354,6 @@ static int emulate_insn(struct lg_cpu *cpu)
 	return 1;
 }
 
-/*
- * Our hypercalls mechanism used to be based on direct software interrupts.
- * After Anthony's "Refactor hypercall infrastructure" kvm patch, we decided to
- * change over to using kvm hypercalls.
- *
- * KVM_HYPERCALL is actually a "vmcall" instruction, which generates an invalid
- * opcode fault (fault 6) on non-VT cpus, so the easiest solution seemed to be
- * an *emulation approach*: if the fault was really produced by an hypercall
- * (is_hypercall() does exactly this check), we can just call the corresponding
- * hypercall host implementation function.
- *
- * But these invalid opcode faults are notably slower than software interrupts.
- * So we implemented the *patching (or rewriting) approach*: every time we hit
- * the KVM_HYPERCALL opcode in Guest code, we patch it to the old "int 0x1f"
- * opcode, so next time the Guest calls this hypercall it will use the
- * faster trap mechanism.
- *
- * Matias even benchmarked it to convince you: this shows the average cycle
- * cost of a hypercall. For each alternative solution mentioned above we've
- * made 5 runs of the benchmark:
- *
- * 1) direct software interrupt: 2915, 2789, 2764, 2721, 2898
- * 2) emulation technique: 3410, 3681, 3466, 3392, 3780
- * 3) patching (rewrite) technique: 2977, 2975, 2891, 2637, 2884
- *
- * One two-line function is worth a 20% hypercall speed boost!
- */
-static void rewrite_hypercall(struct lg_cpu *cpu)
-{
-	/*
-	 * This are the opcodes we use to patch the Guest. The opcode for "int
-	 * $0x1f" is "0xcd 0x1f" but vmcall instruction is 3 bytes long, so we
-	 * complete the sequence with a NOP (0x90).
-	 */
-	u8 insn[3] = {0xcd, 0x1f, 0x90};
-
-	__lgwrite(cpu, guest_pa(cpu, cpu->regs->eip), insn, sizeof(insn));
-	/*
-	 * The above write might have caused a copy of that page to be made
-	 * (if it was read-only). We need to make sure the Guest has
-	 * up-to-date pagetables. As this doesn't happen often, we can just
-	 * drop them all.
-	 */
-	guest_pagetable_clear_all(cpu);
-}
-
-static bool is_hypercall(struct lg_cpu *cpu)
-{
-	u8 insn[3];
-
-	/*
-	 * This must be the Guest kernel trying to do something.
-	 * The bottom two bits of the CS segment register are the privilege
-	 * level.
-	 */
-	if ((cpu->regs->cs & 3) != GUEST_PL)
-		return false;
-
-	/* Is it a vmcall? */
-	__lgread(cpu, insn, guest_pa(cpu, cpu->regs->eip), sizeof(insn));
-	return insn[0] == 0x0f && insn[1] == 0x01 && insn[2] == 0xc1;
-}
-
 /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */
 void lguest_arch_handle_trap(struct lg_cpu *cpu)
 {
@@ -429,20 +368,6 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
 			if (emulate_insn(cpu))
 				return;
 		}
-		/*
-		 * If KVM is active, the vmcall instruction triggers a General
-		 * Protection Fault. Normally it triggers an invalid opcode
-		 * fault (6):
-		 */
-	case 6:
-		/*
-		 * We need to check if ring == GUEST_PL and faulting
-		 * instruction == vmcall.
-		 */
-		if (is_hypercall(cpu)) {
-			rewrite_hypercall(cpu);
-			return;
-		}
 		break;
 	case 14: /* We've intercepted a Page Fault. */
 		/*
@@ -486,7 +411,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
 		 * These values mean a real interrupt occurred, in which case
 		 * the Host handler has already been run. We just do a
 		 * friendly check if another process should now be run, then
-		 * return to run the Guest again
+		 * return to run the Guest again.
 		 */
 		cond_resched();
 		return;
@@ -536,7 +461,7 @@ void __init lguest_arch_host_init(void)
 	int i;
 
 	/*
-	 * Most of the i386/switcher.S doesn't care that it's been moved; on
+	 * Most of the x86/switcher_32.S doesn't care that it's been moved; on
 	 * Intel, jumps are relative, and it doesn't access any references to
 	 * external code or data.
 	 *
@@ -664,7 +589,7 @@ void __init lguest_arch_host_init(void)
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE);
 	}
 	put_online_cpus();
-};
+}
 /*:*/
 
 void __exit lguest_arch_host_fini(void)
@@ -747,8 +672,6 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu)
 /*:*/
 
 /*L:030
- * lguest_arch_setup_regs()
- *
 * Most of the Guest's registers are left alone: we used get_zeroed_page() to
 * allocate the structure, so they will be 0.
 */
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index f85e42224559..1ff5486213fb 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -106,6 +106,16 @@ struct mmc_blk_data {
 
 static DEFINE_MUTEX(open_lock);
 
+enum mmc_blk_status {
+	MMC_BLK_SUCCESS = 0,
+	MMC_BLK_PARTIAL,
+	MMC_BLK_RETRY,
+	MMC_BLK_RETRY_SINGLE,
+	MMC_BLK_DATA_ERR,
+	MMC_BLK_CMD_ERR,
+	MMC_BLK_ABORT,
+};
+
 module_param(perdev_minors, int, 0444);
 MODULE_PARM_DESC(perdev_minors, "Minors numbers to allocate per device");
 
@@ -427,14 +437,6 @@ static const struct block_device_operations mmc_bdops = {
 #endif
 };
 
-struct mmc_blk_request {
-	struct mmc_request	mrq;
-	struct mmc_command	sbc;
-	struct mmc_command	cmd;
-	struct mmc_command	stop;
-	struct mmc_data		data;
-};
-
 static inline int mmc_blk_part_switch(struct mmc_card *card,
 				      struct mmc_blk_data *md)
 {
@@ -525,7 +527,20 @@ static u32 mmc_sd_num_wr_blocks(struct mmc_card *card)
 	return result;
 }
 
-static u32 get_card_status(struct mmc_card *card, struct request *req)
+static int send_stop(struct mmc_card *card, u32 *status)
+{
+	struct mmc_command cmd = {0};
+	int err;
+
+	cmd.opcode = MMC_STOP_TRANSMISSION;
+	cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
+	err = mmc_wait_for_cmd(card->host, &cmd, 5);
+	if (err == 0)
+		*status = cmd.resp[0];
+	return err;
+}
+
+static int get_card_status(struct mmc_card *card, u32 *status, int retries)
 {
 	struct mmc_command cmd = {0};
 	int err;
@@ -534,11 +549,141 @@ static u32 get_card_status(struct mmc_card *card, struct request *req)
 	if (!mmc_host_is_spi(card->host))
 		cmd.arg = card->rca << 16;
 	cmd.flags = MMC_RSP_SPI_R2 | MMC_RSP_R1 | MMC_CMD_AC;
-	err = mmc_wait_for_cmd(card->host, &cmd, 0);
+	err = mmc_wait_for_cmd(card->host, &cmd, retries);
+	if (err == 0)
+		*status = cmd.resp[0];
+	return err;
+}
+
+#define ERR_RETRY	2
+#define ERR_ABORT	1
+#define ERR_CONTINUE	0
+
+static int mmc_blk_cmd_error(struct request *req, const char *name, int error,
+			     bool status_valid, u32 status)
+{
+	switch (error) {
+	case -EILSEQ:
+		/* response crc error, retry the r/w cmd */
+		pr_err("%s: %s sending %s command, card status %#x\n",
+			req->rq_disk->disk_name, "response CRC error",
+			name, status);
+		return ERR_RETRY;
+
+	case -ETIMEDOUT:
+		pr_err("%s: %s sending %s command, card status %#x\n",
+			req->rq_disk->disk_name, "timed out", name, status);
+
+		/* If the status cmd initially failed, retry the r/w cmd */
+		if (!status_valid)
+			return ERR_RETRY;
+
+		/*
+		 * If it was a r/w cmd crc error, or illegal command
+		 * (eg, issued in wrong state) then retry - we should
+		 * have corrected the state problem above.
+		 */
+		if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND))
+			return ERR_RETRY;
+
+		/* Otherwise abort the command */
+		return ERR_ABORT;
+
+	default:
+		/* We don't understand the error code the driver gave us */
+		pr_err("%s: unknown error %d sending read/write command, card status %#x\n",
+		       req->rq_disk->disk_name, error, status);
+		return ERR_ABORT;
+	}
+}
+
+/*
+ * Initial r/w and stop cmd error recovery.
+ * We don't know whether the card received the r/w cmd or not, so try to
+ * restore things back to a sane state. Essentially, we do this as follows:
+ * - Obtain card status. If the first attempt to obtain card status fails,
+ *   the status word will reflect the failed status cmd, not the failed
+ *   r/w cmd. If we fail to obtain card status, it suggests we can no
+ *   longer communicate with the card.
+ * - Check the card state. If the card received the cmd but there was a
+ *   transient problem with the response, it might still be in a data transfer
+ *   mode. Try to send it a stop command. If this fails, we can't recover.
+ * - If the r/w cmd failed due to a response CRC error, it was probably
+ *   transient, so retry the cmd.
+ * - If the r/w cmd timed out, but we didn't get the r/w cmd status, retry.
+ * - If the r/w cmd timed out, and the r/w cmd failed due to CRC error or
+ *   illegal cmd, retry.
+ * Otherwise we don't understand what happened, so abort.
+ */
+static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req,
+				struct mmc_blk_request *brq)
+{
+	bool prev_cmd_status_valid = true;
+	u32 status, stop_status = 0;
+	int err, retry;
+
+	/*
+	 * Try to get card status which indicates both the card state
+	 * and why there was no response. If the first attempt fails,
+	 * we can't be sure the returned status is for the r/w command.
+	 */
+	for (retry = 2; retry >= 0; retry--) {
+		err = get_card_status(card, &status, 0);
+		if (!err)
+			break;
+
+		prev_cmd_status_valid = false;
+		pr_err("%s: error %d sending status command, %sing\n",
+		       req->rq_disk->disk_name, err, retry ? "retry" : "abort");
+	}
+
+	/* We couldn't get a response from the card. Give up. */
 	if (err)
-		printk(KERN_ERR "%s: error %d sending status command",
-		       req->rq_disk->disk_name, err);
-	return cmd.resp[0];
+		return ERR_ABORT;
+
+	/*
+	 * Check the current card state. If it is in some data transfer
+	 * mode, tell it to stop (and hopefully transition back to TRAN.)
+	 */
+	if (R1_CURRENT_STATE(status) == R1_STATE_DATA ||
+	    R1_CURRENT_STATE(status) == R1_STATE_RCV) {
+		err = send_stop(card, &stop_status);
+		if (err)
+			pr_err("%s: error %d sending stop command\n",
+			       req->rq_disk->disk_name, err);
+
+		/*
+		 * If the stop cmd also timed out, the card is probably
+		 * not present, so abort. Other errors are bad news too.
+		 */
+		if (err)
+			return ERR_ABORT;
+	}
+
+	/* Check for set block count errors */
+	if (brq->sbc.error)
+		return mmc_blk_cmd_error(req, "SET_BLOCK_COUNT", brq->sbc.error,
+				prev_cmd_status_valid, status);
+
+	/* Check for r/w command errors */
+	if (brq->cmd.error)
+		return mmc_blk_cmd_error(req, "r/w cmd", brq->cmd.error,
+				prev_cmd_status_valid, status);
+
+	/* Now for stop errors. These aren't fatal to the transfer. */
+	pr_err("%s: error %d sending stop command, original cmd response %#x, card status %#x\n",
+	       req->rq_disk->disk_name, brq->stop.error,
+	       brq->cmd.resp[0], status);
+
+	/*
+	 * Substitute in our own stop status as this will give the error
+	 * state which happened during the execution of the r/w command.
+	 */
+	if (stop_status) {
+		brq->stop.resp[0] = stop_status;
+		brq->stop.error = 0;
+	}
+	return ERR_CONTINUE;
 }
 
 static int mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
@@ -669,240 +814,324 @@ static inline void mmc_apply_rel_rw(struct mmc_blk_request *brq,
 	}
 }
 
-static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
+#define CMD_ERRORS							\
+	(R1_OUT_OF_RANGE |	/* Command argument out of range */	\
+	 R1_ADDRESS_ERROR |	/* Misaligned address */		\
+	 R1_BLOCK_LEN_ERROR |	/* Transferred block length incorrect */\
+	 R1_WP_VIOLATION |	/* Tried to write to protected block */	\
+	 R1_CC_ERROR |		/* Card controller error */		\
+	 R1_ERROR)		/* General/unknown error */
+
+static int mmc_blk_err_check(struct mmc_card *card,
+			     struct mmc_async_req *areq)
 {
-	struct mmc_blk_data *md = mq->data;
-	struct mmc_card *card = md->queue.card;
-	struct mmc_blk_request brq;
-	int ret = 1, disable_multi = 0;
+	enum mmc_blk_status ret = MMC_BLK_SUCCESS;
+	struct mmc_queue_req *mq_mrq = container_of(areq, struct mmc_queue_req,
+						    mmc_active);
+	struct mmc_blk_request *brq = &mq_mrq->brq;
+	struct request *req = mq_mrq->req;
 
 	/*
-	 * Reliable writes are used to implement Forced Unit Access and
-	 * REQ_META accesses, and are supported only on MMCs.
+	 * sbc.error indicates a problem with the set block count
+	 * command. No data will have been transferred.
+	 *
+	 * cmd.error indicates a problem with the r/w command. No
+	 * data will have been transferred.
+	 *
+	 * stop.error indicates a problem with the stop command. Data
+	 * may have been transferred, or may still be transferring.
 	 */
-	bool do_rel_wr = ((req->cmd_flags & REQ_FUA) ||
-			  (req->cmd_flags & REQ_META)) &&
-		(rq_data_dir(req) == WRITE) &&
-		(md->flags & MMC_BLK_REL_WR);
+	if (brq->sbc.error || brq->cmd.error || brq->stop.error) {
+		switch (mmc_blk_cmd_recovery(card, req, brq)) {
+		case ERR_RETRY:
+			return MMC_BLK_RETRY;
+		case ERR_ABORT:
+			return MMC_BLK_ABORT;
+		case ERR_CONTINUE:
+			break;
+		}
+	}
 
-	do {
-		struct mmc_command cmd = {0};
-		u32 readcmd, writecmd, status = 0;
-
-		memset(&brq, 0, sizeof(struct mmc_blk_request));
-		brq.mrq.cmd = &brq.cmd;
-		brq.mrq.data = &brq.data;
-
-		brq.cmd.arg = blk_rq_pos(req);
-		if (!mmc_card_blockaddr(card))
-			brq.cmd.arg <<= 9;
-		brq.cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
-		brq.data.blksz = 512;
-		brq.stop.opcode = MMC_STOP_TRANSMISSION;
-		brq.stop.arg = 0;
-		brq.stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
-		brq.data.blocks = blk_rq_sectors(req);
+	/*
+	 * Check for errors relating to the execution of the
+	 * initial command - such as address errors. No data
+	 * has been transferred.
+	 */
+	if (brq->cmd.resp[0] & CMD_ERRORS) {
+		pr_err("%s: r/w command failed, status = %#x\n",
+		       req->rq_disk->disk_name, brq->cmd.resp[0]);
+		return MMC_BLK_ABORT;
+	}
 
-		/*
-		 * The block layer doesn't support all sector count
-		 * restrictions, so we need to be prepared for too big
-		 * requests.
-		 */
-		if (brq.data.blocks > card->host->max_blk_count)
-			brq.data.blocks = card->host->max_blk_count;
+	/*
+	 * Everything else is either success, or a data error of some
+	 * kind. If it was a write, we may have transitioned to
+	 * program mode, which we have to wait to complete.
+	 */
+	if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
+		u32 status;
+		do {
+			int err = get_card_status(card, &status, 5);
+			if (err) {
+				printk(KERN_ERR "%s: error %d requesting status\n",
+				       req->rq_disk->disk_name, err);
+				return MMC_BLK_CMD_ERR;
+			}
+			/*
+			 * Some cards mishandle the status bits,
+			 * so make sure to check both the busy
+			 * indication and the card state.
+			 */
+		} while (!(status & R1_READY_FOR_DATA) ||
+			 (R1_CURRENT_STATE(status) == R1_STATE_PRG));
+	}
 
-		/*
-		 * After a read error, we redo the request one sector at a time
-		 * in order to accurately determine which sectors can be read
-		 * successfully.
-		 */
-		if (disable_multi && brq.data.blocks > 1)
-			brq.data.blocks = 1;
+	if (brq->data.error) {
+		pr_err("%s: error %d transferring data, sector %u, nr %u, cmd response %#x, card status %#x\n",
+		       req->rq_disk->disk_name, brq->data.error,
+		       (unsigned)blk_rq_pos(req),
+		       (unsigned)blk_rq_sectors(req),
+		       brq->cmd.resp[0], brq->stop.resp[0]);
 
-		if (brq.data.blocks > 1 || do_rel_wr) {
-			/* SPI multiblock writes terminate using a special
-			 * token, not a STOP_TRANSMISSION request.
-			 */
-			if (!mmc_host_is_spi(card->host) ||
-			    rq_data_dir(req) == READ)
-				brq.mrq.stop = &brq.stop;
-			readcmd = MMC_READ_MULTIPLE_BLOCK;
-			writecmd = MMC_WRITE_MULTIPLE_BLOCK;
-		} else {
-			brq.mrq.stop = NULL;
-			readcmd = MMC_READ_SINGLE_BLOCK;
-			writecmd = MMC_WRITE_BLOCK;
-		}
 		if (rq_data_dir(req) == READ) {
-			brq.cmd.opcode = readcmd;
-			brq.data.flags |= MMC_DATA_READ;
+			if (brq->data.blocks > 1) {
+				/* Redo read one sector at a time */
+				pr_warning("%s: retrying using single block read\n",
+					   req->rq_disk->disk_name);
+				return MMC_BLK_RETRY_SINGLE;
+			}
+			return MMC_BLK_DATA_ERR;
 		} else {
-			brq.cmd.opcode = writecmd;
-			brq.data.flags |= MMC_DATA_WRITE;
+			return MMC_BLK_CMD_ERR;
 		}
+	}
 
-		if (do_rel_wr)
-			mmc_apply_rel_rw(&brq, card, req);
+	if (ret == MMC_BLK_SUCCESS &&
+	    blk_rq_bytes(req) != brq->data.bytes_xfered)
+		ret = MMC_BLK_PARTIAL;
 
-		/*
-		 * Pre-defined multi-block transfers are preferable to
-		 * open ended-ones (and necessary for reliable writes).
-		 * However, it is not sufficient to just send CMD23,
-		 * and avoid the final CMD12, as on an error condition
-		 * CMD12 (stop) needs to be sent anyway. This, coupled
-		 * with Auto-CMD23 enhancements provided by some
-		 * hosts, means that the complexity of dealing
-		 * with this is best left to the host. If CMD23 is
-		 * supported by card and host, we'll fill sbc in and let
-		 * the host deal with handling it correctly. This means
-		 * that for hosts that don't expose MMC_CAP_CMD23, no
-		 * change of behavior will be observed.
-		 *
-		 * N.B: Some MMC cards experience perf degradation.
-		 * We'll avoid using CMD23-bounded multiblock writes for
-		 * these, while retaining features like reliable writes.
-		 */
+	return ret;
+}
 
-		if ((md->flags & MMC_BLK_CMD23) &&
-		    mmc_op_multi(brq.cmd.opcode) &&
-		    (do_rel_wr || !(card->quirks & MMC_QUIRK_BLK_NO_CMD23))) {
-			brq.sbc.opcode = MMC_SET_BLOCK_COUNT;
-			brq.sbc.arg = brq.data.blocks |
-				(do_rel_wr ? (1 << 31) : 0);
-			brq.sbc.flags = MMC_RSP_R1 | MMC_CMD_AC;
-			brq.mrq.sbc = &brq.sbc;
-		}
+static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
+			       struct mmc_card *card,
+			       int disable_multi,
+			       struct mmc_queue *mq)
+{
+	u32 readcmd, writecmd;
+	struct mmc_blk_request *brq = &mqrq->brq;
+	struct request *req = mqrq->req;
+	struct mmc_blk_data *md = mq->data;
 
-		mmc_set_data_timeout(&brq.data, card);
+	/*
+	 * Reliable writes are used to implement Forced Unit Access and
+	 * REQ_META accesses, and are supported only on MMCs.
+	 */
+	bool do_rel_wr = ((req->cmd_flags & REQ_FUA) ||
+			  (req->cmd_flags & REQ_META)) &&
+		(rq_data_dir(req) == WRITE) &&
+		(md->flags & MMC_BLK_REL_WR);
 
-		brq.data.sg = mq->sg;
-		brq.data.sg_len = mmc_queue_map_sg(mq);
+	memset(brq, 0, sizeof(struct mmc_blk_request));
+	brq->mrq.cmd = &brq->cmd;
+	brq->mrq.data = &brq->data;
 
-		/*
-		 * Adjust the sg list so it is the same size as the
-		 * request.
-		 */
-		if (brq.data.blocks != blk_rq_sectors(req)) {
-			int i, data_size = brq.data.blocks << 9;
-			struct scatterlist *sg;
-
-			for_each_sg(brq.data.sg, sg, brq.data.sg_len, i) {
-				data_size -= sg->length;
-				if (data_size <= 0) {
-					sg->length += data_size;
-					i++;
-					break;
-				}
-			}
-			brq.data.sg_len = i;
-		}
+	brq->cmd.arg = blk_rq_pos(req);
+	if (!mmc_card_blockaddr(card))
+		brq->cmd.arg <<= 9;
+	brq->cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
+	brq->data.blksz = 512;
+	brq->stop.opcode = MMC_STOP_TRANSMISSION;
+	brq->stop.arg = 0;
+	brq->stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
+	brq->data.blocks = blk_rq_sectors(req);
+
+	/*
+	 * The block layer doesn't support all sector count
+	 * restrictions, so we need to be prepared for too big
+	 * requests.
+	 */
+	if (brq->data.blocks > card->host->max_blk_count)
+		brq->data.blocks = card->host->max_blk_count;
+
+	/*
+	 * After a read error, we redo the request one sector at a time
+	 * in order to accurately determine which sectors can be read
+	 * successfully.
+	 */
+	if (disable_multi && brq->data.blocks > 1)
+		brq->data.blocks = 1;
+
+	if (brq->data.blocks > 1 || do_rel_wr) {
+		/* SPI multiblock writes terminate using a special
+		 * token, not a STOP_TRANSMISSION request.
+		 */
+		if (!mmc_host_is_spi(card->host) ||
+		    rq_data_dir(req) == READ)
+			brq->mrq.stop = &brq->stop;
+		readcmd = MMC_READ_MULTIPLE_BLOCK;
+		writecmd = MMC_WRITE_MULTIPLE_BLOCK;
+	} else {
+		brq->mrq.stop = NULL;
+		readcmd = MMC_READ_SINGLE_BLOCK;
+		writecmd = MMC_WRITE_BLOCK;
+	}
+	if (rq_data_dir(req) == READ) {
+		brq->cmd.opcode = readcmd;
+		brq->data.flags |= MMC_DATA_READ;
+	} else {
+		brq->cmd.opcode = writecmd;
+		brq->data.flags |= MMC_DATA_WRITE;
+	}
 
-		mmc_queue_bounce_pre(mq);
+	if (do_rel_wr)
+		mmc_apply_rel_rw(brq, card, req);
 
-		mmc_wait_for_req(card->host, &brq.mrq);
+	/*
+	 * Pre-defined multi-block transfers are preferable to
+	 * open ended-ones (and necessary for reliable writes).
+	 * However, it is not sufficient to just send CMD23,
+	 * and avoid the final CMD12, as on an error condition
+	 * CMD12 (stop) needs to be sent anyway. This, coupled
+	 * with Auto-CMD23 enhancements provided by some
+	 * hosts, means that the complexity of dealing
+	 * with this is best left to the host. If CMD23 is
+	 * supported by card and host, we'll fill sbc in and let
+	 * the host deal with handling it correctly. This means
+	 * that for hosts that don't expose MMC_CAP_CMD23, no
+	 * change of behavior will be observed.
+	 *
	 * N.B: Some MMC cards experience perf degradation.
+	 * We'll avoid using CMD23-bounded multiblock writes for
+	 * these, while retaining features like reliable writes.
+	 */
 
-		mmc_queue_bounce_post(mq);
+	if ((md->flags & MMC_BLK_CMD23) &&
+	    mmc_op_multi(brq->cmd.opcode) &&
+	    (do_rel_wr || !(card->quirks & MMC_QUIRK_BLK_NO_CMD23))) {
+		brq->sbc.opcode = MMC_SET_BLOCK_COUNT;
+		brq->sbc.arg = brq->data.blocks |
+			(do_rel_wr ? (1 << 31) : 0);
+		brq->sbc.flags = MMC_RSP_R1 | MMC_CMD_AC;
+		brq->mrq.sbc = &brq->sbc;
+	}
 
-		/*
-		 * Check for errors here, but don't jump to cmd_err
-		 * until later as we need to wait for the card to leave
-		 * programming mode even when things go wrong.
-		 */
-		if (brq.sbc.error || brq.cmd.error ||
-		    brq.data.error || brq.stop.error) {
-			if (brq.data.blocks > 1 && rq_data_dir(req) == READ) {
-				/* Redo read one sector at a time */
-				printk(KERN_WARNING "%s: retrying using single "
-				       "block read\n", req->rq_disk->disk_name);
-				disable_multi = 1;
-				continue;
-			}
-			status = get_card_status(card, req);
-		}
+	mmc_set_data_timeout(&brq->data, card);
 
-		if (brq.sbc.error) {
-			printk(KERN_ERR "%s: error %d sending SET_BLOCK_COUNT "
-			       "command, response %#x, card status %#x\n",
-			       req->rq_disk->disk_name, brq.sbc.error,
-			       brq.sbc.resp[0], status);
-		}
+	brq->data.sg = mqrq->sg;
+	brq->data.sg_len = mmc_queue_map_sg(mq, mqrq);
 
-		if (brq.cmd.error) {
-			printk(KERN_ERR "%s: error %d sending read/write "
-			       "command, response %#x, card status %#x\n",
-			       req->rq_disk->disk_name, brq.cmd.error,
-			       brq.cmd.resp[0], status);
+	/*
+	 * Adjust the sg list so it is the same size as the
+	 * request.
+	 */
+	if (brq->data.blocks != blk_rq_sectors(req)) {
+		int i, data_size = brq->data.blocks << 9;
+		struct scatterlist *sg;
+
+		for_each_sg(brq->data.sg, sg, brq->data.sg_len, i) {
+			data_size -= sg->length;
+			if (data_size <= 0) {
+				sg->length += data_size;
+				i++;
+				break;
+			}
 		}
+		brq->data.sg_len = i;
+	}
 
-		if (brq.data.error) {
-			if (brq.data.error == -ETIMEDOUT && brq.mrq.stop)
-				/* 'Stop' response contains card status */
-				status = brq.mrq.stop->resp[0];
-			printk(KERN_ERR "%s: error %d transferring data,"
-			       " sector %u, nr %u, card status %#x\n",
-			       req->rq_disk->disk_name, brq.data.error,
-			       (unsigned)blk_rq_pos(req),
-			       (unsigned)blk_rq_sectors(req), status);
-		}
+	mqrq->mmc_active.mrq = &brq->mrq;
+	mqrq->mmc_active.err_check = mmc_blk_err_check;
 
-		if (brq.stop.error) {
-			printk(KERN_ERR "%s: error %d sending stop command, "
-			       "response %#x, card status %#x\n",
-			       req->rq_disk->disk_name, brq.stop.error,
-			       brq.stop.resp[0], status);
-		}
+	mmc_queue_bounce_pre(mqrq);
+}
 
-		if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
-			do {
-				int err;
-
-				cmd.opcode = MMC_SEND_STATUS;
-				cmd.arg = card->rca << 16;
-				cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
-				err = mmc_wait_for_cmd(card->host, &cmd, 5);
-				if (err) {
-					printk(KERN_ERR "%s: error %d requesting status\n",
-					       req->rq_disk->disk_name, err);
-					goto cmd_err;
-				}
-				/*
-				 * Some cards mishandle the status bits,
-				 * so make sure to check both the busy
-				 * indication and the card state.
				 */
-			} while (!(cmd.resp[0] & R1_READY_FOR_DATA) ||
-				 (R1_CURRENT_STATE(cmd.resp[0]) == 7));
-
-#if 0
-			if (cmd.resp[0] & ~0x00000900)
-				printk(KERN_ERR "%s: status = %08x\n",
-				       req->rq_disk->disk_name, cmd.resp[0]);
-			if (mmc_decode_status(cmd.resp))
-				goto cmd_err;
-#endif
-		}
+static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
+{
+	struct mmc_blk_data *md = mq->data;
+	struct mmc_card *card = md->queue.card;
+	struct mmc_blk_request *brq = &mq->mqrq_cur->brq;
+	int ret = 1, disable_multi = 0, retry = 0;
+	enum mmc_blk_status status;
+	struct mmc_queue_req *mq_rq;
+	struct request *req;
+	struct mmc_async_req *areq;
+
+	if (!rqc && !mq->mqrq_prev->req)
+		return 0;
 
-		if (brq.cmd.error || brq.stop.error || brq.data.error) {
-			if (rq_data_dir(req) == READ) {
+	do {
+		if (rqc) {
+			mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq);
+			areq = &mq->mqrq_cur->mmc_active;
+		} else
+			areq = NULL;
+		areq = mmc_start_req(card->host, areq, (int *) &status);
+		if (!areq)
+			return 0;
+
+		mq_rq = container_of(areq, struct mmc_queue_req, mmc_active);
+		brq = &mq_rq->brq;
+		req = mq_rq->req;
+		mmc_queue_bounce_post(mq_rq);
+
+		switch (status) {
+		case MMC_BLK_SUCCESS:
+		case MMC_BLK_PARTIAL:
+			/*
+			 * A block was successfully transferred.
+			 */
+			spin_lock_irq(&md->lock);
+			ret = __blk_end_request(req, 0,
+						brq->data.bytes_xfered);
+			spin_unlock_irq(&md->lock);
+			if (status == MMC_BLK_SUCCESS && ret) {
 				/*
-				 * After an error, we redo I/O one sector at a
-				 * time, so we only reach here after trying to
-				 * read a single sector.
+				 * blk_end_request() has returned non-zero
+				 * even though all data was transferred and
+				 * no errors were returned by the host.
+				 * If this happens, it's a bug.
 				 */
-				spin_lock_irq(&md->lock);
-				ret = __blk_end_request(req, -EIO, brq.data.blksz);
-				spin_unlock_irq(&md->lock);
-				continue;
+				printk(KERN_ERR "%s BUG rq_tot %d d_xfer %d\n",
+				       __func__, blk_rq_bytes(req),
+				       brq->data.bytes_xfered);
+				rqc = NULL;
+				goto cmd_abort;
 			}
+			break;
+		case MMC_BLK_CMD_ERR:
 			goto cmd_err;
+		case MMC_BLK_RETRY_SINGLE:
+			disable_multi = 1;
+			break;
+		case MMC_BLK_RETRY:
+			if (retry++ < 5)
+				break;
+		case MMC_BLK_ABORT:
+			goto cmd_abort;
+		case MMC_BLK_DATA_ERR:
+			/*
+			 * After an error, we redo I/O one sector at a
+			 * time, so we only reach here after trying to
+			 * read a single sector.
+			 */
+			spin_lock_irq(&md->lock);
+			ret = __blk_end_request(req, -EIO,
+						brq->data.blksz);
+			spin_unlock_irq(&md->lock);
+			if (!ret)
+				goto start_new_req;
+			break;
 		}
 
-		/*
-		 * A block was successfully transferred.
-		 */
-		spin_lock_irq(&md->lock);
-		ret = __blk_end_request(req, 0, brq.data.bytes_xfered);
-		spin_unlock_irq(&md->lock);
+		if (ret) {
+			/*
+			 * In case of an incomplete request,
+			 * prepare it again and resend.
+			 */
+			mmc_blk_rw_rq_prep(mq_rq, card, disable_multi, mq);
+			mmc_start_req(card->host, &mq_rq->mmc_active, NULL);
+		}
 	} while (ret);
 
 	return 1;
@@ -927,15 +1156,22 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
 		}
 	} else {
 		spin_lock_irq(&md->lock);
-		ret = __blk_end_request(req, 0, brq.data.bytes_xfered);
+		ret = __blk_end_request(req, 0, brq->data.bytes_xfered);
 		spin_unlock_irq(&md->lock);
 	}
 
+ cmd_abort:
 	spin_lock_irq(&md->lock);
 	while (ret)
 		ret = __blk_end_request(req, -EIO, blk_rq_cur_bytes(req));
 	spin_unlock_irq(&md->lock);
 
+ start_new_req:
+	if (rqc) {
+		mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq);
+		mmc_start_req(card->host, &mq->mqrq_cur->mmc_active, NULL);
+	}
+
 	return 0;
 }
 
@@ -945,26 +1181,37 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 	struct mmc_blk_data *md = mq->data;
 	struct mmc_card *card = md->queue.card;
 
-	mmc_claim_host(card->host);
+	if (req && !mq->mqrq_prev->req)
+		/* claim host only for the first request */
+		mmc_claim_host(card->host);
+
 	ret = mmc_blk_part_switch(card, md);
 	if (ret) {
 		ret = 0;
 		goto out;
 	}
 
-	if (req->cmd_flags & REQ_DISCARD) {
+	if (req && req->cmd_flags & REQ_DISCARD) {
+		/* complete ongoing async transfer before issuing discard */
+		if (card->host->areq)
+			mmc_blk_issue_rw_rq(mq, NULL);
 		if (req->cmd_flags & REQ_SECURE)
 			ret = mmc_blk_issue_secdiscard_rq(mq, req);
 		else
 			ret = mmc_blk_issue_discard_rq(mq, req);
-	} else if (req->cmd_flags & REQ_FLUSH) {
+	} else if (req && req->cmd_flags & REQ_FLUSH) {
+		/* complete ongoing async transfer before issuing flush */
+		if (card->host->areq)
+			mmc_blk_issue_rw_rq(mq, NULL);
 		ret = mmc_blk_issue_flush(mq, req);
 	} else {
 		ret = mmc_blk_issue_rw_rq(mq, req);
 	}
 
 out:
-	mmc_release_host(card->host);
+	if (!req)
+		/* release host only when there are no more requests */
+		mmc_release_host(card->host);
 	return ret;
 }
diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c
index 233cdfae92f4..006a5e9f8ab8 100644
--- a/drivers/mmc/card/mmc_test.c
+++ b/drivers/mmc/card/mmc_test.c
@@ -148,6 +148,27 @@ struct mmc_test_card {
 	struct mmc_test_general_result	*gr;
 };
 
+enum mmc_test_prep_media {
+	MMC_TEST_PREP_NONE = 0,
+	MMC_TEST_PREP_WRITE_FULL = 1 << 0,
+	MMC_TEST_PREP_ERASE = 1 << 1,
+};
+
+struct mmc_test_multiple_rw {
+	unsigned int *sg_len;
+	unsigned int *bs;
+	unsigned int len;
+	unsigned int size;
+	bool do_write;
+	bool do_nonblock_req;
+	enum mmc_test_prep_media prepare;
+};
+
+struct mmc_test_async_req {
+	struct mmc_async_req areq;
+	struct mmc_test_card *test;
+};
+
 /*******************************************************************/
 /*  General helper functions                                       */
 /*******************************************************************/
@@ -367,21 +388,26 @@ out_free:
 * Map memory into a scatterlist. Optionally allow the same memory to be
 * mapped more than once.
 */
-static int mmc_test_map_sg(struct mmc_test_mem *mem, unsigned long sz,
+static int mmc_test_map_sg(struct mmc_test_mem *mem, unsigned long size,
 			   struct scatterlist *sglist, int repeat,
 			   unsigned int max_segs, unsigned int max_seg_sz,
-			   unsigned int *sg_len)
+			   unsigned int *sg_len, int min_sg_len)
 {
 	struct scatterlist *sg = NULL;
 	unsigned int i;
+	unsigned long sz = size;
 
 	sg_init_table(sglist, max_segs);
+	if (min_sg_len > max_segs)
+		min_sg_len = max_segs;
 
 	*sg_len = 0;
 	do {
 		for (i = 0; i < mem->cnt; i++) {
 			unsigned long len = PAGE_SIZE << mem->arr[i].order;
 
+			if (min_sg_len && (size / min_sg_len < len))
+				len = ALIGN(size / min_sg_len, 512);
 			if (len > sz)
 				len = sz;
 			if (len > max_seg_sz)
@@ -554,11 +580,12 @@ static void mmc_test_print_avg_rate(struct mmc_test_card *test, uint64_t bytes,
 
 	printk(KERN_INFO "%s: Transfer of %u x %u sectors (%u x %u%s KiB) took "
 			 "%lu.%09lu seconds (%u kB/s, %u KiB/s, "
-			 "%u.%02u IOPS)\n",
+			 "%u.%02u IOPS, sg_len %d)\n",
 			 mmc_hostname(test->card->host), count, sectors, count,
 			 sectors >> 1, (sectors & 1 ? ".5" : ""),
 			 (unsigned long)ts.tv_sec, (unsigned long)ts.tv_nsec,
-			 rate / 1000, rate / 1024, iops / 100, iops % 100);
+			 rate / 1000, rate / 1024, iops / 100, iops % 100,
+			 test->area.sg_len);
 
 	mmc_test_save_transfer_result(test, count, sectors, ts, rate, iops);
 }
@@ -661,7 +688,7 @@ static void mmc_test_prepare_broken_mrq(struct mmc_test_card *test,
 * Checks that a normal transfer didn't have any errors
 */
 static int mmc_test_check_result(struct mmc_test_card *test,
-	struct mmc_request *mrq)
+				 struct mmc_request *mrq)
 {
 	int ret;
 
@@ -685,6 +712,17 @@ static int mmc_test_check_result(struct mmc_test_card *test,
 	return ret;
 }
 
+static int mmc_test_check_result_async(struct mmc_card *card,
+				       struct mmc_async_req *areq)
+{
+	struct mmc_test_async_req *test_async =
+		container_of(areq, struct mmc_test_async_req, areq);
+
+	mmc_test_wait_busy(test_async->test);
+
+	return mmc_test_check_result(test_async->test, areq->mrq);
+}
+
 /*
 * Checks that a "short transfer" behaved as expected
 */
@@ -720,6 +758,85 @@ static int mmc_test_check_broken_result(struct mmc_test_card *test,
 }
 
 /*
+ * Tests nonblock transfer with certain parameters
+ */
+static void mmc_test_nonblock_reset(struct mmc_request *mrq,
+				    struct mmc_command *cmd,
+				    struct mmc_command *stop,
+				    struct mmc_data *data)
+{
+	memset(mrq, 0, sizeof(struct mmc_request));
+	memset(cmd, 0, sizeof(struct mmc_command));
+	memset(data, 0, sizeof(struct mmc_data));
+	memset(stop, 0, sizeof(struct mmc_command));
+
+	mrq->cmd = cmd;
+	mrq->data = data;
+	mrq->stop = stop;
+}
+static int mmc_test_nonblock_transfer(struct mmc_test_card *test,
+				      struct scatterlist *sg, unsigned sg_len,
+				      unsigned dev_addr, unsigned blocks,
+				      unsigned blksz, int write, int count)
+{
+	struct mmc_request mrq1;
+	struct mmc_command cmd1;
+	struct mmc_command stop1;
+	struct mmc_data data1;
+
+	struct mmc_request mrq2;
+	struct mmc_command cmd2;
+	struct mmc_command stop2;
+	struct mmc_data data2;
+
+	struct mmc_test_async_req test_areq[2];
+	struct mmc_async_req *done_areq;
+	struct mmc_async_req *cur_areq = &test_areq[0].areq;
+	struct mmc_async_req *other_areq = &test_areq[1].areq;
+	int i;
+	int ret;
+
+	test_areq[0].test = test;
+	test_areq[1].test = test;
+
+	mmc_test_nonblock_reset(&mrq1, &cmd1, &stop1, &data1);
+	mmc_test_nonblock_reset(&mrq2, &cmd2, &stop2, &data2);
+
+	cur_areq->mrq = &mrq1;
+	cur_areq->err_check = mmc_test_check_result_async;
+	other_areq->mrq = &mrq2;
+	other_areq->err_check = mmc_test_check_result_async;
+
+	for (i = 0; i < count; i++) {
+		mmc_test_prepare_mrq(test, cur_areq->mrq, sg, sg_len, dev_addr,
+				     blocks, blksz, write);
+		done_areq = mmc_start_req(test->card->host, cur_areq, &ret);
+
+		if (ret || (!done_areq && i > 0))
+			goto err;
+
+		if (done_areq) {
+			if (done_areq->mrq == &mrq2)
+				mmc_test_nonblock_reset(&mrq2, &cmd2,
+							&stop2, &data2);
+			else
+				mmc_test_nonblock_reset(&mrq1, &cmd1,
+							&stop1, &data1);
+		}
+		done_areq = cur_areq;
+		cur_areq = other_areq;
+		other_areq = done_areq;
+		dev_addr += blocks;
+	}
+
+	done_areq = mmc_start_req(test->card->host, NULL, &ret);
+
+	return ret;
+err:
+	return ret;
+}
+
+/*
 * Tests a basic transfer with certain parameters
 */
 static int mmc_test_simple_transfer(struct mmc_test_card *test,
@@ -1302,7 +1419,7 @@ static int mmc_test_no_highmem(struct mmc_test_card *test)
 * Map sz bytes so that it can be transferred.
 */
 static int mmc_test_area_map(struct mmc_test_card *test, unsigned long sz,
-			     int max_scatter)
+			     int max_scatter, int min_sg_len)
 {
 	struct mmc_test_area *t = &test->area;
 	int err;
@@ -1315,7 +1432,7 @@ static int mmc_test_area_map(struct mmc_test_card *test, unsigned long sz,
 				       &t->sg_len);
 	} else {
 		err = mmc_test_map_sg(t->mem, sz, t->sg, 1, t->max_segs,
-				      t->max_seg_sz, &t->sg_len);
+				      t->max_seg_sz, &t->sg_len, min_sg_len);
 	}
 	if (err)
 		printk(KERN_INFO "%s: Failed to map sg list\n",
@@ -1336,14 +1453,17 @@ static int mmc_test_area_transfer(struct mmc_test_card *test,
 }
 
 /*
- * Map and transfer bytes.
+ * Map and transfer bytes for multiple transfers.
 */
-static int mmc_test_area_io(struct mmc_test_card *test, unsigned long sz,
-			    unsigned int dev_addr, int write, int max_scatter,
-			    int timed)
+static int mmc_test_area_io_seq(struct mmc_test_card *test, unsigned long sz,
+				unsigned int dev_addr, int write,
+				int max_scatter, int timed, int count,
+				bool nonblock, int min_sg_len)
 {
 	struct timespec ts1, ts2;
-	int ret;
+	int ret = 0;
+	int i;
+	struct mmc_test_area *t = &test->area;
 
 	/*
 	 * In the case of a maximally scattered transfer, the maximum transfer
@@ -1361,14 +1481,21 @@ static int mmc_test_area_io(struct mmc_test_card *test, unsigned long sz,
 		sz = max_tfr;
 	}
 
-	ret = mmc_test_area_map(test, sz, max_scatter);
+	ret = mmc_test_area_map(test, sz, max_scatter, min_sg_len);
 	if (ret)
 		return ret;
 
 	if (timed)
 		getnstimeofday(&ts1);
+	if (nonblock)
+		ret = mmc_test_nonblock_transfer(test, t->sg, t->sg_len,
+				 dev_addr, t->blocks, 512, write, count);
+	else
+		for (i = 0; i < count && ret == 0; i++) {
+			ret = mmc_test_area_transfer(test, dev_addr, write);
+			dev_addr += sz >> 9;
+		}
 
-	ret = mmc_test_area_transfer(test, dev_addr, write);
 	if (ret)
 		return ret;
 
@@ -1376,11 +1503,19 @@ static int mmc_test_area_io(struct mmc_test_card *test, unsigned long sz,
 		getnstimeofday(&ts2);
 
 	if (timed)
-		mmc_test_print_rate(test, sz, &ts1, &ts2);
+		mmc_test_print_avg_rate(test, sz, count, &ts1, &ts2);
 
 	return 0;
 }
 
+static int mmc_test_area_io(struct mmc_test_card *test, unsigned long sz,
+			    unsigned int dev_addr, int write, int max_scatter,
+			    int timed)
+{
+	return mmc_test_area_io_seq(test, sz, dev_addr, write, max_scatter,
+				    timed, 1, false, 0);
+}
+
 /*
 * Write the test area entirely.
 */
@@ -1954,6 +2089,245 @@ static int mmc_test_large_seq_write_perf(struct mmc_test_card *test)
 	return mmc_test_large_seq_perf(test, 1);
 }
 
+static int mmc_test_rw_multiple(struct mmc_test_card *test,
+				struct mmc_test_multiple_rw *tdata,
+				unsigned int reqsize, unsigned int size,
+				int min_sg_len)
+{
+	unsigned int dev_addr;
+	struct mmc_test_area *t = &test->area;
+	int ret = 0;
+
+	/* Set up test area */
+	if (size > mmc_test_capacity(test->card) / 2 * 512)
+		size = mmc_test_capacity(test->card) / 2 * 512;
+	if (reqsize > t->max_tfr)
+		reqsize = t->max_tfr;
+	dev_addr = mmc_test_capacity(test->card) / 4;
+	if ((dev_addr & 0xffff0000))
+		dev_addr &= 0xffff0000; /* Round to 64MiB boundary */
+	else
+		dev_addr &= 0xfffff800; /* Round to 1MiB boundary */
+	if (!dev_addr)
+		goto err;
+
+	if (reqsize > size)
+		return 0;
+
+	/* prepare test area */
+	if (mmc_can_erase(test->card) &&
+	    tdata->prepare & MMC_TEST_PREP_ERASE) {
+		ret = mmc_erase(test->card, dev_addr,
+				size / 512, MMC_SECURE_ERASE_ARG);
+		if (ret)
+			ret = mmc_erase(test->card, dev_addr,
+					size / 512, MMC_ERASE_ARG);
+		if (ret)
+			goto err;
+	}
+
+	/* Run test */
+	ret = mmc_test_area_io_seq(test, reqsize, dev_addr,
+				   tdata->do_write, 0, 1, size / reqsize,
+				   tdata->do_nonblock_req, min_sg_len);
+	if (ret)
+		goto err;
+
+	return ret;
+ err:
+	printk(KERN_INFO "[%s] error\n", __func__);
+	return ret;
+}
+
+static int mmc_test_rw_multiple_size(struct mmc_test_card *test,
+				     struct mmc_test_multiple_rw *rw)
+{
+	int ret = 0;
+	int i;
+	void *pre_req = test->card->host->ops->pre_req;
+	void *post_req = test->card->host->ops->post_req;
+
+	if (rw->do_nonblock_req &&
+	    ((!pre_req && post_req) || (pre_req && !post_req))) {
+		printk(KERN_INFO "error: only one of pre/post is defined\n");
+		return -EINVAL;
+	}
+
+	for (i = 0 ; i < rw->len && ret == 0; i++) {
+		ret = mmc_test_rw_multiple(test, rw, rw->bs[i], rw->size, 0);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+static int mmc_test_rw_multiple_sg_len(struct mmc_test_card *test,
+				       struct mmc_test_multiple_rw *rw)
+{
+	int ret = 0;
+	int i;
+
+	for (i = 0 ; i < rw->len && ret == 0; i++) {
+		ret = mmc_test_rw_multiple(test, rw, 512*1024, rw->size,
+					   rw->sg_len[i]);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+/*
+ * Multiple blocking write 4k to 4 MB chunks
+ */
+static int mmc_test_profile_mult_write_blocking_perf(struct mmc_test_card *test)
+{
+	unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16,
+			     1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22};
+	struct mmc_test_multiple_rw test_data = {
+		.bs = bs,
+		.size = TEST_AREA_MAX_SIZE,
+		.len = ARRAY_SIZE(bs),
+		.do_write = true,
+		.do_nonblock_req = false,
+		.prepare = MMC_TEST_PREP_ERASE,
+	};
+
+	return mmc_test_rw_multiple_size(test, &test_data);
+};
+
+/*
+ * Multiple non-blocking write 4k to 4 MB chunks
+ */
+static int mmc_test_profile_mult_write_nonblock_perf(struct mmc_test_card *test)
+{
+	unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16,
+			     1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22};
+	struct mmc_test_multiple_rw test_data = {
+		.bs = bs,
+		.size = TEST_AREA_MAX_SIZE,
+		.len = ARRAY_SIZE(bs),
+		.do_write = true,
+		.do_nonblock_req = true,
+		.prepare = MMC_TEST_PREP_ERASE,
+	};
+
+	return mmc_test_rw_multiple_size(test, &test_data);
+}
+
+/*
+ * Multiple blocking read 4k to 4 MB chunks
+ */
+static int mmc_test_profile_mult_read_blocking_perf(struct mmc_test_card *test)
+{
+	unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16,
+			     1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22};
+	struct 
mmc_test_multiple_rw test_data = { + .bs = bs, + .size = TEST_AREA_MAX_SIZE, + .len = ARRAY_SIZE(bs), + .do_write = false, + .do_nonblock_req = false, + .prepare = MMC_TEST_PREP_NONE, + }; + + return mmc_test_rw_multiple_size(test, &test_data); +} + +/* + * Multiple non-blocking read 4k to 4 MB chunks + */ +static int mmc_test_profile_mult_read_nonblock_perf(struct mmc_test_card *test) +{ + unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, + 1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22}; + struct mmc_test_multiple_rw test_data = { + .bs = bs, + .size = TEST_AREA_MAX_SIZE, + .len = ARRAY_SIZE(bs), + .do_write = false, + .do_nonblock_req = true, + .prepare = MMC_TEST_PREP_NONE, + }; + + return mmc_test_rw_multiple_size(test, &test_data); +} + +/* + * Multiple blocking write 1 to 512 sg elements + */ +static int mmc_test_profile_sglen_wr_blocking_perf(struct mmc_test_card *test) +{ + unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6, + 1 << 7, 1 << 8, 1 << 9}; + struct mmc_test_multiple_rw test_data = { + .sg_len = sg_len, + .size = TEST_AREA_MAX_SIZE, + .len = ARRAY_SIZE(sg_len), + .do_write = true, + .do_nonblock_req = false, + .prepare = MMC_TEST_PREP_ERASE, + }; + + return mmc_test_rw_multiple_sg_len(test, &test_data); +}; + +/* + * Multiple non-blocking write 1 to 512 sg elements + */ +static int mmc_test_profile_sglen_wr_nonblock_perf(struct mmc_test_card *test) +{ + unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6, + 1 << 7, 1 << 8, 1 << 9}; + struct mmc_test_multiple_rw test_data = { + .sg_len = sg_len, + .size = TEST_AREA_MAX_SIZE, + .len = ARRAY_SIZE(sg_len), + .do_write = true, + .do_nonblock_req = true, + .prepare = MMC_TEST_PREP_ERASE, + }; + + return mmc_test_rw_multiple_sg_len(test, &test_data); +} + +/* + * Multiple blocking read 1 to 512 sg elements + */ +static int mmc_test_profile_sglen_r_blocking_perf(struct mmc_test_card *test) +{ + unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6, + 1 << 7, 1 << 8, 1 << 9}; + struct mmc_test_multiple_rw test_data = { + .sg_len = sg_len, + .size = TEST_AREA_MAX_SIZE, + .len = ARRAY_SIZE(sg_len), + .do_write = false, + .do_nonblock_req = false, + .prepare = MMC_TEST_PREP_NONE, + }; + + return mmc_test_rw_multiple_sg_len(test, &test_data); +} + +/* + * Multiple non-blocking read 1 to 512 sg elements + */ +static int mmc_test_profile_sglen_r_nonblock_perf(struct mmc_test_card *test) +{ + unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6, + 1 << 7, 1 << 8, 1 << 9}; + struct mmc_test_multiple_rw test_data = { + .sg_len = sg_len, + .size = TEST_AREA_MAX_SIZE, + .len = ARRAY_SIZE(sg_len), + .do_write = false, + .do_nonblock_req = true, + .prepare = MMC_TEST_PREP_NONE, + }; + + return mmc_test_rw_multiple_sg_len(test, &test_data); +} + static const struct mmc_test_case mmc_test_cases[] = { { .name = "Basic write (no data verification)", @@ -2221,6 +2595,61 @@ static const struct mmc_test_case mmc_test_cases[] = { .cleanup = mmc_test_area_cleanup, }, + { + .name = "Write performance with blocking req 4k to 4MB", + .prepare = mmc_test_area_prepare, + .run = mmc_test_profile_mult_write_blocking_perf, + .cleanup = mmc_test_area_cleanup, + }, + + { + .name = "Write performance with non-blocking req 4k to 4MB", + .prepare = mmc_test_area_prepare, + .run = mmc_test_profile_mult_write_nonblock_perf, + .cleanup = mmc_test_area_cleanup, + }, + + { + .name = "Read performance with blocking req 4k to 4MB", + .prepare = mmc_test_area_prepare, + .run = mmc_test_profile_mult_read_blocking_perf, + .cleanup = 
mmc_test_area_cleanup, + }, + + { + .name = "Read performance with non-blocking req 4k to 4MB", + .prepare = mmc_test_area_prepare, + .run = mmc_test_profile_mult_read_nonblock_perf, + .cleanup = mmc_test_area_cleanup, + }, + + { + .name = "Write performance blocking req 1 to 512 sg elems", + .prepare = mmc_test_area_prepare, + .run = mmc_test_profile_sglen_wr_blocking_perf, + .cleanup = mmc_test_area_cleanup, + }, + + { + .name = "Write performance non-blocking req 1 to 512 sg elems", + .prepare = mmc_test_area_prepare, + .run = mmc_test_profile_sglen_wr_nonblock_perf, + .cleanup = mmc_test_area_cleanup, + }, + + { + .name = "Read performance blocking req 1 to 512 sg elems", + .prepare = mmc_test_area_prepare, + .run = mmc_test_profile_sglen_r_blocking_perf, + .cleanup = mmc_test_area_cleanup, + }, + + { + .name = "Read performance non-blocking req 1 to 512 sg elems", + .prepare = mmc_test_area_prepare, + .run = mmc_test_profile_sglen_r_nonblock_perf, + .cleanup = mmc_test_area_cleanup, + }, }; static DEFINE_MUTEX(mmc_test_lock); @@ -2445,6 +2874,32 @@ static const struct file_operations mmc_test_fops_test = { .release = single_release, }; +static int mtf_testlist_show(struct seq_file *sf, void *data) +{ + int i; + + mutex_lock(&mmc_test_lock); + + for (i = 0; i < ARRAY_SIZE(mmc_test_cases); i++) + seq_printf(sf, "%d:\t%s\n", i+1, mmc_test_cases[i].name); + + mutex_unlock(&mmc_test_lock); + + return 0; +} + +static int mtf_testlist_open(struct inode *inode, struct file *file) +{ + return single_open(file, mtf_testlist_show, inode->i_private); +} + +static const struct file_operations mmc_test_fops_testlist = { + .open = mtf_testlist_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static void mmc_test_free_file_test(struct mmc_card *card) { struct mmc_test_dbgfs_file *df, *dfs; @@ -2476,7 +2931,18 @@ static int mmc_test_register_file_test(struct mmc_card *card) if (IS_ERR_OR_NULL(file)) { dev_err(&card->dev, - "Can't create file. Perhaps debugfs is disabled.\n"); + "Can't create test. Perhaps debugfs is disabled.\n"); + ret = -ENODEV; + goto err; + } + + if (card->debugfs_root) + file = debugfs_create_file("testlist", S_IRUGO, + card->debugfs_root, card, &mmc_test_fops_testlist); + + if (IS_ERR_OR_NULL(file)) { + dev_err(&card->dev, + "Can't create testlist. Perhaps debugfs is disabled.\n"); ret = -ENODEV; goto err; } diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index 6413afa318d2..45fb362e3f01 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -52,14 +52,18 @@ static int mmc_queue_thread(void *d) down(&mq->thread_sem); do { struct request *req = NULL; + struct mmc_queue_req *tmp; spin_lock_irq(q->queue_lock); set_current_state(TASK_INTERRUPTIBLE); req = blk_fetch_request(q); - mq->req = req; + mq->mqrq_cur->req = req; spin_unlock_irq(q->queue_lock); - if (!req) { + if (req || mq->mqrq_prev->req) { + set_current_state(TASK_RUNNING); + mq->issue_fn(mq, req); + } else { if (kthread_should_stop()) { set_current_state(TASK_RUNNING); break; @@ -67,11 +71,14 @@ static int mmc_queue_thread(void *d) up(&mq->thread_sem); schedule(); down(&mq->thread_sem); - continue; } - set_current_state(TASK_RUNNING); - mq->issue_fn(mq, req); + /* Current request becomes previous request and vice versa. 
*/ + mq->mqrq_prev->brq.mrq.data = NULL; + mq->mqrq_prev->req = NULL; + tmp = mq->mqrq_prev; + mq->mqrq_prev = mq->mqrq_cur; + mq->mqrq_cur = tmp; } while (1); up(&mq->thread_sem); @@ -97,10 +104,46 @@ static void mmc_request(struct request_queue *q) return; } - if (!mq->req) + if (!mq->mqrq_cur->req && !mq->mqrq_prev->req) wake_up_process(mq->thread); } +struct scatterlist *mmc_alloc_sg(int sg_len, int *err) +{ + struct scatterlist *sg; + + sg = kmalloc(sizeof(struct scatterlist)*sg_len, GFP_KERNEL); + if (!sg) + *err = -ENOMEM; + else { + *err = 0; + sg_init_table(sg, sg_len); + } + + return sg; +} + +static void mmc_queue_setup_discard(struct request_queue *q, + struct mmc_card *card) +{ + unsigned max_discard; + + max_discard = mmc_calc_max_discard(card); + if (!max_discard) + return; + + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + q->limits.max_discard_sectors = max_discard; + if (card->erased_byte == 0) + q->limits.discard_zeroes_data = 1; + q->limits.discard_granularity = card->pref_erase << 9; + /* granularity must not be greater than max. discard */ + if (card->pref_erase > max_discard) + q->limits.discard_granularity = 0; + if (mmc_can_secure_erase_trim(card)) + queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q); +} + /** * mmc_init_queue - initialise a queue structure. * @mq: mmc queue @@ -116,6 +159,8 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, struct mmc_host *host = card->host; u64 limit = BLK_BOUNCE_HIGH; int ret; + struct mmc_queue_req *mqrq_cur = &mq->mqrq[0]; + struct mmc_queue_req *mqrq_prev = &mq->mqrq[1]; if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask) limit = *mmc_dev(host)->dma_mask; @@ -125,21 +170,16 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, if (!mq->queue) return -ENOMEM; + memset(&mq->mqrq_cur, 0, sizeof(mq->mqrq_cur)); + memset(&mq->mqrq_prev, 0, sizeof(mq->mqrq_prev)); + mq->mqrq_cur = mqrq_cur; + mq->mqrq_prev = mqrq_prev; mq->queue->queuedata = mq; - mq->req = NULL; blk_queue_prep_rq(mq->queue, mmc_prep_request); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue); - if (mmc_can_erase(card)) { - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue); - mq->queue->limits.max_discard_sectors = UINT_MAX; - if (card->erased_byte == 0) - mq->queue->limits.discard_zeroes_data = 1; - mq->queue->limits.discard_granularity = card->pref_erase << 9; - if (mmc_can_secure_erase_trim(card)) - queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, - mq->queue); - } + if (mmc_can_erase(card)) + mmc_queue_setup_discard(mq->queue, card); #ifdef CONFIG_MMC_BLOCK_BOUNCE if (host->max_segs == 1) { @@ -155,53 +195,64 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, bouncesz = host->max_blk_count * 512; if (bouncesz > 512) { - mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL); - if (!mq->bounce_buf) { + mqrq_cur->bounce_buf = kmalloc(bouncesz, GFP_KERNEL); + if (!mqrq_cur->bounce_buf) { + printk(KERN_WARNING "%s: unable to " + "allocate bounce cur buffer\n", + mmc_card_name(card)); + } + mqrq_prev->bounce_buf = kmalloc(bouncesz, GFP_KERNEL); + if (!mqrq_prev->bounce_buf) { printk(KERN_WARNING "%s: unable to " - "allocate bounce buffer\n", + "allocate bounce prev buffer\n", mmc_card_name(card)); + kfree(mqrq_cur->bounce_buf); + mqrq_cur->bounce_buf = NULL; } } - if (mq->bounce_buf) { + if (mqrq_cur->bounce_buf && mqrq_prev->bounce_buf) { blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY); blk_queue_max_hw_sectors(mq->queue, bouncesz / 512); blk_queue_max_segments(mq->queue, bouncesz / 512); 
blk_queue_max_segment_size(mq->queue, bouncesz); - mq->sg = kmalloc(sizeof(struct scatterlist), - GFP_KERNEL); - if (!mq->sg) { - ret = -ENOMEM; + mqrq_cur->sg = mmc_alloc_sg(1, &ret); + if (ret) goto cleanup_queue; - } - sg_init_table(mq->sg, 1); - mq->bounce_sg = kmalloc(sizeof(struct scatterlist) * - bouncesz / 512, GFP_KERNEL); - if (!mq->bounce_sg) { - ret = -ENOMEM; + mqrq_cur->bounce_sg = + mmc_alloc_sg(bouncesz / 512, &ret); + if (ret) + goto cleanup_queue; + + mqrq_prev->sg = mmc_alloc_sg(1, &ret); + if (ret) + goto cleanup_queue; + + mqrq_prev->bounce_sg = + mmc_alloc_sg(bouncesz / 512, &ret); + if (ret) goto cleanup_queue; - } - sg_init_table(mq->bounce_sg, bouncesz / 512); } } #endif - if (!mq->bounce_buf) { + if (!mqrq_cur->bounce_buf && !mqrq_prev->bounce_buf) { blk_queue_bounce_limit(mq->queue, limit); blk_queue_max_hw_sectors(mq->queue, min(host->max_blk_count, host->max_req_size / 512)); blk_queue_max_segments(mq->queue, host->max_segs); blk_queue_max_segment_size(mq->queue, host->max_seg_size); - mq->sg = kmalloc(sizeof(struct scatterlist) * - host->max_segs, GFP_KERNEL); - if (!mq->sg) { - ret = -ENOMEM; + mqrq_cur->sg = mmc_alloc_sg(host->max_segs, &ret); + if (ret) + goto cleanup_queue; + + + mqrq_prev->sg = mmc_alloc_sg(host->max_segs, &ret); + if (ret) goto cleanup_queue; - } - sg_init_table(mq->sg, host->max_segs); } sema_init(&mq->thread_sem, 1); @@ -216,16 +267,22 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, return 0; free_bounce_sg: - if (mq->bounce_sg) - kfree(mq->bounce_sg); - mq->bounce_sg = NULL; + kfree(mqrq_cur->bounce_sg); + mqrq_cur->bounce_sg = NULL; + kfree(mqrq_prev->bounce_sg); + mqrq_prev->bounce_sg = NULL; + cleanup_queue: - if (mq->sg) - kfree(mq->sg); - mq->sg = NULL; - if (mq->bounce_buf) - kfree(mq->bounce_buf); - mq->bounce_buf = NULL; + kfree(mqrq_cur->sg); + mqrq_cur->sg = NULL; + kfree(mqrq_cur->bounce_buf); + mqrq_cur->bounce_buf = NULL; + + kfree(mqrq_prev->sg); + mqrq_prev->sg = NULL; + kfree(mqrq_prev->bounce_buf); + mqrq_prev->bounce_buf = NULL; + blk_cleanup_queue(mq->queue); return ret; } @@ -234,6 +291,8 @@ void mmc_cleanup_queue(struct mmc_queue *mq) { struct request_queue *q = mq->queue; unsigned long flags; + struct mmc_queue_req *mqrq_cur = mq->mqrq_cur; + struct mmc_queue_req *mqrq_prev = mq->mqrq_prev; /* Make sure the queue isn't suspended, as that will deadlock */ mmc_queue_resume(mq); @@ -247,16 +306,23 @@ void mmc_cleanup_queue(struct mmc_queue *mq) blk_start_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); - if (mq->bounce_sg) - kfree(mq->bounce_sg); - mq->bounce_sg = NULL; + kfree(mqrq_cur->bounce_sg); + mqrq_cur->bounce_sg = NULL; - kfree(mq->sg); - mq->sg = NULL; + kfree(mqrq_cur->sg); + mqrq_cur->sg = NULL; - if (mq->bounce_buf) - kfree(mq->bounce_buf); - mq->bounce_buf = NULL; + kfree(mqrq_cur->bounce_buf); + mqrq_cur->bounce_buf = NULL; + + kfree(mqrq_prev->bounce_sg); + mqrq_prev->bounce_sg = NULL; + + kfree(mqrq_prev->sg); + mqrq_prev->sg = NULL; + + kfree(mqrq_prev->bounce_buf); + mqrq_prev->bounce_buf = NULL; mq->card = NULL; } @@ -309,27 +375,27 @@ void mmc_queue_resume(struct mmc_queue *mq) /* * Prepare the sg list(s) to be handed of to the host driver */ -unsigned int mmc_queue_map_sg(struct mmc_queue *mq) +unsigned int mmc_queue_map_sg(struct mmc_queue *mq, struct mmc_queue_req *mqrq) { unsigned int sg_len; size_t buflen; struct scatterlist *sg; int i; - if (!mq->bounce_buf) - return blk_rq_map_sg(mq->queue, mq->req, mq->sg); + if (!mqrq->bounce_buf) + return 
blk_rq_map_sg(mq->queue, mqrq->req, mqrq->sg); - BUG_ON(!mq->bounce_sg); + BUG_ON(!mqrq->bounce_sg); - sg_len = blk_rq_map_sg(mq->queue, mq->req, mq->bounce_sg); + sg_len = blk_rq_map_sg(mq->queue, mqrq->req, mqrq->bounce_sg); - mq->bounce_sg_len = sg_len; + mqrq->bounce_sg_len = sg_len; buflen = 0; - for_each_sg(mq->bounce_sg, sg, sg_len, i) + for_each_sg(mqrq->bounce_sg, sg, sg_len, i) buflen += sg->length; - sg_init_one(mq->sg, mq->bounce_buf, buflen); + sg_init_one(mqrq->sg, mqrq->bounce_buf, buflen); return 1; } @@ -338,31 +404,30 @@ unsigned int mmc_queue_map_sg(struct mmc_queue *mq) * If writing, bounce the data to the buffer before the request * is sent to the host driver */ -void mmc_queue_bounce_pre(struct mmc_queue *mq) +void mmc_queue_bounce_pre(struct mmc_queue_req *mqrq) { - if (!mq->bounce_buf) + if (!mqrq->bounce_buf) return; - if (rq_data_dir(mq->req) != WRITE) + if (rq_data_dir(mqrq->req) != WRITE) return; - sg_copy_to_buffer(mq->bounce_sg, mq->bounce_sg_len, - mq->bounce_buf, mq->sg[0].length); + sg_copy_to_buffer(mqrq->bounce_sg, mqrq->bounce_sg_len, + mqrq->bounce_buf, mqrq->sg[0].length); } /* * If reading, bounce the data from the buffer after the request * has been handled by the host driver */ -void mmc_queue_bounce_post(struct mmc_queue *mq) +void mmc_queue_bounce_post(struct mmc_queue_req *mqrq) { - if (!mq->bounce_buf) + if (!mqrq->bounce_buf) return; - if (rq_data_dir(mq->req) != READ) + if (rq_data_dir(mqrq->req) != READ) return; - sg_copy_from_buffer(mq->bounce_sg, mq->bounce_sg_len, - mq->bounce_buf, mq->sg[0].length); + sg_copy_from_buffer(mqrq->bounce_sg, mqrq->bounce_sg_len, + mqrq->bounce_buf, mqrq->sg[0].length); } - diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h index 6223ef8dc9cd..d2a1eb4b9f9f 100644 --- a/drivers/mmc/card/queue.h +++ b/drivers/mmc/card/queue.h @@ -4,19 +4,35 @@ struct request; struct task_struct; +struct mmc_blk_request { + struct mmc_request mrq; + struct mmc_command sbc; + struct mmc_command cmd; + struct mmc_command stop; + struct mmc_data data; +}; + +struct mmc_queue_req { + struct request *req; + struct mmc_blk_request brq; + struct scatterlist *sg; + char *bounce_buf; + struct scatterlist *bounce_sg; + unsigned int bounce_sg_len; + struct mmc_async_req mmc_active; +}; + struct mmc_queue { struct mmc_card *card; struct task_struct *thread; struct semaphore thread_sem; unsigned int flags; - struct request *req; int (*issue_fn)(struct mmc_queue *, struct request *); void *data; struct request_queue *queue; - struct scatterlist *sg; - char *bounce_buf; - struct scatterlist *bounce_sg; - unsigned int bounce_sg_len; + struct mmc_queue_req mqrq[2]; + struct mmc_queue_req *mqrq_cur; + struct mmc_queue_req *mqrq_prev; }; extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *, @@ -25,8 +41,9 @@ extern void mmc_cleanup_queue(struct mmc_queue *); extern void mmc_queue_suspend(struct mmc_queue *); extern void mmc_queue_resume(struct mmc_queue *); -extern unsigned int mmc_queue_map_sg(struct mmc_queue *); -extern void mmc_queue_bounce_pre(struct mmc_queue *); -extern void mmc_queue_bounce_post(struct mmc_queue *); +extern unsigned int mmc_queue_map_sg(struct mmc_queue *, + struct mmc_queue_req *); +extern void mmc_queue_bounce_pre(struct mmc_queue_req *); +extern void mmc_queue_bounce_post(struct mmc_queue_req *); #endif diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 7843efe22359..f091b43d00c4 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -198,9 +198,109 
@@ mmc_start_request(struct mmc_host *host, struct mmc_request *mrq) static void mmc_wait_done(struct mmc_request *mrq) { - complete(mrq->done_data); + complete(&mrq->completion); } +static void __mmc_start_req(struct mmc_host *host, struct mmc_request *mrq) +{ + init_completion(&mrq->completion); + mrq->done = mmc_wait_done; + mmc_start_request(host, mrq); +} + +static void mmc_wait_for_req_done(struct mmc_host *host, + struct mmc_request *mrq) +{ + wait_for_completion(&mrq->completion); +} + +/** + * mmc_pre_req - Prepare for a new request + * @host: MMC host to prepare command + * @mrq: MMC request to prepare for + * @is_first_req: true if there is no previously started request + * that may run in parallel to this call, otherwise false + * + * mmc_pre_req() is called prior to mmc_start_req() to let the + * host prepare for the new request. Preparation of a request may be + * performed while another request is running on the host. + */ +static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq, + bool is_first_req) +{ + if (host->ops->pre_req) + host->ops->pre_req(host, mrq, is_first_req); +} + +/** + * mmc_post_req - Post process a completed request + * @host: MMC host to post process command + * @mrq: MMC request to post process for + * @err: Error; if non-zero, clean up any resources set up in pre_req + * + * Let the host post process a completed request. Post processing of + * a request may be performed while another request is running. + */ +static void mmc_post_req(struct mmc_host *host, struct mmc_request *mrq, + int err) +{ + if (host->ops->post_req) + host->ops->post_req(host, mrq, err); +} + +/** + * mmc_start_req - start a non-blocking request + * @host: MMC host to start command + * @areq: async request to start + * @error: out parameter; returns 0 for success, otherwise non-zero + * + * Start a new MMC custom command request for a host. + * If there is an ongoing async request, wait for its completion, + * then start the new one and return. + * Does not wait for the new request to complete. + * + * Returns the completed request, or NULL if none completed. + * Wait for an ongoing request (previously started) to complete and + * return the completed request. If there is no ongoing request, NULL + * is returned without waiting. NULL is not an error condition.
+ */ +struct mmc_async_req *mmc_start_req(struct mmc_host *host, + struct mmc_async_req *areq, int *error) +{ + int err = 0; + struct mmc_async_req *data = host->areq; + + /* Prepare a new request */ + if (areq) + mmc_pre_req(host, areq->mrq, !host->areq); + + if (host->areq) { + mmc_wait_for_req_done(host, host->areq->mrq); + err = host->areq->err_check(host->card, host->areq); + if (err) { + mmc_post_req(host, host->areq->mrq, 0); + if (areq) + mmc_post_req(host, areq->mrq, -EINVAL); + + host->areq = NULL; + goto out; + } + } + + if (areq) + __mmc_start_req(host, areq->mrq); + + if (host->areq) + mmc_post_req(host, host->areq->mrq, 0); + + host->areq = areq; + out: + if (error) + *error = err; + return data; +} +EXPORT_SYMBOL(mmc_start_req); + /** * mmc_wait_for_req - start a request and wait for completion @@ -212,16 +312,9 @@ static void mmc_wait_done(struct mmc_request *mrq) */ void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq) { - DECLARE_COMPLETION_ONSTACK(complete); - - mrq->done_data = &complete; - mrq->done = mmc_wait_done; - - mmc_start_request(host, mrq); - - wait_for_completion(&complete); + __mmc_start_req(host, mrq); + mmc_wait_for_req_done(host, mrq); } - EXPORT_SYMBOL(mmc_wait_for_req); /** @@ -1516,6 +1609,82 @@ int mmc_erase_group_aligned(struct mmc_card *card, unsigned int from, } EXPORT_SYMBOL(mmc_erase_group_aligned); +static unsigned int mmc_do_calc_max_discard(struct mmc_card *card, + unsigned int arg) +{ + struct mmc_host *host = card->host; + unsigned int max_discard, x, y, qty = 0, max_qty, timeout; + unsigned int last_timeout = 0; + + if (card->erase_shift) + max_qty = UINT_MAX >> card->erase_shift; + else if (mmc_card_sd(card)) + max_qty = UINT_MAX; + else + max_qty = UINT_MAX / card->erase_size; + + /* Find the largest qty with an OK timeout */ + do { + y = 0; + for (x = 1; x && x <= max_qty && max_qty - x >= qty; x <<= 1) { + timeout = mmc_erase_timeout(card, arg, qty + x); + if (timeout > host->max_discard_to) + break; + if (timeout < last_timeout) + break; + last_timeout = timeout; + y = x; + } + qty += y; + } while (y); + + if (!qty) + return 0; + + if (qty == 1) + return 1; + + /* Convert qty to sectors */ + if (card->erase_shift) + max_discard = --qty << card->erase_shift; + else if (mmc_card_sd(card)) + max_discard = qty; + else + max_discard = --qty * card->erase_size; + + return max_discard; +} + +unsigned int mmc_calc_max_discard(struct mmc_card *card) +{ + struct mmc_host *host = card->host; + unsigned int max_discard, max_trim; + + if (!host->max_discard_to) + return UINT_MAX; + + /* + * Without erase_group_def set, MMC erase timeout depends on clock + * frequency, which can change. In that case, the best choice is + * just the preferred erase size. + */ + if (mmc_card_mmc(card) && !(card->ext_csd.erase_group_def & 1)) + return card->pref_erase; + + max_discard = mmc_do_calc_max_discard(card, MMC_ERASE_ARG); + if (mmc_can_trim(card)) { + max_trim = mmc_do_calc_max_discard(card, MMC_TRIM_ARG); + if (max_trim < max_discard) + max_discard = max_trim; + } else if (max_discard < card->erase_size) { + max_discard = 0; + } + pr_debug("%s: calculated max. 
 discard sectors %u for timeout %u ms\n", + mmc_hostname(host), max_discard, host->max_discard_to); + return max_discard; +} +EXPORT_SYMBOL(mmc_calc_max_discard); + int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen) { struct mmc_command cmd = {0}; @@ -1663,6 +1832,10 @@ int mmc_power_save_host(struct mmc_host *host) { int ret = 0; +#ifdef CONFIG_MMC_DEBUG + pr_info("%s: %s: powering down\n", mmc_hostname(host), __func__); +#endif + mmc_bus_get(host); if (!host->bus_ops || host->bus_dead || !host->bus_ops->power_restore) { @@ -1685,6 +1858,10 @@ int mmc_power_restore_host(struct mmc_host *host) { int ret; +#ifdef CONFIG_MMC_DEBUG + pr_info("%s: %s: powering up\n", mmc_hostname(host), __func__); +#endif + mmc_bus_get(host); if (!host->bus_ops || host->bus_dead || !host->bus_ops->power_restore) { diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index ff2774128aa9..633975ff2bb3 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -409,52 +409,62 @@ out: static int sd_select_driver_type(struct mmc_card *card, u8 *status) { - int host_drv_type = 0, card_drv_type = 0; + int host_drv_type = SD_DRIVER_TYPE_B; + int card_drv_type = SD_DRIVER_TYPE_B; + int drive_strength; int err; /* * If the host doesn't support any of the Driver Types A,C or D, - * default Driver Type B is used. + * or there is no board-specific handler, then the default Driver + * Type B is used. */ if (!(card->host->caps & (MMC_CAP_DRIVER_TYPE_A | MMC_CAP_DRIVER_TYPE_C | MMC_CAP_DRIVER_TYPE_D))) return 0; - if (card->host->caps & MMC_CAP_DRIVER_TYPE_A) { - host_drv_type = MMC_SET_DRIVER_TYPE_A; - if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A) - card_drv_type = MMC_SET_DRIVER_TYPE_A; - else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_B) - card_drv_type = MMC_SET_DRIVER_TYPE_B; - else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C) - card_drv_type = MMC_SET_DRIVER_TYPE_C; - } else if (card->host->caps & MMC_CAP_DRIVER_TYPE_C) { - host_drv_type = MMC_SET_DRIVER_TYPE_C; - if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C) - card_drv_type = MMC_SET_DRIVER_TYPE_C; - } else if (!(card->host->caps & MMC_CAP_DRIVER_TYPE_D)) { - /* - * If we are here, that means only the default driver type - * B is supported by the host. - */ - host_drv_type = MMC_SET_DRIVER_TYPE_B; - if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_B) - card_drv_type = MMC_SET_DRIVER_TYPE_B; - else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C) - card_drv_type = MMC_SET_DRIVER_TYPE_C; - } + if (!card->host->ops->select_drive_strength) + return 0; + + if (card->host->caps & MMC_CAP_DRIVER_TYPE_A) + host_drv_type |= SD_DRIVER_TYPE_A; + + if (card->host->caps & MMC_CAP_DRIVER_TYPE_C) + host_drv_type |= SD_DRIVER_TYPE_C; + + if (card->host->caps & MMC_CAP_DRIVER_TYPE_D) + host_drv_type |= SD_DRIVER_TYPE_D; + + if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A) + card_drv_type |= SD_DRIVER_TYPE_A; + + if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C) + card_drv_type |= SD_DRIVER_TYPE_C; + + if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_D) + card_drv_type |= SD_DRIVER_TYPE_D; + + /* + * The drive strength that the hardware can support + * depends on the board design. 
Pass the appropriate + * information and let the hardware specific code + * return what is possible given the options + */ + drive_strength = card->host->ops->select_drive_strength( + card->sw_caps.uhs_max_dtr, + host_drv_type, card_drv_type); - err = mmc_sd_switch(card, 1, 2, card_drv_type, status); + err = mmc_sd_switch(card, 1, 2, drive_strength, status); if (err) return err; - if ((status[15] & 0xF) != card_drv_type) { - printk(KERN_WARNING "%s: Problem setting driver strength!\n", + if ((status[15] & 0xF) != drive_strength) { + printk(KERN_WARNING "%s: Problem setting drive strength!\n", mmc_hostname(card->host)); return 0; } - mmc_set_driver_type(card->host, host_drv_type); + mmc_set_driver_type(card->host, drive_strength); return 0; } diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index d2565df8a7fb..e4e6822d09e3 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -167,11 +167,8 @@ static int sdio_bus_remove(struct device *dev) int ret = 0; /* Make sure card is powered before invoking ->remove() */ - if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) { - ret = pm_runtime_get_sync(dev); - if (ret < 0) - goto out; - } + if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) + pm_runtime_get_sync(dev); drv->remove(func); @@ -191,7 +188,6 @@ static int sdio_bus_remove(struct device *dev) if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) pm_runtime_put_sync(dev); -out: return ret; } diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 56dbf3f6ad08..8c87096531e9 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -81,28 +81,32 @@ config MMC_RICOH_MMC If unsure, say Y. -config MMC_SDHCI_OF - tristate "SDHCI support on OpenFirmware platforms" - depends on MMC_SDHCI && OF +config MMC_SDHCI_PLTFM + tristate "SDHCI platform and OF driver helper" + depends on MMC_SDHCI help - This selects the OF support for Secure Digital Host Controller - Interfaces. + This selects the common helper functions support for Secure Digital + Host Controller Interface based platform and OF drivers. + + If you have a controller with this interface, say Y or M here. If unsure, say N. config MMC_SDHCI_OF_ESDHC - bool "SDHCI OF support for the Freescale eSDHC controller" - depends on MMC_SDHCI_OF + tristate "SDHCI OF support for the Freescale eSDHC controller" + depends on MMC_SDHCI_PLTFM depends on PPC_OF select MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER help This selects the Freescale eSDHC controller support. + If you have a controller with this interface, say Y or M here. + If unsure, say N. config MMC_SDHCI_OF_HLWD - bool "SDHCI OF support for the Nintendo Wii SDHCI controllers" - depends on MMC_SDHCI_OF + tristate "SDHCI OF support for the Nintendo Wii SDHCI controllers" + depends on MMC_SDHCI_PLTFM depends on PPC_OF select MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER help @@ -110,40 +114,36 @@ config MMC_SDHCI_OF_HLWD found in the "Hollywood" chipset of the Nintendo Wii video game console. - If unsure, say N. - -config MMC_SDHCI_PLTFM - tristate "SDHCI support on the platform specific bus" - depends on MMC_SDHCI - help - This selects the platform specific bus support for Secure Digital Host - Controller Interface. - If you have a controller with this interface, say Y or M here. If unsure, say N. 
config MMC_SDHCI_CNS3XXX - bool "SDHCI support on the Cavium Networks CNS3xxx SoC" + tristate "SDHCI support on the Cavium Networks CNS3xxx SoC" depends on ARCH_CNS3XXX depends on MMC_SDHCI_PLTFM help This selects the SDHCI support for CNS3xxx System-on-Chip devices. + If you have a controller with this interface, say Y or M here. + If unsure, say N. config MMC_SDHCI_ESDHC_IMX - bool "SDHCI platform support for the Freescale eSDHC i.MX controller" - depends on MMC_SDHCI_PLTFM && (ARCH_MX25 || ARCH_MX35 || ARCH_MX5) + tristate "SDHCI platform support for the Freescale eSDHC i.MX controller" + depends on ARCH_MX25 || ARCH_MX35 || ARCH_MX5 + depends on MMC_SDHCI_PLTFM select MMC_SDHCI_IO_ACCESSORS help This selects the Freescale eSDHC controller support on the platform bus, found on platforms like mx35/51. + If you have a controller with this interface, say Y or M here. + If unsure, say N. config MMC_SDHCI_DOVE - bool "SDHCI support on Marvell's Dove SoC" + tristate "SDHCI support on Marvell's Dove SoC" depends on ARCH_DOVE depends on MMC_SDHCI_PLTFM select MMC_SDHCI_IO_ACCESSORS @@ -151,11 +151,14 @@ config MMC_SDHCI_DOVE This selects the Secure Digital Host Controller Interface in Marvell's Dove SoC. + If you have a controller with this interface, say Y or M here. + If unsure, say N. config MMC_SDHCI_TEGRA - bool "SDHCI platform support for the Tegra SD/MMC Controller" - depends on MMC_SDHCI_PLTFM && ARCH_TEGRA + tristate "SDHCI platform support for the Tegra SD/MMC Controller" + depends on ARCH_TEGRA + depends on MMC_SDHCI_PLTFM select MMC_SDHCI_IO_ACCESSORS help This selects the Tegra SD/MMC controller. If you have a Tegra @@ -178,14 +181,28 @@ config MMC_SDHCI_S3C If unsure, say N. -config MMC_SDHCI_PXA - tristate "Marvell PXA168/PXA910/MMP2 SD Host Controller support" - depends on ARCH_PXA || ARCH_MMP +config MMC_SDHCI_PXAV3 + tristate "Marvell MMP2 SD Host Controller support (PXAV3)" + depends on CLKDEV_LOOKUP select MMC_SDHCI - select MMC_SDHCI_IO_ACCESSORS + select MMC_SDHCI_PLTFM + default CPU_MMP2 + help + This selects the Marvell(R) PXAV3 SD Host Controller. + If you have a MMP2 platform with SD Host Controller + and a card slot, say Y or M here. + + If unsure, say N. + +config MMC_SDHCI_PXAV2 + tristate "Marvell PXA9XX SD Host Controller support (PXAV2)" + depends on CLKDEV_LOOKUP + select MMC_SDHCI + select MMC_SDHCI_PLTFM + default CPU_PXA910 help - This selects the Marvell(R) PXA168/PXA910/MMP2 SD Host Controller. - If you have a PXA168/PXA910/MMP2 platform with SD Host Controller + This selects the Marvell(R) PXAV2 SD Host Controller. + If you have a PXA9XX platform with SD Host Controller and a card slot, say Y or M here. If unsure, say N. @@ -281,13 +298,12 @@ config MMC_ATMELMCI endchoice config MMC_ATMELMCI_DMA - bool "Atmel MCI DMA support (EXPERIMENTAL)" - depends on MMC_ATMELMCI && (AVR32 || ARCH_AT91SAM9G45) && DMA_ENGINE && EXPERIMENTAL + bool "Atmel MCI DMA support" + depends on MMC_ATMELMCI && (AVR32 || ARCH_AT91SAM9G45) && DMA_ENGINE help Say Y here to have the Atmel MCI driver use a DMA engine to do data transfers and thus increase the throughput and - reduce the CPU utilization. Note that this is highly - experimental and may cause the driver to lock up. + reduce the CPU utilization. If unsure, say N. 
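The centrepiece of this series is the non-blocking request API added to drivers/mmc/core/core.c above: mmc_start_req() hands a new request to the host, blocks only until the *previously* issued request completes and passes its ->err_check(), and returns that completed request. A caller can therefore keep two requests in flight and overlap host-side preparation (the new ->pre_req hook, typically dma_map_sg()) with the ongoing transfer. Below is a minimal sketch of the intended issue loop, modelled on mmc_test_nonblock_transfer() from the mmc_test.c hunks; example_prepare_mrq() is a hypothetical caller-supplied helper, and both mmc_async_req slots are assumed to already have ->mrq and ->err_check set up.

static int example_issue_pipelined(struct mmc_host *host,
				   struct mmc_async_req *cur,
				   struct mmc_async_req *next,
				   int count)
{
	int i, err = 0;

	for (i = 0; i < count; i++) {
		example_prepare_mrq(cur->mrq, i);	/* hypothetical helper */
		/*
		 * Starts 'cur'; blocks only until the request issued on
		 * the previous iteration (if any) has completed and been
		 * checked, so preparing 'cur' overlapped that transfer.
		 */
		mmc_start_req(host, cur, &err);
		if (err)
			return err;
		swap(cur, next);
	}
	/* Passing NULL drains the last in-flight request. */
	mmc_start_req(host, NULL, &err);
	return err;
}

Note that on failure mmc_start_req() itself calls mmc_post_req() on both the failed request and the newly prepared one (the latter with -EINVAL), so the caller only has to check 'err'.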
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile index 58a5cf73d6e9..b4b83f302e32 100644 --- a/drivers/mmc/host/Makefile +++ b/drivers/mmc/host/Makefile @@ -9,7 +9,8 @@ obj-$(CONFIG_MMC_MXC) += mxcmmc.o obj-$(CONFIG_MMC_MXS) += mxs-mmc.o obj-$(CONFIG_MMC_SDHCI) += sdhci.o obj-$(CONFIG_MMC_SDHCI_PCI) += sdhci-pci.o -obj-$(CONFIG_MMC_SDHCI_PXA) += sdhci-pxa.o +obj-$(CONFIG_MMC_SDHCI_PXAV3) += sdhci-pxav3.o +obj-$(CONFIG_MMC_SDHCI_PXAV2) += sdhci-pxav2.o obj-$(CONFIG_MMC_SDHCI_S3C) += sdhci-s3c.o obj-$(CONFIG_MMC_SDHCI_SPEAR) += sdhci-spear.o obj-$(CONFIG_MMC_WBSD) += wbsd.o @@ -31,9 +32,7 @@ obj-$(CONFIG_MMC_SDRICOH_CS) += sdricoh_cs.o obj-$(CONFIG_MMC_TMIO) += tmio_mmc.o obj-$(CONFIG_MMC_TMIO_CORE) += tmio_mmc_core.o tmio_mmc_core-y := tmio_mmc_pio.o -ifneq ($(CONFIG_MMC_SDHI),n) -tmio_mmc_core-y += tmio_mmc_dma.o -endif +tmio_mmc_core-$(subst m,y,$(CONFIG_MMC_SDHI)) += tmio_mmc_dma.o obj-$(CONFIG_MMC_SDHI) += sh_mobile_sdhi.o obj-$(CONFIG_MMC_CB710) += cb710-mmc.o obj-$(CONFIG_MMC_VIA_SDMMC) += via-sdmmc.o @@ -44,17 +43,13 @@ obj-$(CONFIG_MMC_JZ4740) += jz4740_mmc.o obj-$(CONFIG_MMC_VUB300) += vub300.o obj-$(CONFIG_MMC_USHC) += ushc.o -obj-$(CONFIG_MMC_SDHCI_PLTFM) += sdhci-platform.o -sdhci-platform-y := sdhci-pltfm.o -sdhci-platform-$(CONFIG_MMC_SDHCI_CNS3XXX) += sdhci-cns3xxx.o -sdhci-platform-$(CONFIG_MMC_SDHCI_ESDHC_IMX) += sdhci-esdhc-imx.o -sdhci-platform-$(CONFIG_MMC_SDHCI_DOVE) += sdhci-dove.o -sdhci-platform-$(CONFIG_MMC_SDHCI_TEGRA) += sdhci-tegra.o - -obj-$(CONFIG_MMC_SDHCI_OF) += sdhci-of.o -sdhci-of-y := sdhci-of-core.o -sdhci-of-$(CONFIG_MMC_SDHCI_OF_ESDHC) += sdhci-of-esdhc.o -sdhci-of-$(CONFIG_MMC_SDHCI_OF_HLWD) += sdhci-of-hlwd.o +obj-$(CONFIG_MMC_SDHCI_PLTFM) += sdhci-pltfm.o +obj-$(CONFIG_MMC_SDHCI_CNS3XXX) += sdhci-cns3xxx.o +obj-$(CONFIG_MMC_SDHCI_ESDHC_IMX) += sdhci-esdhc-imx.o +obj-$(CONFIG_MMC_SDHCI_DOVE) += sdhci-dove.o +obj-$(CONFIG_MMC_SDHCI_TEGRA) += sdhci-tegra.o +obj-$(CONFIG_MMC_SDHCI_OF_ESDHC) += sdhci-of-esdhc.o +obj-$(CONFIG_MMC_SDHCI_OF_HLWD) += sdhci-of-hlwd.o ifeq ($(CONFIG_CB710_DEBUG),y) CFLAGS-cb710-mmc += -DDEBUG diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index d3e6a962f423..a4aa3af86fed 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -77,7 +77,8 @@ #include <mach/board.h> #include <mach/cpu.h> -#include <mach/at91_mci.h> + +#include "at91_mci.h" #define DRIVER_NAME "at91_mci" diff --git a/drivers/mmc/host/at91_mci.h b/drivers/mmc/host/at91_mci.h new file mode 100644 index 000000000000..eec3a6b1c2bc --- /dev/null +++ b/drivers/mmc/host/at91_mci.h @@ -0,0 +1,115 @@ +/* + * drivers/mmc/host/at91_mci.h + * + * Copyright (C) 2005 Ivan Kokshaysky + * Copyright (C) SAN People + * + * MultiMedia Card Interface (MCI) registers. + * Based on AT91RM9200 datasheet revision F. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef AT91_MCI_H +#define AT91_MCI_H + +#define AT91_MCI_CR 0x00 /* Control Register */ +#define AT91_MCI_MCIEN (1 << 0) /* Multi-Media Interface Enable */ +#define AT91_MCI_MCIDIS (1 << 1) /* Multi-Media Interface Disable */ +#define AT91_MCI_PWSEN (1 << 2) /* Power Save Mode Enable */ +#define AT91_MCI_PWSDIS (1 << 3) /* Power Save Mode Disable */ +#define AT91_MCI_SWRST (1 << 7) /* Software Reset */ + +#define AT91_MCI_MR 0x04 /* Mode Register */ +#define AT91_MCI_CLKDIV (0xff << 0) /* Clock Divider */ +#define AT91_MCI_PWSDIV (7 << 8) /* Power Saving Divider */ +#define AT91_MCI_RDPROOF (1 << 11) /* Read Proof Enable [SAM926[03] only] */ +#define AT91_MCI_WRPROOF (1 << 12) /* Write Proof Enable [SAM926[03] only] */ +#define AT91_MCI_PDCFBYTE (1 << 13) /* PDC Force Byte Transfer [SAM926[03] only] */ +#define AT91_MCI_PDCPADV (1 << 14) /* PDC Padding Value */ +#define AT91_MCI_PDCMODE (1 << 15) /* PDC-orientated Mode */ +#define AT91_MCI_BLKLEN (0xfff << 18) /* Data Block Length */ + +#define AT91_MCI_DTOR 0x08 /* Data Timeout Register */ +#define AT91_MCI_DTOCYC (0xf << 0) /* Data Timeout Cycle Number */ +#define AT91_MCI_DTOMUL (7 << 4) /* Data Timeout Multiplier */ +#define AT91_MCI_DTOMUL_1 (0 << 4) +#define AT91_MCI_DTOMUL_16 (1 << 4) +#define AT91_MCI_DTOMUL_128 (2 << 4) +#define AT91_MCI_DTOMUL_256 (3 << 4) +#define AT91_MCI_DTOMUL_1K (4 << 4) +#define AT91_MCI_DTOMUL_4K (5 << 4) +#define AT91_MCI_DTOMUL_64K (6 << 4) +#define AT91_MCI_DTOMUL_1M (7 << 4) + +#define AT91_MCI_SDCR 0x0c /* SD Card Register */ +#define AT91_MCI_SDCSEL (3 << 0) /* SD Card Selector */ +#define AT91_MCI_SDCBUS (1 << 7) /* 1-bit or 4-bit bus */ + +#define AT91_MCI_ARGR 0x10 /* Argument Register */ + +#define AT91_MCI_CMDR 0x14 /* Command Register */ +#define AT91_MCI_CMDNB (0x3f << 0) /* Command Number */ +#define AT91_MCI_RSPTYP (3 << 6) /* Response Type */ +#define AT91_MCI_RSPTYP_NONE (0 << 6) +#define AT91_MCI_RSPTYP_48 (1 << 6) +#define AT91_MCI_RSPTYP_136 (2 << 6) +#define AT91_MCI_SPCMD (7 << 8) /* Special Command */ +#define AT91_MCI_SPCMD_NONE (0 << 8) +#define AT91_MCI_SPCMD_INIT (1 << 8) +#define AT91_MCI_SPCMD_SYNC (2 << 8) +#define AT91_MCI_SPCMD_ICMD (4 << 8) +#define AT91_MCI_SPCMD_IRESP (5 << 8) +#define AT91_MCI_OPDCMD (1 << 11) /* Open Drain Command */ +#define AT91_MCI_MAXLAT (1 << 12) /* Max Latency for Command to Response */ +#define AT91_MCI_TRCMD (3 << 16) /* Transfer Command */ +#define AT91_MCI_TRCMD_NONE (0 << 16) +#define AT91_MCI_TRCMD_START (1 << 16) +#define AT91_MCI_TRCMD_STOP (2 << 16) +#define AT91_MCI_TRDIR (1 << 18) /* Transfer Direction */ +#define AT91_MCI_TRTYP (3 << 19) /* Transfer Type */ +#define AT91_MCI_TRTYP_BLOCK (0 << 19) +#define AT91_MCI_TRTYP_MULTIPLE (1 << 19) +#define AT91_MCI_TRTYP_STREAM (2 << 19) +#define AT91_MCI_TRTYP_SDIO_BYTE (4 << 19) +#define AT91_MCI_TRTYP_SDIO_BLOCK (5 << 19) + +#define AT91_MCI_BLKR 0x18 /* Block Register */ +#define AT91_MCI_BLKR_BCNT(n) ((0xffff & (n)) << 0) /* Block count */ +#define AT91_MCI_BLKR_BLKLEN(n) ((0xffff & (n)) << 16) /* Block length */ + +#define AT91_MCI_RSPR(n) (0x20 + ((n) * 4)) /* Response Registers 0-3 */ +#define AT91_MCR_RDR 0x30 /* Receive Data Register */ +#define AT91_MCR_TDR 0x34 /* Transmit Data Register */ + +#define AT91_MCI_SR 0x40 /* Status Register */ +#define AT91_MCI_CMDRDY (1 << 0) /* Command Ready */ +#define AT91_MCI_RXRDY (1 << 1) /* Receiver Ready */ +#define AT91_MCI_TXRDY (1 << 2) /* Transmit Ready */ +#define AT91_MCI_BLKE (1 << 3) /* Data Block Ended */ +#define 
AT91_MCI_DTIP (1 << 4) /* Data Transfer in Progress */ +#define AT91_MCI_NOTBUSY (1 << 5) /* Data Not Busy */ +#define AT91_MCI_ENDRX (1 << 6) /* End of RX Buffer */ +#define AT91_MCI_ENDTX (1 << 7) /* End of TX Buffer */ +#define AT91_MCI_SDIOIRQA (1 << 8) /* SDIO Interrupt for Slot A */ +#define AT91_MCI_SDIOIRQB (1 << 9) /* SDIO Interrupt for Slot B */ +#define AT91_MCI_RXBUFF (1 << 14) /* RX Buffer Full */ +#define AT91_MCI_TXBUFE (1 << 15) /* TX Buffer Empty */ +#define AT91_MCI_RINDE (1 << 16) /* Response Index Error */ +#define AT91_MCI_RDIRE (1 << 17) /* Response Direction Error */ +#define AT91_MCI_RCRCE (1 << 18) /* Response CRC Error */ +#define AT91_MCI_RENDE (1 << 19) /* Response End Bit Error */ +#define AT91_MCI_RTOE (1 << 20) /* Response Time-out Error */ +#define AT91_MCI_DCRCE (1 << 21) /* Data CRC Error */ +#define AT91_MCI_DTOE (1 << 22) /* Data Time-out Error */ +#define AT91_MCI_OVRE (1 << 30) /* Overrun */ +#define AT91_MCI_UNRE (1 << 31) /* Underrun */ + +#define AT91_MCI_IER 0x44 /* Interrupt Enable Register */ +#define AT91_MCI_IDR 0x48 /* Interrupt Disable Register */ +#define AT91_MCI_IMR 0x4c /* Interrupt Mask Register */ + +#endif diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index aa8039f473c4..fa8cae1d7005 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -203,6 +203,7 @@ struct atmel_mci_slot { #define ATMCI_CARD_PRESENT 0 #define ATMCI_CARD_NEED_INIT 1 #define ATMCI_SHUTDOWN 2 +#define ATMCI_SUSPENDED 3 int detect_pin; int wp_pin; @@ -1878,10 +1879,72 @@ static int __exit atmci_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM +static int atmci_suspend(struct device *dev) +{ + struct atmel_mci *host = dev_get_drvdata(dev); + int i; + + for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) { + struct atmel_mci_slot *slot = host->slot[i]; + int ret; + + if (!slot) + continue; + ret = mmc_suspend_host(slot->mmc); + if (ret < 0) { + while (--i >= 0) { + slot = host->slot[i]; + if (slot + && test_bit(ATMCI_SUSPENDED, &slot->flags)) { + mmc_resume_host(host->slot[i]->mmc); + clear_bit(ATMCI_SUSPENDED, &slot->flags); + } + } + return ret; + } else { + set_bit(ATMCI_SUSPENDED, &slot->flags); + } + } + + return 0; +} + +static int atmci_resume(struct device *dev) +{ + struct atmel_mci *host = dev_get_drvdata(dev); + int i; + int ret = 0; + + for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) { + struct atmel_mci_slot *slot = host->slot[i]; + int err; + + if (!slot) + continue; + if (!test_bit(ATMCI_SUSPENDED, &slot->flags)) + continue; + err = mmc_resume_host(slot->mmc); + if (err < 0) + ret = err; + else + clear_bit(ATMCI_SUSPENDED, &slot->flags); + } + + return ret; +} +static SIMPLE_DEV_PM_OPS(atmci_pm, atmci_suspend, atmci_resume); +#define ATMCI_PM_OPS (&atmci_pm) +#else +#define ATMCI_PM_OPS NULL +#endif + static struct platform_driver atmci_driver = { .remove = __exit_p(atmci_remove), .driver = { .name = "atmel_mci", + .pm = ATMCI_PM_OPS, }, }; diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 66dcddb9c205..0c839d3338db 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -33,6 +33,7 @@ #include <linux/mmc/dw_mmc.h> #include <linux/bitops.h> #include <linux/regulator/consumer.h> +#include <linux/workqueue.h> #include "dw_mmc.h" @@ -100,6 +101,8 @@ struct dw_mci_slot { int last_detect_state; }; +static struct workqueue_struct *dw_mci_card_workqueue; + #if defined(CONFIG_DEBUG_FS) static int dw_mci_req_show(struct seq_file *s, void *v) 
{ @@ -284,7 +287,7 @@ static void send_stop_cmd(struct dw_mci *host, struct mmc_data *data) /* DMA interface functions */ static void dw_mci_stop_dma(struct dw_mci *host) { - if (host->use_dma) { + if (host->using_dma) { host->dma_ops->stop(host); host->dma_ops->cleanup(host); } else { @@ -432,6 +435,8 @@ static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data) unsigned int i, direction, sg_len; u32 temp; + host->using_dma = 0; + /* If we don't have a channel, we can't do DMA */ if (!host->use_dma) return -ENODEV; @@ -451,6 +456,8 @@ static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data) return -EINVAL; } + host->using_dma = 1; + if (data->flags & MMC_DATA_READ) direction = DMA_FROM_DEVICE; else @@ -489,14 +496,18 @@ static void dw_mci_submit_data(struct dw_mci *host, struct mmc_data *data) host->sg = NULL; host->data = data; + if (data->flags & MMC_DATA_READ) + host->dir_status = DW_MCI_RECV_STATUS; + else + host->dir_status = DW_MCI_SEND_STATUS; + if (dw_mci_submit_data_dma(host, data)) { host->sg = data->sg; host->pio_offset = 0; - if (data->flags & MMC_DATA_READ) - host->dir_status = DW_MCI_RECV_STATUS; - else - host->dir_status = DW_MCI_SEND_STATUS; + host->part_buf_start = 0; + host->part_buf_count = 0; + mci_writel(host, RINTSTS, SDMMC_INT_TXDR | SDMMC_INT_RXDR); temp = mci_readl(host, INTMASK); temp |= SDMMC_INT_TXDR | SDMMC_INT_RXDR; mci_writel(host, INTMASK, temp); @@ -574,7 +585,7 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot) } /* Set the current slot bus width */ - mci_writel(host, CTYPE, slot->ctype); + mci_writel(host, CTYPE, (slot->ctype << slot->id)); } static void dw_mci_start_request(struct dw_mci *host, @@ -624,13 +635,13 @@ static void dw_mci_start_request(struct dw_mci *host, host->stop_cmdr = dw_mci_prepare_command(slot->mmc, mrq->stop); } +/* must be called with host->lock held */ static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot, struct mmc_request *mrq) { dev_vdbg(&slot->mmc->class_dev, "queue request: state=%d\n", host->state); - spin_lock_bh(&host->lock); slot->mrq = mrq; if (host->state == STATE_IDLE) { @@ -639,8 +650,6 @@ static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot, } else { list_add_tail(&slot->queue_node, &host->queue); } - - spin_unlock_bh(&host->lock); } static void dw_mci_request(struct mmc_host *mmc, struct mmc_request *mrq) @@ -650,14 +659,23 @@ static void dw_mci_request(struct mmc_host *mmc, struct mmc_request *mrq) WARN_ON(slot->mrq); + /* + * The check for card presence and queueing of the request must be + * atomic, otherwise the card could be removed in between and the + * request wouldn't fail until another card was inserted. + */ + spin_lock_bh(&host->lock); + if (!test_bit(DW_MMC_CARD_PRESENT, &slot->flags)) { + spin_unlock_bh(&host->lock); mrq->cmd->error = -ENOMEDIUM; mmc_request_done(mmc, mrq); return; } - /* We don't support multiple blocks of weird lengths. 
*/ dw_mci_queue_request(host, slot, mrq); + + spin_unlock_bh(&host->lock); } static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) @@ -831,7 +849,7 @@ static void dw_mci_tasklet_func(unsigned long priv) struct mmc_command *cmd; enum dw_mci_state state; enum dw_mci_state prev_state; - u32 status; + u32 status, ctrl; spin_lock(&host->lock); @@ -891,13 +909,19 @@ static void dw_mci_tasklet_func(unsigned long priv) if (status & DW_MCI_DATA_ERROR_FLAGS) { if (status & SDMMC_INT_DTO) { - dev_err(&host->pdev->dev, - "data timeout error\n"); data->error = -ETIMEDOUT; } else if (status & SDMMC_INT_DCRC) { - dev_err(&host->pdev->dev, - "data CRC error\n"); data->error = -EILSEQ; + } else if (status & SDMMC_INT_EBE && + host->dir_status == + DW_MCI_SEND_STATUS) { + /* + * No data CRC status was returned. + * The number of bytes transferred will + * be exaggerated in PIO mode. + */ + data->bytes_xfered = 0; + data->error = -ETIMEDOUT; } else { dev_err(&host->pdev->dev, "data FIFO error " @@ -905,6 +929,16 @@ static void dw_mci_tasklet_func(unsigned long priv) status); data->error = -EIO; } + /* + * After an error, there may be data lingering + * in the FIFO, so reset it - doing so + * generates a block interrupt, hence setting + * the scatter-gather pointer to NULL. + */ + host->sg = NULL; + ctrl = mci_readl(host, CTRL); + ctrl |= SDMMC_CTRL_FIFO_RESET; + mci_writel(host, CTRL, ctrl); } else { data->bytes_xfered = data->blocks * data->blksz; data->error = 0; @@ -946,84 +980,278 @@ unlock: } -static void dw_mci_push_data16(struct dw_mci *host, void *buf, int cnt) +/* push final bytes to part_buf, only use during push */ +static void dw_mci_set_part_bytes(struct dw_mci *host, void *buf, int cnt) { - u16 *pdata = (u16 *)buf; + memcpy((void *)&host->part_buf, buf, cnt); + host->part_buf_count = cnt; +} - WARN_ON(cnt % 2 != 0); +/* append bytes to part_buf, only use during push */ +static int dw_mci_push_part_bytes(struct dw_mci *host, void *buf, int cnt) +{ + cnt = min(cnt, (1 << host->data_shift) - host->part_buf_count); + memcpy((void *)&host->part_buf + host->part_buf_count, buf, cnt); + host->part_buf_count += cnt; + return cnt; +} - cnt = cnt >> 1; - while (cnt > 0) { - mci_writew(host, DATA, *pdata++); - cnt--; +/* pull first bytes from part_buf, only use during pull */ +static int dw_mci_pull_part_bytes(struct dw_mci *host, void *buf, int cnt) +{ + cnt = min(cnt, (int)host->part_buf_count); + if (cnt) { + memcpy(buf, (void *)&host->part_buf + host->part_buf_start, + cnt); + host->part_buf_count -= cnt; + host->part_buf_start += cnt; } + return cnt; } -static void dw_mci_pull_data16(struct dw_mci *host, void *buf, int cnt) +/* pull final bytes from the part_buf, assuming it's just been filled */ +static void dw_mci_pull_final_bytes(struct dw_mci *host, void *buf, int cnt) { - u16 *pdata = (u16 *)buf; + memcpy(buf, &host->part_buf, cnt); + host->part_buf_start = cnt; + host->part_buf_count = (1 << host->data_shift) - cnt; +} - WARN_ON(cnt % 2 != 0); +static void dw_mci_push_data16(struct dw_mci *host, void *buf, int cnt) +{ + /* try and push anything in the part_buf */ + if (unlikely(host->part_buf_count)) { + int len = dw_mci_push_part_bytes(host, buf, cnt); + buf += len; + cnt -= len; + if (!sg_next(host->sg) || host->part_buf_count == 2) { + mci_writew(host, DATA, host->part_buf16); + host->part_buf_count = 0; + } + } +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (unlikely((unsigned long)buf & 0x1)) { + while (cnt >= 2) { + u16 aligned_buf[64]; + int len = min(cnt & -2, 
(int)sizeof(aligned_buf)); + int items = len >> 1; + int i; + /* memcpy from input buffer into aligned buffer */ + memcpy(aligned_buf, buf, len); + buf += len; + cnt -= len; + /* push data from aligned buffer into fifo */ + for (i = 0; i < items; ++i) + mci_writew(host, DATA, aligned_buf[i]); + } + } else +#endif + { + u16 *pdata = buf; + for (; cnt >= 2; cnt -= 2) + mci_writew(host, DATA, *pdata++); + buf = pdata; + } + /* put anything remaining in the part_buf */ + if (cnt) { + dw_mci_set_part_bytes(host, buf, cnt); + if (!sg_next(host->sg)) + mci_writew(host, DATA, host->part_buf16); + } +} - cnt = cnt >> 1; - while (cnt > 0) { - *pdata++ = mci_readw(host, DATA); - cnt--; +static void dw_mci_pull_data16(struct dw_mci *host, void *buf, int cnt) +{ +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (unlikely((unsigned long)buf & 0x1)) { + while (cnt >= 2) { + /* pull data from fifo into aligned buffer */ + u16 aligned_buf[64]; + int len = min(cnt & -2, (int)sizeof(aligned_buf)); + int items = len >> 1; + int i; + for (i = 0; i < items; ++i) + aligned_buf[i] = mci_readw(host, DATA); + /* memcpy from aligned buffer into output buffer */ + memcpy(buf, aligned_buf, len); + buf += len; + cnt -= len; + } + } else +#endif + { + u16 *pdata = buf; + for (; cnt >= 2; cnt -= 2) + *pdata++ = mci_readw(host, DATA); + buf = pdata; + } + if (cnt) { + host->part_buf16 = mci_readw(host, DATA); + dw_mci_pull_final_bytes(host, buf, cnt); } } static void dw_mci_push_data32(struct dw_mci *host, void *buf, int cnt) { - u32 *pdata = (u32 *)buf; - - WARN_ON(cnt % 4 != 0); - WARN_ON((unsigned long)pdata & 0x3); - - cnt = cnt >> 2; - while (cnt > 0) { - mci_writel(host, DATA, *pdata++); - cnt--; + /* try and push anything in the part_buf */ + if (unlikely(host->part_buf_count)) { + int len = dw_mci_push_part_bytes(host, buf, cnt); + buf += len; + cnt -= len; + if (!sg_next(host->sg) || host->part_buf_count == 4) { + mci_writel(host, DATA, host->part_buf32); + host->part_buf_count = 0; + } + } +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (unlikely((unsigned long)buf & 0x3)) { + while (cnt >= 4) { + u32 aligned_buf[32]; + int len = min(cnt & -4, (int)sizeof(aligned_buf)); + int items = len >> 2; + int i; + /* memcpy from input buffer into aligned buffer */ + memcpy(aligned_buf, buf, len); + buf += len; + cnt -= len; + /* push data from aligned buffer into fifo */ + for (i = 0; i < items; ++i) + mci_writel(host, DATA, aligned_buf[i]); + } + } else +#endif + { + u32 *pdata = buf; + for (; cnt >= 4; cnt -= 4) + mci_writel(host, DATA, *pdata++); + buf = pdata; + } + /* put anything remaining in the part_buf */ + if (cnt) { + dw_mci_set_part_bytes(host, buf, cnt); + if (!sg_next(host->sg)) + mci_writel(host, DATA, host->part_buf32); } } static void dw_mci_pull_data32(struct dw_mci *host, void *buf, int cnt) { - u32 *pdata = (u32 *)buf; - - WARN_ON(cnt % 4 != 0); - WARN_ON((unsigned long)pdata & 0x3); - - cnt = cnt >> 2; - while (cnt > 0) { - *pdata++ = mci_readl(host, DATA); - cnt--; +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (unlikely((unsigned long)buf & 0x3)) { + while (cnt >= 4) { + /* pull data from fifo into aligned buffer */ + u32 aligned_buf[32]; + int len = min(cnt & -4, (int)sizeof(aligned_buf)); + int items = len >> 2; + int i; + for (i = 0; i < items; ++i) + aligned_buf[i] = mci_readl(host, DATA); + /* memcpy from aligned buffer into output buffer */ + memcpy(buf, aligned_buf, len); + buf += len; + cnt -= len; + } + } else +#endif + { + u32 *pdata = buf; + for (; cnt >= 4; cnt -= 4) + 
*pdata++ = mci_readl(host, DATA);
+		buf = pdata;
+	}
+	if (cnt) {
+		host->part_buf32 = mci_readl(host, DATA);
+		dw_mci_pull_final_bytes(host, buf, cnt);
 	}
 }

 static void dw_mci_push_data64(struct dw_mci *host, void *buf, int cnt)
 {
-	u64 *pdata = (u64 *)buf;
-
-	WARN_ON(cnt % 8 != 0);
-
-	cnt = cnt >> 3;
-	while (cnt > 0) {
-		mci_writeq(host, DATA, *pdata++);
-		cnt--;
+	/* try and push anything in the part_buf */
+	if (unlikely(host->part_buf_count)) {
+		int len = dw_mci_push_part_bytes(host, buf, cnt);
+		buf += len;
+		cnt -= len;
+		if (!sg_next(host->sg) || host->part_buf_count == 8) {
+			mci_writeq(host, DATA, host->part_buf);
+			host->part_buf_count = 0;
+		}
+	}
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (unlikely((unsigned long)buf & 0x7)) {
+		while (cnt >= 8) {
+			u64 aligned_buf[16];
+			int len = min(cnt & -8, (int)sizeof(aligned_buf));
+			int items = len >> 3;
+			int i;
+			/* memcpy from input buffer into aligned buffer */
+			memcpy(aligned_buf, buf, len);
+			buf += len;
+			cnt -= len;
+			/* push data from aligned buffer into fifo */
+			for (i = 0; i < items; ++i)
+				mci_writeq(host, DATA, aligned_buf[i]);
+		}
+	} else
+#endif
+	{
+		u64 *pdata = buf;
+		for (; cnt >= 8; cnt -= 8)
+			mci_writeq(host, DATA, *pdata++);
+		buf = pdata;
+	}
+	/* put anything remaining in the part_buf */
+	if (cnt) {
+		dw_mci_set_part_bytes(host, buf, cnt);
+		if (!sg_next(host->sg))
+			mci_writeq(host, DATA, host->part_buf);
 	}
 }

 static void dw_mci_pull_data64(struct dw_mci *host, void *buf, int cnt)
 {
-	u64 *pdata = (u64 *)buf;
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (unlikely((unsigned long)buf & 0x7)) {
+		while (cnt >= 8) {
+			/* pull data from fifo into aligned buffer */
+			u64 aligned_buf[16];
+			int len = min(cnt & -8, (int)sizeof(aligned_buf));
+			int items = len >> 3;
+			int i;
+			for (i = 0; i < items; ++i)
+				aligned_buf[i] = mci_readq(host, DATA);
+			/* memcpy from aligned buffer into output buffer */
+			memcpy(buf, aligned_buf, len);
+			buf += len;
+			cnt -= len;
+		}
+	} else
+#endif
+	{
+		u64 *pdata = buf;
+		for (; cnt >= 8; cnt -= 8)
+			*pdata++ = mci_readq(host, DATA);
+		buf = pdata;
+	}
+	if (cnt) {
+		host->part_buf = mci_readq(host, DATA);
+		dw_mci_pull_final_bytes(host, buf, cnt);
+	}
+}
-	WARN_ON(cnt % 8 != 0);
+static void dw_mci_pull_data(struct dw_mci *host, void *buf, int cnt)
+{
+	int len;
-	cnt = cnt >> 3;
-	while (cnt > 0) {
-		*pdata++ = mci_readq(host, DATA);
-		cnt--;
-	}
+	/* get remaining partial bytes */
+	len = dw_mci_pull_part_bytes(host, buf, cnt);
+	if (unlikely(len == cnt))
+		return;
+	buf += len;
+	cnt -= len;
+
+	/* get the rest of the data */
+	host->pull_data(host, buf, cnt);
 }

 static void dw_mci_read_data_pio(struct dw_mci *host)
@@ -1037,9 +1265,10 @@ static void dw_mci_read_data_pio(struct dw_mci *host)
 	unsigned int nbytes = 0, len;

 	do {
-		len = SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift;
+		len = host->part_buf_count +
+			(SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift);
 		if (offset + len <= sg->length) {
-			host->pull_data(host, (void *)(buf + offset), len);
+			dw_mci_pull_data(host, (void *)(buf + offset), len);

 			offset += len;
 			nbytes += len;
@@ -1055,8 +1284,8 @@ static void dw_mci_read_data_pio(struct dw_mci *host)
 			}
 		} else {
 			unsigned int remaining = sg->length - offset;
-			host->pull_data(host, (void *)(buf + offset),
-					remaining);
+			dw_mci_pull_data(host, (void *)(buf + offset),
+					 remaining);
 			nbytes += remaining;

 			flush_dcache_page(sg_page(sg));
@@ -1066,7 +1295,7 @@ static void dw_mci_read_data_pio(struct dw_mci *host)
 			offset = len - remaining;
 			buf = sg_virt(sg);
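
[Note on the part_buf scheme above: PIO bursts no longer have to be a multiple of the FIFO word size, because odd tail bytes are parked in host->part_buf and flushed once a full word is available or the scatterlist ends. The sketch below is a minimal userspace model of the 32-bit push path, not driver code: the FIFO is simulated as an array and a last_segment flag stands in for !sg_next(host->sg).]

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Simulated 32-bit FIFO (stand-in for the DATA register). */
static uint32_t fifo[64];
static int fifo_words;

static uint32_t part_buf;	/* staging word, like host->part_buf32 */
static int part_buf_count;	/* bytes currently staged */

/* Stage up to (4 - part_buf_count) bytes; mirrors dw_mci_push_part_bytes(). */
static int push_part_bytes(const void *buf, int cnt)
{
	int n = 4 - part_buf_count;

	if (cnt < n)
		n = cnt;
	memcpy((uint8_t *)&part_buf + part_buf_count, buf, n);
	part_buf_count += n;
	return n;
}

/* Push cnt bytes, writing only full 32-bit words to the FIFO. */
static void push_data32(const uint8_t *buf, int cnt, int last_segment)
{
	if (part_buf_count) {
		int len = push_part_bytes(buf, cnt);

		buf += len;
		cnt -= len;
		if (last_segment || part_buf_count == 4) {
			fifo[fifo_words++] = part_buf;
			part_buf_count = 0;
		}
	}
	for (; cnt >= 4; cnt -= 4) {
		memcpy(&fifo[fifo_words++], buf, 4); /* handles unaligned buf */
		buf += 4;
	}
	if (cnt) {
		part_buf = 0;
		memcpy(&part_buf, buf, cnt);
		part_buf_count = cnt;
		if (last_segment)	/* no next sg entry: flush the tail */
			fifo[fifo_words++] = part_buf;
	}
}

int main(void)
{
	uint8_t a[5] = { 0x11, 0x22, 0x33, 0x44, 0x55 };
	uint8_t b[3] = { 0x66, 0x77, 0x88 };

	push_data32(a, sizeof(a), 0);	/* leaves 0x55 staged */
	push_data32(b, sizeof(b), 1);	/* completes and flushes the word */

	for (int i = 0; i < fifo_words; i++)
		printf("fifo[%d] = %08x\n", i, fifo[i]);
	return 0;
}

[On a little-endian host this prints fifo[0] = 44332211 and fifo[1] = 88776655: the straggler byte from the first segment is completed by the second segment before anything else is pushed, which is exactly why the read side above adds host->part_buf_count to the FIFO byte count.]
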
- host->pull_data(host, buf, offset); + dw_mci_pull_data(host, buf, offset); nbytes += offset; } @@ -1083,7 +1312,6 @@ static void dw_mci_read_data_pio(struct dw_mci *host) return; } } while (status & SDMMC_INT_RXDR); /*if the RXDR is ready read again*/ - len = SDMMC_GET_FCNT(mci_readl(host, STATUS)); host->pio_offset = offset; data->bytes_xfered += nbytes; return; @@ -1105,8 +1333,9 @@ static void dw_mci_write_data_pio(struct dw_mci *host) unsigned int nbytes = 0, len; do { - len = SDMMC_FIFO_SZ - - (SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift); + len = ((host->fifo_depth - + SDMMC_GET_FCNT(mci_readl(host, STATUS))) << shift) + - host->part_buf_count; if (offset + len <= sg->length) { host->push_data(host, (void *)(buf + offset), len); @@ -1151,10 +1380,8 @@ static void dw_mci_write_data_pio(struct dw_mci *host) return; } } while (status & SDMMC_INT_TXDR); /* if TXDR write again */ - host->pio_offset = offset; data->bytes_xfered += nbytes; - return; done: @@ -1202,7 +1429,6 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) host->cmd_status = status; smp_wmb(); set_bit(EVENT_CMD_COMPLETE, &host->pending_events); - tasklet_schedule(&host->tasklet); } if (pending & DW_MCI_DATA_ERROR_FLAGS) { @@ -1211,7 +1437,9 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) host->data_status = status; smp_wmb(); set_bit(EVENT_DATA_ERROR, &host->pending_events); - tasklet_schedule(&host->tasklet); + if (!(pending & (SDMMC_INT_DTO | SDMMC_INT_DCRC | + SDMMC_INT_SBE | SDMMC_INT_EBE))) + tasklet_schedule(&host->tasklet); } if (pending & SDMMC_INT_DATA_OVER) { @@ -1229,13 +1457,13 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) if (pending & SDMMC_INT_RXDR) { mci_writel(host, RINTSTS, SDMMC_INT_RXDR); - if (host->sg) + if (host->dir_status == DW_MCI_RECV_STATUS && host->sg) dw_mci_read_data_pio(host); } if (pending & SDMMC_INT_TXDR) { mci_writel(host, RINTSTS, SDMMC_INT_TXDR); - if (host->sg) + if (host->dir_status == DW_MCI_SEND_STATUS && host->sg) dw_mci_write_data_pio(host); } @@ -1246,7 +1474,7 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) if (pending & SDMMC_INT_CD) { mci_writel(host, RINTSTS, SDMMC_INT_CD); - tasklet_schedule(&host->card_tasklet); + queue_work(dw_mci_card_workqueue, &host->card_work); } } while (pass_count++ < 5); @@ -1265,9 +1493,9 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void dw_mci_tasklet_card(unsigned long data) +static void dw_mci_work_routine_card(struct work_struct *work) { - struct dw_mci *host = (struct dw_mci *)data; + struct dw_mci *host = container_of(work, struct dw_mci, card_work); int i; for (i = 0; i < host->num_slots; i++) { @@ -1279,22 +1507,21 @@ static void dw_mci_tasklet_card(unsigned long data) present = dw_mci_get_cd(mmc); while (present != slot->last_detect_state) { - spin_lock(&host->lock); - dev_dbg(&slot->mmc->class_dev, "card %s\n", present ? 
"inserted" : "removed"); + /* Power up slot (before spin_lock, may sleep) */ + if (present != 0 && host->pdata->setpower) + host->pdata->setpower(slot->id, mmc->ocr_avail); + + spin_lock_bh(&host->lock); + /* Card change detected */ slot->last_detect_state = present; - /* Power up slot */ - if (present != 0) { - if (host->pdata->setpower) - host->pdata->setpower(slot->id, - mmc->ocr_avail); - + /* Mark card as present if applicable */ + if (present != 0) set_bit(DW_MMC_CARD_PRESENT, &slot->flags); - } /* Clean up queue if present */ mrq = slot->mrq; @@ -1344,8 +1571,6 @@ static void dw_mci_tasklet_card(unsigned long data) /* Power down slot */ if (present == 0) { - if (host->pdata->setpower) - host->pdata->setpower(slot->id, 0); clear_bit(DW_MMC_CARD_PRESENT, &slot->flags); /* @@ -1367,7 +1592,12 @@ static void dw_mci_tasklet_card(unsigned long data) } - spin_unlock(&host->lock); + spin_unlock_bh(&host->lock); + + /* Power down slot (after spin_unlock, may sleep) */ + if (present == 0 && host->pdata->setpower) + host->pdata->setpower(slot->id, 0); + present = dw_mci_get_cd(mmc); } @@ -1467,7 +1697,7 @@ static int __init dw_mci_init_slot(struct dw_mci *host, unsigned int id) * Card may have been plugged in prior to boot so we * need to run the detect tasklet */ - tasklet_schedule(&host->card_tasklet); + queue_work(dw_mci_card_workqueue, &host->card_work); return 0; } @@ -1645,8 +1875,19 @@ static int dw_mci_probe(struct platform_device *pdev) * FIFO threshold settings RxMark = fifo_size / 2 - 1, * Tx Mark = fifo_size / 2 DMA Size = 8 */ - fifo_size = mci_readl(host, FIFOTH); - fifo_size = (fifo_size >> 16) & 0x7ff; + if (!host->pdata->fifo_depth) { + /* + * Power-on value of RX_WMark is FIFO_DEPTH-1, but this may + * have been overwritten by the bootloader, just like we're + * about to do, so if you know the value for your hardware, you + * should put it in the platform data. 
+ */ + fifo_size = mci_readl(host, FIFOTH); + fifo_size = 1 + ((fifo_size >> 16) & 0x7ff); + } else { + fifo_size = host->pdata->fifo_depth; + } + host->fifo_depth = fifo_size; host->fifoth_val = ((0x2 << 28) | ((fifo_size/2 - 1) << 16) | ((fifo_size/2) << 0)); mci_writel(host, FIFOTH, host->fifoth_val); @@ -1656,12 +1897,15 @@ static int dw_mci_probe(struct platform_device *pdev) mci_writel(host, CLKSRC, 0); tasklet_init(&host->tasklet, dw_mci_tasklet_func, (unsigned long)host); - tasklet_init(&host->card_tasklet, - dw_mci_tasklet_card, (unsigned long)host); + dw_mci_card_workqueue = alloc_workqueue("dw-mci-card", + WQ_MEM_RECLAIM | WQ_NON_REENTRANT, 1); + if (!dw_mci_card_workqueue) + goto err_dmaunmap; + INIT_WORK(&host->card_work, dw_mci_work_routine_card); ret = request_irq(irq, dw_mci_interrupt, 0, "dw-mci", host); if (ret) - goto err_dmaunmap; + goto err_workqueue; platform_set_drvdata(pdev, host); @@ -1690,7 +1934,9 @@ static int dw_mci_probe(struct platform_device *pdev) mci_writel(host, CTRL, SDMMC_CTRL_INT_ENABLE); /* Enable mci interrupt */ dev_info(&pdev->dev, "DW MMC controller at irq %d, " - "%d bit host data width\n", irq, width); + "%d bit host data width, " + "%u deep fifo\n", + irq, width, fifo_size); if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO) dev_info(&pdev->dev, "Internal DMAC interrupt fix enabled.\n"); @@ -1705,6 +1951,9 @@ err_init_slot: } free_irq(irq, host); +err_workqueue: + destroy_workqueue(dw_mci_card_workqueue); + err_dmaunmap: if (host->use_dma && host->dma_ops->exit) host->dma_ops->exit(host); @@ -1744,6 +1993,7 @@ static int __exit dw_mci_remove(struct platform_device *pdev) mci_writel(host, CLKSRC, 0); free_irq(platform_get_irq(pdev, 0), host); + destroy_workqueue(dw_mci_card_workqueue); dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma); if (host->use_dma && host->dma_ops->exit) diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h index 23c662af5616..027d37735394 100644 --- a/drivers/mmc/host/dw_mmc.h +++ b/drivers/mmc/host/dw_mmc.h @@ -118,7 +118,6 @@ #define SDMMC_CMD_INDX(n) ((n) & 0x1F) /* Status register defines */ #define SDMMC_GET_FCNT(x) (((x)>>17) & 0x1FF) -#define SDMMC_FIFO_SZ 32 /* Internal DMAC interrupt defines */ #define SDMMC_IDMAC_INT_AI BIT(9) #define SDMMC_IDMAC_INT_NI BIT(8) @@ -134,22 +133,22 @@ /* Register access macros */ #define mci_readl(dev, reg) \ - __raw_readl(dev->regs + SDMMC_##reg) + __raw_readl((dev)->regs + SDMMC_##reg) #define mci_writel(dev, reg, value) \ - __raw_writel((value), dev->regs + SDMMC_##reg) + __raw_writel((value), (dev)->regs + SDMMC_##reg) /* 16-bit FIFO access macros */ #define mci_readw(dev, reg) \ - __raw_readw(dev->regs + SDMMC_##reg) + __raw_readw((dev)->regs + SDMMC_##reg) #define mci_writew(dev, reg, value) \ - __raw_writew((value), dev->regs + SDMMC_##reg) + __raw_writew((value), (dev)->regs + SDMMC_##reg) /* 64-bit FIFO access macros */ #ifdef readq #define mci_readq(dev, reg) \ - __raw_readq(dev->regs + SDMMC_##reg) + __raw_readq((dev)->regs + SDMMC_##reg) #define mci_writeq(dev, reg, value) \ - __raw_writeq((value), dev->regs + SDMMC_##reg) + __raw_writeq((value), (dev)->regs + SDMMC_##reg) #else /* * Dummy readq implementation for architectures that don't define it. @@ -160,9 +159,9 @@ * rest of the code free from ifdefs. 
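
[The extra parentheses added to these accessor macros are classic macro hygiene: without them, an argument such as &hosts[i] expands into an expression that parses differently, or not at all. A small self-contained illustration; the struct and macro names here are invented for the demo, not taken from the driver:]

#include <stdio.h>
#include <stdint.h>

struct chip {
	uint32_t regs[4];	/* pretend register window */
};

/* Unparenthesized: expands dev->regs textually. With dev == &chips[i],
 * "&chips[i]->regs[idx]" applies -> to chips[i] (a struct, not a
 * pointer) and fails to compile. Left unused here on purpose.
 */
#define REG_BAD(dev, idx)	(dev->regs[idx])

/* Parenthesized, like the patched mci_readl(): any pointer-valued
 * expression works as the first argument.
 */
#define REG_GOOD(dev, idx)	((dev)->regs[idx])

int main(void)
{
	struct chip chips[2] = { { { 1, 2, 3, 4 } }, { { 5, 6, 7, 8 } } };

	for (int i = 0; i < 2; i++)
		printf("chip %d reg1 = %u\n", i,
		       (unsigned int)REG_GOOD(&chips[i], 1));
	/* printf("%u\n", REG_BAD(&chips[1], 1));  -- would not compile */
	return 0;
}
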
*/ #define mci_readq(dev, reg) \ - (*(volatile u64 __force *)(dev->regs + SDMMC_##reg)) + (*(volatile u64 __force *)((dev)->regs + SDMMC_##reg)) #define mci_writeq(dev, reg, value) \ - (*(volatile u64 __force *)(dev->regs + SDMMC_##reg) = value) + (*(volatile u64 __force *)((dev)->regs + SDMMC_##reg) = (value)) #endif #endif /* _DW_MMC_H_ */ diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index fe140724a02e..fef7140eb1d0 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -226,6 +226,9 @@ static void __devinit mmci_dma_setup(struct mmci_host *host) return; } + /* initialize pre request cookie */ + host->next_data.cookie = 1; + /* Try to acquire a generic DMA engine slave channel */ dma_cap_zero(mask); dma_cap_set(DMA_SLAVE, mask); @@ -335,7 +338,8 @@ static void mmci_dma_unmap(struct mmci_host *host, struct mmc_data *data) dir = DMA_FROM_DEVICE; } - dma_unmap_sg(chan->device->dev, data->sg, data->sg_len, dir); + if (!data->host_cookie) + dma_unmap_sg(chan->device->dev, data->sg, data->sg_len, dir); /* * Use of DMA with scatter-gather is impossible. @@ -353,7 +357,8 @@ static void mmci_dma_data_error(struct mmci_host *host) dmaengine_terminate_all(host->dma_current); } -static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl) +static int mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data, + struct mmci_host_next *next) { struct variant_data *variant = host->variant; struct dma_slave_config conf = { @@ -364,13 +369,20 @@ static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl) .src_maxburst = variant->fifohalfsize >> 2, /* # of words */ .dst_maxburst = variant->fifohalfsize >> 2, /* # of words */ }; - struct mmc_data *data = host->data; struct dma_chan *chan; struct dma_device *device; struct dma_async_tx_descriptor *desc; int nr_sg; - host->dma_current = NULL; + /* Check if next job is already prepared */ + if (data->host_cookie && !next && + host->dma_current && host->dma_desc_current) + return 0; + + if (!next) { + host->dma_current = NULL; + host->dma_desc_current = NULL; + } if (data->flags & MMC_DATA_READ) { conf.direction = DMA_FROM_DEVICE; @@ -385,7 +397,7 @@ static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl) return -EINVAL; /* If less than or equal to the fifo size, don't bother with DMA */ - if (host->size <= variant->fifosize) + if (data->blksz * data->blocks <= variant->fifosize) return -EINVAL; device = chan->device; @@ -399,14 +411,38 @@ static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl) if (!desc) goto unmap_exit; - /* Okay, go for it. */ - host->dma_current = chan; + if (next) { + next->dma_chan = chan; + next->dma_desc = desc; + } else { + host->dma_current = chan; + host->dma_desc_current = desc; + } + + return 0; + unmap_exit: + if (!next) + dmaengine_terminate_all(chan); + dma_unmap_sg(device->dev, data->sg, data->sg_len, conf.direction); + return -ENOMEM; +} + +static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl) +{ + int ret; + struct mmc_data *data = host->data; + + ret = mmci_dma_prep_data(host, host->data, NULL); + if (ret) + return ret; + + /* Okay, go for it. 
*/ dev_vdbg(mmc_dev(host->mmc), "Submit MMCI DMA job, sglen %d blksz %04x blks %04x flags %08x\n", data->sg_len, data->blksz, data->blocks, data->flags); - dmaengine_submit(desc); - dma_async_issue_pending(chan); + dmaengine_submit(host->dma_desc_current); + dma_async_issue_pending(host->dma_current); datactrl |= MCI_DPSM_DMAENABLE; @@ -421,14 +457,90 @@ static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl) writel(readl(host->base + MMCIMASK0) | MCI_DATAENDMASK, host->base + MMCIMASK0); return 0; +} -unmap_exit: - dmaengine_terminate_all(chan); - dma_unmap_sg(device->dev, data->sg, data->sg_len, conf.direction); - return -ENOMEM; +static void mmci_get_next_data(struct mmci_host *host, struct mmc_data *data) +{ + struct mmci_host_next *next = &host->next_data; + + if (data->host_cookie && data->host_cookie != next->cookie) { + printk(KERN_WARNING "[%s] invalid cookie: data->host_cookie %d" + " host->next_data.cookie %d\n", + __func__, data->host_cookie, host->next_data.cookie); + data->host_cookie = 0; + } + + if (!data->host_cookie) + return; + + host->dma_desc_current = next->dma_desc; + host->dma_current = next->dma_chan; + + next->dma_desc = NULL; + next->dma_chan = NULL; } + +static void mmci_pre_request(struct mmc_host *mmc, struct mmc_request *mrq, + bool is_first_req) +{ + struct mmci_host *host = mmc_priv(mmc); + struct mmc_data *data = mrq->data; + struct mmci_host_next *nd = &host->next_data; + + if (!data) + return; + + if (data->host_cookie) { + data->host_cookie = 0; + return; + } + + /* if config for dma */ + if (((data->flags & MMC_DATA_WRITE) && host->dma_tx_channel) || + ((data->flags & MMC_DATA_READ) && host->dma_rx_channel)) { + if (mmci_dma_prep_data(host, data, nd)) + data->host_cookie = 0; + else + data->host_cookie = ++nd->cookie < 0 ? 
1 : nd->cookie; + } +} + +static void mmci_post_request(struct mmc_host *mmc, struct mmc_request *mrq, + int err) +{ + struct mmci_host *host = mmc_priv(mmc); + struct mmc_data *data = mrq->data; + struct dma_chan *chan; + enum dma_data_direction dir; + + if (!data) + return; + + if (data->flags & MMC_DATA_READ) { + dir = DMA_FROM_DEVICE; + chan = host->dma_rx_channel; + } else { + dir = DMA_TO_DEVICE; + chan = host->dma_tx_channel; + } + + + /* if config for dma */ + if (chan) { + if (err) + dmaengine_terminate_all(chan); + if (err || data->host_cookie) + dma_unmap_sg(mmc_dev(host->mmc), data->sg, + data->sg_len, dir); + mrq->data->host_cookie = 0; + } +} + #else /* Blank functions if the DMA engine is not available */ +static void mmci_get_next_data(struct mmci_host *host, struct mmc_data *data) +{ +} static inline void mmci_dma_setup(struct mmci_host *host) { } @@ -449,6 +561,10 @@ static inline int mmci_dma_start_data(struct mmci_host *host, unsigned int datac { return -ENOSYS; } + +#define mmci_pre_request NULL +#define mmci_post_request NULL + #endif static void mmci_start_data(struct mmci_host *host, struct mmc_data *data) @@ -872,6 +988,9 @@ static void mmci_request(struct mmc_host *mmc, struct mmc_request *mrq) host->mrq = mrq; + if (mrq->data) + mmci_get_next_data(host, mrq->data); + if (mrq->data && mrq->data->flags & MMC_DATA_READ) mmci_start_data(host, mrq->data); @@ -986,6 +1105,8 @@ static irqreturn_t mmci_cd_irq(int irq, void *dev_id) static const struct mmc_host_ops mmci_ops = { .request = mmci_request, + .pre_req = mmci_pre_request, + .post_req = mmci_post_request, .set_ios = mmci_set_ios, .get_ro = mmci_get_ro, .get_cd = mmci_get_cd, diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h index 2164e8c6476c..79e4143ab9df 100644 --- a/drivers/mmc/host/mmci.h +++ b/drivers/mmc/host/mmci.h @@ -166,6 +166,12 @@ struct clk; struct variant_data; struct dma_chan; +struct mmci_host_next { + struct dma_async_tx_descriptor *dma_desc; + struct dma_chan *dma_chan; + s32 cookie; +}; + struct mmci_host { phys_addr_t phybase; void __iomem *base; @@ -203,6 +209,8 @@ struct mmci_host { struct dma_chan *dma_current; struct dma_chan *dma_rx_channel; struct dma_chan *dma_tx_channel; + struct dma_async_tx_descriptor *dma_desc_current; + struct mmci_host_next next_data; #define dma_inprogress(host) ((host)->dma_current) #else diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c index 99d39a6a1032..d513d47364d0 100644 --- a/drivers/mmc/host/mxs-mmc.c +++ b/drivers/mmc/host/mxs-mmc.c @@ -564,40 +564,38 @@ static void mxs_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq) static void mxs_mmc_set_clk_rate(struct mxs_mmc_host *host, unsigned int rate) { - unsigned int ssp_rate, bit_rate; - u32 div1, div2; + unsigned int ssp_clk, ssp_sck; + u32 clock_divide, clock_rate; u32 val; - ssp_rate = clk_get_rate(host->clk); + ssp_clk = clk_get_rate(host->clk); - for (div1 = 2; div1 < 254; div1 += 2) { - div2 = ssp_rate / rate / div1; - if (div2 < 0x100) + for (clock_divide = 2; clock_divide <= 254; clock_divide += 2) { + clock_rate = DIV_ROUND_UP(ssp_clk, rate * clock_divide); + clock_rate = (clock_rate > 0) ? 
clock_rate - 1 : 0; + if (clock_rate <= 255) break; } - if (div1 >= 254) { + if (clock_divide > 254) { dev_err(mmc_dev(host->mmc), "%s: cannot set clock to %d\n", __func__, rate); return; } - if (div2 == 0) - bit_rate = ssp_rate / div1; - else - bit_rate = ssp_rate / div1 / div2; + ssp_sck = ssp_clk / clock_divide / (1 + clock_rate); val = readl(host->base + HW_SSP_TIMING); val &= ~(BM_SSP_TIMING_CLOCK_DIVIDE | BM_SSP_TIMING_CLOCK_RATE); - val |= BF_SSP(div1, TIMING_CLOCK_DIVIDE); - val |= BF_SSP(div2 - 1, TIMING_CLOCK_RATE); + val |= BF_SSP(clock_divide, TIMING_CLOCK_DIVIDE); + val |= BF_SSP(clock_rate, TIMING_CLOCK_RATE); writel(val, host->base + HW_SSP_TIMING); - host->clk_rate = bit_rate; + host->clk_rate = ssp_sck; dev_dbg(mmc_dev(host->mmc), - "%s: div1 %d, div2 %d, ssp %d, bit %d, rate %d\n", - __func__, div1, div2, ssp_rate, bit_rate, rate); + "%s: clock_divide %d, clock_rate %d, ssp_clk %d, rate_actual %d, rate_requested %d\n", + __func__, clock_divide, clock_rate, ssp_clk, ssp_sck, rate); } static void mxs_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index dedf3dab8a3b..21e4a799df48 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -17,6 +17,7 @@ #include <linux/module.h> #include <linux/init.h> +#include <linux/kernel.h> #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/interrupt.h> @@ -33,6 +34,7 @@ #include <linux/semaphore.h> #include <linux/gpio.h> #include <linux/regulator/consumer.h> +#include <linux/pm_runtime.h> #include <plat/dma.h> #include <mach/hardware.h> #include <plat/board.h> @@ -116,15 +118,13 @@ #define OMAP_MMC4_DEVID 3 #define OMAP_MMC5_DEVID 4 +#define MMC_AUTOSUSPEND_DELAY 100 #define MMC_TIMEOUT_MS 20 #define OMAP_MMC_MASTER_CLOCK 96000000 +#define OMAP_MMC_MIN_CLOCK 400000 +#define OMAP_MMC_MAX_CLOCK 52000000 #define DRIVER_NAME "omap_hsmmc" -/* Timeouts for entering power saving states on inactivity, msec */ -#define OMAP_MMC_DISABLED_TIMEOUT 100 -#define OMAP_MMC_SLEEP_TIMEOUT 1000 -#define OMAP_MMC_OFF_TIMEOUT 8000 - /* * One controller can have multiple slots, like on some omap boards using * omap.c controller driver. 
Luckily this is not currently done on any known @@ -141,6 +141,11 @@ #define OMAP_HSMMC_WRITE(base, reg, val) \ __raw_writel((val), (base) + OMAP_HSMMC_##reg) +struct omap_hsmmc_next { + unsigned int dma_len; + s32 cookie; +}; + struct omap_hsmmc_host { struct device *dev; struct mmc_host *mmc; @@ -148,7 +153,6 @@ struct omap_hsmmc_host { struct mmc_command *cmd; struct mmc_data *data; struct clk *fclk; - struct clk *iclk; struct clk *dbclk; /* * vcc == configured supply @@ -184,6 +188,7 @@ struct omap_hsmmc_host { int reqs_blocked; int use_reg; int req_in_progress; + struct omap_hsmmc_next next_data; struct omap_mmc_platform_data *pdata; }; @@ -548,6 +553,15 @@ static void omap_hsmmc_gpio_free(struct omap_mmc_platform_data *pdata) } /* + * Start clock to the card + */ +static void omap_hsmmc_start_clock(struct omap_hsmmc_host *host) +{ + OMAP_HSMMC_WRITE(host->base, SYSCTL, + OMAP_HSMMC_READ(host->base, SYSCTL) | CEN); +} + +/* * Stop clock to the card */ static void omap_hsmmc_stop_clock(struct omap_hsmmc_host *host) @@ -584,6 +598,81 @@ static void omap_hsmmc_disable_irq(struct omap_hsmmc_host *host) OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); } +/* Calculate divisor for the given clock frequency */ +static u16 calc_divisor(struct mmc_ios *ios) +{ + u16 dsor = 0; + + if (ios->clock) { + dsor = DIV_ROUND_UP(OMAP_MMC_MASTER_CLOCK, ios->clock); + if (dsor > 250) + dsor = 250; + } + + return dsor; +} + +static void omap_hsmmc_set_clock(struct omap_hsmmc_host *host) +{ + struct mmc_ios *ios = &host->mmc->ios; + unsigned long regval; + unsigned long timeout; + + dev_dbg(mmc_dev(host->mmc), "Set clock to %uHz\n", ios->clock); + + omap_hsmmc_stop_clock(host); + + regval = OMAP_HSMMC_READ(host->base, SYSCTL); + regval = regval & ~(CLKD_MASK | DTO_MASK); + regval = regval | (calc_divisor(ios) << 6) | (DTO << 16); + OMAP_HSMMC_WRITE(host->base, SYSCTL, regval); + OMAP_HSMMC_WRITE(host->base, SYSCTL, + OMAP_HSMMC_READ(host->base, SYSCTL) | ICE); + + /* Wait till the ICS bit is set */ + timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS); + while ((OMAP_HSMMC_READ(host->base, SYSCTL) & ICS) != ICS + && time_before(jiffies, timeout)) + cpu_relax(); + + omap_hsmmc_start_clock(host); +} + +static void omap_hsmmc_set_bus_width(struct omap_hsmmc_host *host) +{ + struct mmc_ios *ios = &host->mmc->ios; + u32 con; + + con = OMAP_HSMMC_READ(host->base, CON); + switch (ios->bus_width) { + case MMC_BUS_WIDTH_8: + OMAP_HSMMC_WRITE(host->base, CON, con | DW8); + break; + case MMC_BUS_WIDTH_4: + OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8); + OMAP_HSMMC_WRITE(host->base, HCTL, + OMAP_HSMMC_READ(host->base, HCTL) | FOUR_BIT); + break; + case MMC_BUS_WIDTH_1: + OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8); + OMAP_HSMMC_WRITE(host->base, HCTL, + OMAP_HSMMC_READ(host->base, HCTL) & ~FOUR_BIT); + break; + } +} + +static void omap_hsmmc_set_bus_mode(struct omap_hsmmc_host *host) +{ + struct mmc_ios *ios = &host->mmc->ios; + u32 con; + + con = OMAP_HSMMC_READ(host->base, CON); + if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN) + OMAP_HSMMC_WRITE(host->base, CON, con | OD); + else + OMAP_HSMMC_WRITE(host->base, CON, con & ~OD); +} + #ifdef CONFIG_PM /* @@ -595,8 +684,7 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host) struct mmc_ios *ios = &host->mmc->ios; struct omap_mmc_platform_data *pdata = host->pdata; int context_loss = 0; - u32 hctl, capa, con; - u16 dsor = 0; + u32 hctl, capa; unsigned long timeout; if (pdata->get_context_loss_count) { @@ -658,54 +746,12 @@ static int 
omap_hsmmc_context_restore(struct omap_hsmmc_host *host) if (host->power_mode == MMC_POWER_OFF) goto out; - con = OMAP_HSMMC_READ(host->base, CON); - switch (ios->bus_width) { - case MMC_BUS_WIDTH_8: - OMAP_HSMMC_WRITE(host->base, CON, con | DW8); - break; - case MMC_BUS_WIDTH_4: - OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8); - OMAP_HSMMC_WRITE(host->base, HCTL, - OMAP_HSMMC_READ(host->base, HCTL) | FOUR_BIT); - break; - case MMC_BUS_WIDTH_1: - OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8); - OMAP_HSMMC_WRITE(host->base, HCTL, - OMAP_HSMMC_READ(host->base, HCTL) & ~FOUR_BIT); - break; - } - - if (ios->clock) { - dsor = OMAP_MMC_MASTER_CLOCK / ios->clock; - if (dsor < 1) - dsor = 1; - - if (OMAP_MMC_MASTER_CLOCK / dsor > ios->clock) - dsor++; - - if (dsor > 250) - dsor = 250; - } - - OMAP_HSMMC_WRITE(host->base, SYSCTL, - OMAP_HSMMC_READ(host->base, SYSCTL) & ~CEN); - OMAP_HSMMC_WRITE(host->base, SYSCTL, (dsor << 6) | (DTO << 16)); - OMAP_HSMMC_WRITE(host->base, SYSCTL, - OMAP_HSMMC_READ(host->base, SYSCTL) | ICE); + omap_hsmmc_set_bus_width(host); - timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS); - while ((OMAP_HSMMC_READ(host->base, SYSCTL) & ICS) != ICS - && time_before(jiffies, timeout)) - ; + omap_hsmmc_set_clock(host); - OMAP_HSMMC_WRITE(host->base, SYSCTL, - OMAP_HSMMC_READ(host->base, SYSCTL) | CEN); + omap_hsmmc_set_bus_mode(host); - con = OMAP_HSMMC_READ(host->base, CON); - if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN) - OMAP_HSMMC_WRITE(host->base, CON, con | OD); - else - OMAP_HSMMC_WRITE(host->base, CON, con & ~OD); out: host->context_loss = context_loss; @@ -973,14 +1019,14 @@ static void omap_hsmmc_dma_cleanup(struct omap_hsmmc_host *host, int errno) * Readable error output */ #ifdef CONFIG_MMC_DEBUG -static void omap_hsmmc_report_irq(struct omap_hsmmc_host *host, u32 status) +static void omap_hsmmc_dbg_report_irq(struct omap_hsmmc_host *host, u32 status) { /* --- means reserved bit without definition at documentation */ static const char *omap_hsmmc_status_bits[] = { - "CC", "TC", "BGE", "---", "BWR", "BRR", "---", "---", "CIRQ", - "OBI", "---", "---", "---", "---", "---", "ERRI", "CTO", "CCRC", - "CEB", "CIE", "DTO", "DCRC", "DEB", "---", "ACE", "---", - "---", "---", "---", "CERR", "CERR", "BADA", "---", "---", "---" + "CC" , "TC" , "BGE", "---", "BWR" , "BRR" , "---" , "---" , + "CIRQ", "OBI" , "---", "---", "---" , "---" , "---" , "ERRI", + "CTO" , "CCRC", "CEB", "CIE", "DTO" , "DCRC", "DEB" , "---" , + "ACE" , "---" , "---", "---", "CERR", "BADA", "---" , "---" }; char res[256]; char *buf = res; @@ -997,6 +1043,11 @@ static void omap_hsmmc_report_irq(struct omap_hsmmc_host *host, u32 status) dev_dbg(mmc_dev(host->mmc), "%s\n", res); } +#else +static inline void omap_hsmmc_dbg_report_irq(struct omap_hsmmc_host *host, + u32 status) +{ +} #endif /* CONFIG_MMC_DEBUG */ /* @@ -1055,9 +1106,7 @@ static void omap_hsmmc_do_irq(struct omap_hsmmc_host *host, int status) dev_dbg(mmc_dev(host->mmc), "IRQ Status is %x\n", status); if (status & ERR) { -#ifdef CONFIG_MMC_DEBUG - omap_hsmmc_report_irq(host, status); -#endif + omap_hsmmc_dbg_report_irq(host, status); if ((status & CMD_TIMEOUT) || (status & CMD_CRC)) { if (host->cmd) { @@ -1155,8 +1204,7 @@ static int omap_hsmmc_switch_opcond(struct omap_hsmmc_host *host, int vdd) int ret; /* Disable the clocks */ - clk_disable(host->fclk); - clk_disable(host->iclk); + pm_runtime_put_sync(host->dev); if (host->got_dbclk) clk_disable(host->dbclk); @@ -1167,8 +1215,7 @@ static int omap_hsmmc_switch_opcond(struct omap_hsmmc_host 
*host, int vdd) if (!ret) ret = mmc_slot(host).set_power(host->dev, host->slot_id, 1, vdd); - clk_enable(host->iclk); - clk_enable(host->fclk); + pm_runtime_get_sync(host->dev); if (host->got_dbclk) clk_enable(host->dbclk); @@ -1322,7 +1369,7 @@ static void omap_hsmmc_config_dma_params(struct omap_hsmmc_host *host, static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data) { struct omap_hsmmc_host *host = cb_data; - struct mmc_data *data = host->mrq->data; + struct mmc_data *data; int dma_ch, req_in_progress; if (!(ch_status & OMAP_DMA_BLOCK_IRQ)) { @@ -1337,6 +1384,7 @@ static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data) return; } + data = host->mrq->data; host->dma_sg_idx++; if (host->dma_sg_idx < host->dma_len) { /* Fire up the next transfer. */ @@ -1346,8 +1394,9 @@ static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data) return; } - dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, - omap_hsmmc_get_dma_dir(host, data)); + if (!data->host_cookie) + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + omap_hsmmc_get_dma_dir(host, data)); req_in_progress = host->req_in_progress; dma_ch = host->dma_ch; @@ -1365,6 +1414,45 @@ static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data) } } +static int omap_hsmmc_pre_dma_transfer(struct omap_hsmmc_host *host, + struct mmc_data *data, + struct omap_hsmmc_next *next) +{ + int dma_len; + + if (!next && data->host_cookie && + data->host_cookie != host->next_data.cookie) { + printk(KERN_WARNING "[%s] invalid cookie: data->host_cookie %d" + " host->next_data.cookie %d\n", + __func__, data->host_cookie, host->next_data.cookie); + data->host_cookie = 0; + } + + /* Check if next job is already prepared */ + if (next || + (!next && data->host_cookie != host->next_data.cookie)) { + dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg, + data->sg_len, + omap_hsmmc_get_dma_dir(host, data)); + + } else { + dma_len = host->next_data.dma_len; + host->next_data.dma_len = 0; + } + + + if (dma_len == 0) + return -EINVAL; + + if (next) { + next->dma_len = dma_len; + data->host_cookie = ++next->cookie < 0 ? 1 : next->cookie; + } else + host->dma_len = dma_len; + + return 0; +} + /* * Routine to configure and start DMA for the MMC card */ @@ -1398,9 +1486,10 @@ static int omap_hsmmc_start_dma_transfer(struct omap_hsmmc_host *host, mmc_hostname(host->mmc), ret); return ret; } + ret = omap_hsmmc_pre_dma_transfer(host, data, NULL); + if (ret) + return ret; - host->dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg, - data->sg_len, omap_hsmmc_get_dma_dir(host, data)); host->dma_ch = dma_ch; host->dma_sg_idx = 0; @@ -1480,6 +1569,35 @@ omap_hsmmc_prepare_data(struct omap_hsmmc_host *host, struct mmc_request *req) return 0; } +static void omap_hsmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq, + int err) +{ + struct omap_hsmmc_host *host = mmc_priv(mmc); + struct mmc_data *data = mrq->data; + + if (host->use_dma) { + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + omap_hsmmc_get_dma_dir(host, data)); + data->host_cookie = 0; + } +} + +static void omap_hsmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq, + bool is_first_req) +{ + struct omap_hsmmc_host *host = mmc_priv(mmc); + + if (mrq->data->host_cookie) { + mrq->data->host_cookie = 0; + return ; + } + + if (host->use_dma) + if (omap_hsmmc_pre_dma_transfer(host, mrq->data, + &host->next_data)) + mrq->data->host_cookie = 0; +} + /* * Request function. 
for read/write operation */ @@ -1528,13 +1646,9 @@ static void omap_hsmmc_request(struct mmc_host *mmc, struct mmc_request *req) static void omap_hsmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct omap_hsmmc_host *host = mmc_priv(mmc); - u16 dsor = 0; - unsigned long regval; - unsigned long timeout; - u32 con; int do_send_init_stream = 0; - mmc_host_enable(host->mmc); + pm_runtime_get_sync(host->dev); if (ios->power_mode != host->power_mode) { switch (ios->power_mode) { @@ -1557,22 +1671,7 @@ static void omap_hsmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) /* FIXME: set registers based only on changes to ios */ - con = OMAP_HSMMC_READ(host->base, CON); - switch (mmc->ios.bus_width) { - case MMC_BUS_WIDTH_8: - OMAP_HSMMC_WRITE(host->base, CON, con | DW8); - break; - case MMC_BUS_WIDTH_4: - OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8); - OMAP_HSMMC_WRITE(host->base, HCTL, - OMAP_HSMMC_READ(host->base, HCTL) | FOUR_BIT); - break; - case MMC_BUS_WIDTH_1: - OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8); - OMAP_HSMMC_WRITE(host->base, HCTL, - OMAP_HSMMC_READ(host->base, HCTL) & ~FOUR_BIT); - break; - } + omap_hsmmc_set_bus_width(host); if (host->pdata->controller_flags & OMAP_HSMMC_SUPPORTS_DUAL_VOLT) { /* Only MMC1 can interface at 3V without some flavor @@ -1592,47 +1691,14 @@ static void omap_hsmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } } - if (ios->clock) { - dsor = OMAP_MMC_MASTER_CLOCK / ios->clock; - if (dsor < 1) - dsor = 1; - - if (OMAP_MMC_MASTER_CLOCK / dsor > ios->clock) - dsor++; - - if (dsor > 250) - dsor = 250; - } - omap_hsmmc_stop_clock(host); - regval = OMAP_HSMMC_READ(host->base, SYSCTL); - regval = regval & ~(CLKD_MASK); - regval = regval | (dsor << 6) | (DTO << 16); - OMAP_HSMMC_WRITE(host->base, SYSCTL, regval); - OMAP_HSMMC_WRITE(host->base, SYSCTL, - OMAP_HSMMC_READ(host->base, SYSCTL) | ICE); - - /* Wait till the ICS bit is set */ - timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS); - while ((OMAP_HSMMC_READ(host->base, SYSCTL) & ICS) != ICS - && time_before(jiffies, timeout)) - msleep(1); - - OMAP_HSMMC_WRITE(host->base, SYSCTL, - OMAP_HSMMC_READ(host->base, SYSCTL) | CEN); + omap_hsmmc_set_clock(host); if (do_send_init_stream) send_init_stream(host); - con = OMAP_HSMMC_READ(host->base, CON); - if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN) - OMAP_HSMMC_WRITE(host->base, CON, con | OD); - else - OMAP_HSMMC_WRITE(host->base, CON, con & ~OD); + omap_hsmmc_set_bus_mode(host); - if (host->power_mode == MMC_POWER_OFF) - mmc_host_disable(host->mmc); - else - mmc_host_lazy_disable(host->mmc); + pm_runtime_put_autosuspend(host->dev); } static int omap_hsmmc_get_cd(struct mmc_host *mmc) @@ -1688,230 +1754,12 @@ static void omap_hsmmc_conf_bus_power(struct omap_hsmmc_host *host) set_sd_bus_power(host); } -/* - * Dynamic power saving handling, FSM: - * ENABLED -> DISABLED -> CARDSLEEP / REGSLEEP -> OFF - * ^___________| | | - * |______________________|______________________| - * - * ENABLED: mmc host is fully functional - * DISABLED: fclk is off - * CARDSLEEP: fclk is off, card is asleep, voltage regulator is asleep - * REGSLEEP: fclk is off, voltage regulator is asleep - * OFF: fclk is off, voltage regulator is off - * - * Transition handlers return the timeout for the next state transition - * or negative error. 
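
[The divisor logic that set_ios used to open-code, now centralized in calc_divisor(), reduces to a rounded-up division clamped to the hardware maximum of 250; rounding up guarantees the resulting clock never exceeds the requested rate. A quick userspace check using the constants from this driver:]

#include <stdio.h>

#define MASTER_CLOCK	96000000u	/* OMAP_MMC_MASTER_CLOCK */
#define MIN_CLOCK	400000u		/* OMAP_MMC_MIN_CLOCK */
#define MAX_CLOCK	52000000u	/* OMAP_MMC_MAX_CLOCK */

/* Mirrors calc_divisor(): DIV_ROUND_UP(master, target), clamped to 250. */
static unsigned int calc_divisor(unsigned int hz)
{
	unsigned int dsor;

	if (!hz)
		return 0;
	dsor = (MASTER_CLOCK + hz - 1) / hz;
	if (dsor > 250)
		dsor = 250;
	return dsor;
}

int main(void)
{
	unsigned int targets[] = { MIN_CLOCK, 25000000u, MAX_CLOCK };

	for (unsigned int i = 0; i < 3; i++) {
		unsigned int d = calc_divisor(targets[i]);

		printf("want %8u Hz -> divisor %3u -> actual %8u Hz\n",
		       targets[i], d, MASTER_CLOCK / d);
	}
	return 0;
}

[For 400 kHz this yields divisor 240 (exactly 400 kHz); for 52 MHz it yields divisor 2, i.e. 48 MHz, the closest rate not above the request.]
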
- */ - -enum {ENABLED = 0, DISABLED, CARDSLEEP, REGSLEEP, OFF}; - -/* Handler for [ENABLED -> DISABLED] transition */ -static int omap_hsmmc_enabled_to_disabled(struct omap_hsmmc_host *host) -{ - omap_hsmmc_context_save(host); - clk_disable(host->fclk); - host->dpm_state = DISABLED; - - dev_dbg(mmc_dev(host->mmc), "ENABLED -> DISABLED\n"); - - if (host->power_mode == MMC_POWER_OFF) - return 0; - - return OMAP_MMC_SLEEP_TIMEOUT; -} - -/* Handler for [DISABLED -> REGSLEEP / CARDSLEEP] transition */ -static int omap_hsmmc_disabled_to_sleep(struct omap_hsmmc_host *host) -{ - int err, new_state; - - if (!mmc_try_claim_host(host->mmc)) - return 0; - - clk_enable(host->fclk); - omap_hsmmc_context_restore(host); - if (mmc_card_can_sleep(host->mmc)) { - err = mmc_card_sleep(host->mmc); - if (err < 0) { - clk_disable(host->fclk); - mmc_release_host(host->mmc); - return err; - } - new_state = CARDSLEEP; - } else { - new_state = REGSLEEP; - } - if (mmc_slot(host).set_sleep) - mmc_slot(host).set_sleep(host->dev, host->slot_id, 1, 0, - new_state == CARDSLEEP); - /* FIXME: turn off bus power and perhaps interrupts too */ - clk_disable(host->fclk); - host->dpm_state = new_state; - - mmc_release_host(host->mmc); - - dev_dbg(mmc_dev(host->mmc), "DISABLED -> %s\n", - host->dpm_state == CARDSLEEP ? "CARDSLEEP" : "REGSLEEP"); - - if (mmc_slot(host).no_off) - return 0; - - if ((host->mmc->caps & MMC_CAP_NONREMOVABLE) || - mmc_slot(host).card_detect || - (mmc_slot(host).get_cover_state && - mmc_slot(host).get_cover_state(host->dev, host->slot_id))) - return OMAP_MMC_OFF_TIMEOUT; - - return 0; -} - -/* Handler for [REGSLEEP / CARDSLEEP -> OFF] transition */ -static int omap_hsmmc_sleep_to_off(struct omap_hsmmc_host *host) -{ - if (!mmc_try_claim_host(host->mmc)) - return 0; - - if (mmc_slot(host).no_off) - return 0; - - if (!((host->mmc->caps & MMC_CAP_NONREMOVABLE) || - mmc_slot(host).card_detect || - (mmc_slot(host).get_cover_state && - mmc_slot(host).get_cover_state(host->dev, host->slot_id)))) { - mmc_release_host(host->mmc); - return 0; - } - - mmc_slot(host).set_power(host->dev, host->slot_id, 0, 0); - host->vdd = 0; - host->power_mode = MMC_POWER_OFF; - - dev_dbg(mmc_dev(host->mmc), "%s -> OFF\n", - host->dpm_state == CARDSLEEP ? "CARDSLEEP" : "REGSLEEP"); - - host->dpm_state = OFF; - - mmc_release_host(host->mmc); - - return 0; -} - -/* Handler for [DISABLED -> ENABLED] transition */ -static int omap_hsmmc_disabled_to_enabled(struct omap_hsmmc_host *host) -{ - int err; - - err = clk_enable(host->fclk); - if (err < 0) - return err; - - omap_hsmmc_context_restore(host); - host->dpm_state = ENABLED; - - dev_dbg(mmc_dev(host->mmc), "DISABLED -> ENABLED\n"); - - return 0; -} - -/* Handler for [SLEEP -> ENABLED] transition */ -static int omap_hsmmc_sleep_to_enabled(struct omap_hsmmc_host *host) -{ - if (!mmc_try_claim_host(host->mmc)) - return 0; - - clk_enable(host->fclk); - omap_hsmmc_context_restore(host); - if (mmc_slot(host).set_sleep) - mmc_slot(host).set_sleep(host->dev, host->slot_id, 0, - host->vdd, host->dpm_state == CARDSLEEP); - if (mmc_card_can_sleep(host->mmc)) - mmc_card_awake(host->mmc); - - dev_dbg(mmc_dev(host->mmc), "%s -> ENABLED\n", - host->dpm_state == CARDSLEEP ? 
"CARDSLEEP" : "REGSLEEP"); - - host->dpm_state = ENABLED; - - mmc_release_host(host->mmc); - - return 0; -} - -/* Handler for [OFF -> ENABLED] transition */ -static int omap_hsmmc_off_to_enabled(struct omap_hsmmc_host *host) -{ - clk_enable(host->fclk); - - omap_hsmmc_context_restore(host); - omap_hsmmc_conf_bus_power(host); - mmc_power_restore_host(host->mmc); - - host->dpm_state = ENABLED; - - dev_dbg(mmc_dev(host->mmc), "OFF -> ENABLED\n"); - - return 0; -} - -/* - * Bring MMC host to ENABLED from any other PM state. - */ -static int omap_hsmmc_enable(struct mmc_host *mmc) -{ - struct omap_hsmmc_host *host = mmc_priv(mmc); - - switch (host->dpm_state) { - case DISABLED: - return omap_hsmmc_disabled_to_enabled(host); - case CARDSLEEP: - case REGSLEEP: - return omap_hsmmc_sleep_to_enabled(host); - case OFF: - return omap_hsmmc_off_to_enabled(host); - default: - dev_dbg(mmc_dev(host->mmc), "UNKNOWN state\n"); - return -EINVAL; - } -} - -/* - * Bring MMC host in PM state (one level deeper). - */ -static int omap_hsmmc_disable(struct mmc_host *mmc, int lazy) -{ - struct omap_hsmmc_host *host = mmc_priv(mmc); - - switch (host->dpm_state) { - case ENABLED: { - int delay; - - delay = omap_hsmmc_enabled_to_disabled(host); - if (lazy || delay < 0) - return delay; - return 0; - } - case DISABLED: - return omap_hsmmc_disabled_to_sleep(host); - case CARDSLEEP: - case REGSLEEP: - return omap_hsmmc_sleep_to_off(host); - default: - dev_dbg(mmc_dev(host->mmc), "UNKNOWN state\n"); - return -EINVAL; - } -} - static int omap_hsmmc_enable_fclk(struct mmc_host *mmc) { struct omap_hsmmc_host *host = mmc_priv(mmc); - int err; - err = clk_enable(host->fclk); - if (err) - return err; - dev_dbg(mmc_dev(host->mmc), "mmc_fclk: enabled\n"); - omap_hsmmc_context_restore(host); + pm_runtime_get_sync(host->dev); + return 0; } @@ -1919,26 +1767,17 @@ static int omap_hsmmc_disable_fclk(struct mmc_host *mmc, int lazy) { struct omap_hsmmc_host *host = mmc_priv(mmc); - omap_hsmmc_context_save(host); - clk_disable(host->fclk); - dev_dbg(mmc_dev(host->mmc), "mmc_fclk: disabled\n"); + pm_runtime_mark_last_busy(host->dev); + pm_runtime_put_autosuspend(host->dev); + return 0; } static const struct mmc_host_ops omap_hsmmc_ops = { .enable = omap_hsmmc_enable_fclk, .disable = omap_hsmmc_disable_fclk, - .request = omap_hsmmc_request, - .set_ios = omap_hsmmc_set_ios, - .get_cd = omap_hsmmc_get_cd, - .get_ro = omap_hsmmc_get_ro, - .init_card = omap_hsmmc_init_card, - /* NYET -- enable_sdio_irq */ -}; - -static const struct mmc_host_ops omap_hsmmc_ps_ops = { - .enable = omap_hsmmc_enable, - .disable = omap_hsmmc_disable, + .post_req = omap_hsmmc_post_req, + .pre_req = omap_hsmmc_pre_req, .request = omap_hsmmc_request, .set_ios = omap_hsmmc_set_ios, .get_cd = omap_hsmmc_get_cd, @@ -1968,15 +1807,12 @@ static int omap_hsmmc_regs_show(struct seq_file *s, void *data) host->dpm_state, mmc->nesting_cnt, host->context_loss, context_loss); - if (host->suspended || host->dpm_state == OFF) { + if (host->suspended) { seq_printf(s, "host suspended, can't read registers\n"); return 0; } - if (clk_enable(host->fclk) != 0) { - seq_printf(s, "can't read the regs\n"); - return 0; - } + pm_runtime_get_sync(host->dev); seq_printf(s, "SYSCONFIG:\t0x%08x\n", OMAP_HSMMC_READ(host->base, SYSCONFIG)); @@ -1993,7 +1829,8 @@ static int omap_hsmmc_regs_show(struct seq_file *s, void *data) seq_printf(s, "CAPA:\t\t0x%08x\n", OMAP_HSMMC_READ(host->base, CAPA)); - clk_disable(host->fclk); + pm_runtime_mark_last_busy(host->dev); + 
pm_runtime_put_autosuspend(host->dev); return 0; } @@ -2077,14 +1914,12 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev) host->mapbase = res->start; host->base = ioremap(host->mapbase, SZ_4K); host->power_mode = MMC_POWER_OFF; + host->next_data.cookie = 1; platform_set_drvdata(pdev, host); INIT_WORK(&host->mmc_carddetect_work, omap_hsmmc_detect); - if (mmc_slot(host).power_saving) - mmc->ops = &omap_hsmmc_ps_ops; - else - mmc->ops = &omap_hsmmc_ops; + mmc->ops = &omap_hsmmc_ops; /* * If regulator_disable can only put vcc_aux to sleep then there is @@ -2093,44 +1928,26 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev) if (mmc_slot(host).vcc_aux_disable_is_sleep) mmc_slot(host).no_off = 1; - mmc->f_min = 400000; - mmc->f_max = 52000000; + mmc->f_min = OMAP_MMC_MIN_CLOCK; + mmc->f_max = OMAP_MMC_MAX_CLOCK; spin_lock_init(&host->irq_lock); - host->iclk = clk_get(&pdev->dev, "ick"); - if (IS_ERR(host->iclk)) { - ret = PTR_ERR(host->iclk); - host->iclk = NULL; - goto err1; - } host->fclk = clk_get(&pdev->dev, "fck"); if (IS_ERR(host->fclk)) { ret = PTR_ERR(host->fclk); host->fclk = NULL; - clk_put(host->iclk); goto err1; } omap_hsmmc_context_save(host); mmc->caps |= MMC_CAP_DISABLE; - mmc_set_disable_delay(mmc, OMAP_MMC_DISABLED_TIMEOUT); - /* we start off in DISABLED state */ - host->dpm_state = DISABLED; - if (clk_enable(host->iclk) != 0) { - clk_put(host->iclk); - clk_put(host->fclk); - goto err1; - } - - if (mmc_host_enable(host->mmc) != 0) { - clk_disable(host->iclk); - clk_put(host->iclk); - clk_put(host->fclk); - goto err1; - } + pm_runtime_enable(host->dev); + pm_runtime_get_sync(host->dev); + pm_runtime_set_autosuspend_delay(host->dev, MMC_AUTOSUSPEND_DELAY); + pm_runtime_use_autosuspend(host->dev); if (cpu_is_omap2430()) { host->dbclk = clk_get(&pdev->dev, "mmchsdb_fck"); @@ -2240,8 +2057,6 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev) omap_hsmmc_disable_irq(host); - mmc_host_lazy_disable(host->mmc); - omap_hsmmc_protect_card(host); mmc_add_host(mmc); @@ -2259,6 +2074,8 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev) } omap_hsmmc_debugfs(mmc); + pm_runtime_mark_last_busy(host->dev); + pm_runtime_put_autosuspend(host->dev); return 0; @@ -2274,10 +2091,9 @@ err_reg: err_irq_cd_init: free_irq(host->irq, host); err_irq: - mmc_host_disable(host->mmc); - clk_disable(host->iclk); + pm_runtime_mark_last_busy(host->dev); + pm_runtime_put_autosuspend(host->dev); clk_put(host->fclk); - clk_put(host->iclk); if (host->got_dbclk) { clk_disable(host->dbclk); clk_put(host->dbclk); @@ -2299,7 +2115,7 @@ static int omap_hsmmc_remove(struct platform_device *pdev) struct resource *res; if (host) { - mmc_host_enable(host->mmc); + pm_runtime_get_sync(host->dev); mmc_remove_host(host->mmc); if (host->use_reg) omap_hsmmc_reg_put(host); @@ -2310,10 +2126,9 @@ static int omap_hsmmc_remove(struct platform_device *pdev) free_irq(mmc_slot(host).card_detect_irq, host); flush_work_sync(&host->mmc_carddetect_work); - mmc_host_disable(host->mmc); - clk_disable(host->iclk); + pm_runtime_put_sync(host->dev); + pm_runtime_disable(host->dev); clk_put(host->fclk); - clk_put(host->iclk); if (host->got_dbclk) { clk_disable(host->dbclk); clk_put(host->dbclk); @@ -2343,6 +2158,7 @@ static int omap_hsmmc_suspend(struct device *dev) return 0; if (host) { + pm_runtime_get_sync(host->dev); host->suspended = 1; if (host->pdata->suspend) { ret = host->pdata->suspend(&pdev->dev, @@ -2357,13 +2173,11 @@ static int omap_hsmmc_suspend(struct device *dev) } 
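
[The clk_enable/clk_disable pairs removed throughout this file are replaced by runtime-PM usage counting: the runtime_resume callback restores context on the 0 -> 1 transition, and runtime_suspend saves it once the count drops back to zero and the autosuspend delay expires. A toy model of that contract; the timer-deferred autosuspend is collapsed here to an immediate suspend on the last put:]

#include <stdio.h>

static int usage_count;
static int powered;

static void runtime_resume(void)  { powered = 1; puts("context restored"); }
static void runtime_suspend(void) { powered = 0; puts("context saved"); }

/* Like pm_runtime_get_sync(): bump the count, resume on first user. */
static void get_sync(void)
{
	if (usage_count++ == 0)
		runtime_resume();
}

/* Like pm_runtime_put_autosuspend(): drop the count, suspend on last
 * user. (The real API defers this by the autosuspend delay; omitted.)
 */
static void put_autosuspend(void)
{
	if (--usage_count == 0)
		runtime_suspend();
}

int main(void)
{
	get_sync();		/* e.g. set_ios starts */
	get_sync();		/* nested user, e.g. an in-flight request */
	put_autosuspend();	/* still powered: count is 1 */
	printf("powered=%d usage=%d\n", powered, usage_count);
	put_autosuspend();	/* last user gone -> suspend */
	printf("powered=%d usage=%d\n", powered, usage_count);
	return 0;
}
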
cancel_work_sync(&host->mmc_carddetect_work); ret = mmc_suspend_host(host->mmc); - mmc_host_enable(host->mmc); + if (ret == 0) { omap_hsmmc_disable_irq(host); OMAP_HSMMC_WRITE(host->base, HCTL, OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP); - mmc_host_disable(host->mmc); - clk_disable(host->iclk); if (host->got_dbclk) clk_disable(host->dbclk); } else { @@ -2375,9 +2189,8 @@ static int omap_hsmmc_suspend(struct device *dev) dev_dbg(mmc_dev(host->mmc), "Unmask interrupt failed\n"); } - mmc_host_disable(host->mmc); } - + pm_runtime_put_sync(host->dev); } return ret; } @@ -2393,14 +2206,7 @@ static int omap_hsmmc_resume(struct device *dev) return 0; if (host) { - ret = clk_enable(host->iclk); - if (ret) - goto clk_en_err; - - if (mmc_host_enable(host->mmc) != 0) { - clk_disable(host->iclk); - goto clk_en_err; - } + pm_runtime_get_sync(host->dev); if (host->got_dbclk) clk_enable(host->dbclk); @@ -2421,15 +2227,12 @@ static int omap_hsmmc_resume(struct device *dev) if (ret == 0) host->suspended = 0; - mmc_host_lazy_disable(host->mmc); + pm_runtime_mark_last_busy(host->dev); + pm_runtime_put_autosuspend(host->dev); } return ret; -clk_en_err: - dev_dbg(mmc_dev(host->mmc), - "Failed to enable MMC clocks during resume\n"); - return ret; } #else @@ -2437,9 +2240,33 @@ clk_en_err: #define omap_hsmmc_resume NULL #endif +static int omap_hsmmc_runtime_suspend(struct device *dev) +{ + struct omap_hsmmc_host *host; + + host = platform_get_drvdata(to_platform_device(dev)); + omap_hsmmc_context_save(host); + dev_dbg(mmc_dev(host->mmc), "disabled\n"); + + return 0; +} + +static int omap_hsmmc_runtime_resume(struct device *dev) +{ + struct omap_hsmmc_host *host; + + host = platform_get_drvdata(to_platform_device(dev)); + omap_hsmmc_context_restore(host); + dev_dbg(mmc_dev(host->mmc), "enabled\n"); + + return 0; +} + static struct dev_pm_ops omap_hsmmc_dev_pm_ops = { .suspend = omap_hsmmc_suspend, .resume = omap_hsmmc_resume, + .runtime_suspend = omap_hsmmc_runtime_suspend, + .runtime_resume = omap_hsmmc_runtime_resume, }; static struct platform_driver omap_hsmmc_driver = { diff --git a/drivers/mmc/host/sdhci-cns3xxx.c b/drivers/mmc/host/sdhci-cns3xxx.c index 9ebd1d7759dc..4b920b7621cf 100644 --- a/drivers/mmc/host/sdhci-cns3xxx.c +++ b/drivers/mmc/host/sdhci-cns3xxx.c @@ -15,9 +15,7 @@ #include <linux/delay.h> #include <linux/device.h> #include <linux/mmc/host.h> -#include <linux/mmc/sdhci-pltfm.h> #include <mach/cns3xxx.h> -#include "sdhci.h" #include "sdhci-pltfm.h" static unsigned int sdhci_cns3xxx_get_max_clk(struct sdhci_host *host) @@ -86,7 +84,7 @@ static struct sdhci_ops sdhci_cns3xxx_ops = { .set_clock = sdhci_cns3xxx_set_clock, }; -struct sdhci_pltfm_data sdhci_cns3xxx_pdata = { +static struct sdhci_pltfm_data sdhci_cns3xxx_pdata = { .ops = &sdhci_cns3xxx_ops, .quirks = SDHCI_QUIRK_BROKEN_DMA | SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | @@ -95,3 +93,43 @@ struct sdhci_pltfm_data sdhci_cns3xxx_pdata = { SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | SDHCI_QUIRK_NONSTANDARD_CLOCK, }; + +static int __devinit sdhci_cns3xxx_probe(struct platform_device *pdev) +{ + return sdhci_pltfm_register(pdev, &sdhci_cns3xxx_pdata); +} + +static int __devexit sdhci_cns3xxx_remove(struct platform_device *pdev) +{ + return sdhci_pltfm_unregister(pdev); +} + +static struct platform_driver sdhci_cns3xxx_driver = { + .driver = { + .name = "sdhci-cns3xxx", + .owner = THIS_MODULE, + }, + .probe = sdhci_cns3xxx_probe, + .remove = __devexit_p(sdhci_cns3xxx_remove), +#ifdef CONFIG_PM + .suspend = sdhci_pltfm_suspend, + .resume = sdhci_pltfm_resume, 
+#endif +}; + +static int __init sdhci_cns3xxx_init(void) +{ + return platform_driver_register(&sdhci_cns3xxx_driver); +} +module_init(sdhci_cns3xxx_init); + +static void __exit sdhci_cns3xxx_exit(void) +{ + platform_driver_unregister(&sdhci_cns3xxx_driver); +} +module_exit(sdhci_cns3xxx_exit); + +MODULE_DESCRIPTION("SDHCI driver for CNS3xxx"); +MODULE_AUTHOR("Scott Shu, " + "Anton Vorontsov <avorontsov@mvista.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mmc/host/sdhci-dove.c b/drivers/mmc/host/sdhci-dove.c index 2aeef4ffed8c..f2d29dca4420 100644 --- a/drivers/mmc/host/sdhci-dove.c +++ b/drivers/mmc/host/sdhci-dove.c @@ -22,7 +22,6 @@ #include <linux/io.h> #include <linux/mmc/host.h> -#include "sdhci.h" #include "sdhci-pltfm.h" static u16 sdhci_dove_readw(struct sdhci_host *host, int reg) @@ -61,10 +60,50 @@ static struct sdhci_ops sdhci_dove_ops = { .read_l = sdhci_dove_readl, }; -struct sdhci_pltfm_data sdhci_dove_pdata = { +static struct sdhci_pltfm_data sdhci_dove_pdata = { .ops = &sdhci_dove_ops, .quirks = SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER | SDHCI_QUIRK_NO_BUSY_IRQ | SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | SDHCI_QUIRK_FORCE_DMA, }; + +static int __devinit sdhci_dove_probe(struct platform_device *pdev) +{ + return sdhci_pltfm_register(pdev, &sdhci_dove_pdata); +} + +static int __devexit sdhci_dove_remove(struct platform_device *pdev) +{ + return sdhci_pltfm_unregister(pdev); +} + +static struct platform_driver sdhci_dove_driver = { + .driver = { + .name = "sdhci-dove", + .owner = THIS_MODULE, + }, + .probe = sdhci_dove_probe, + .remove = __devexit_p(sdhci_dove_remove), +#ifdef CONFIG_PM + .suspend = sdhci_pltfm_suspend, + .resume = sdhci_pltfm_resume, +#endif +}; + +static int __init sdhci_dove_init(void) +{ + return platform_driver_register(&sdhci_dove_driver); +} +module_init(sdhci_dove_init); + +static void __exit sdhci_dove_exit(void) +{ + platform_driver_unregister(&sdhci_dove_driver); +} +module_exit(sdhci_dove_exit); + +MODULE_DESCRIPTION("SDHCI driver for Dove"); +MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>, " + "Mike Rapoport <mike@compulab.co.il>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index a19967d0bfc4..710b706f4fcf 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -18,12 +18,10 @@ #include <linux/gpio.h> #include <linux/slab.h> #include <linux/mmc/host.h> -#include <linux/mmc/sdhci-pltfm.h> #include <linux/mmc/mmc.h> #include <linux/mmc/sdio.h> #include <mach/hardware.h> #include <mach/esdhc.h> -#include "sdhci.h" #include "sdhci-pltfm.h" #include "sdhci-esdhc.h" @@ -31,7 +29,7 @@ #define SDHCI_VENDOR_SPEC 0xC0 #define SDHCI_VENDOR_SPEC_SDIO_QUIRK 0x00000002 -#define ESDHC_FLAG_GPIO_FOR_CD_WP (1 << 0) +#define ESDHC_FLAG_GPIO_FOR_CD (1 << 0) /* * The CMDTYPE of the CMD register (offset 0xE) should be set to * "11" when the STOP CMD12 is issued on imx53 to abort one @@ -67,14 +65,14 @@ static u32 esdhc_readl_le(struct sdhci_host *host, int reg) u32 val = readl(host->ioaddr + reg); if (unlikely((reg == SDHCI_PRESENT_STATE) - && (imx_data->flags & ESDHC_FLAG_GPIO_FOR_CD_WP))) { + && (imx_data->flags & ESDHC_FLAG_GPIO_FOR_CD))) { struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data; if (boarddata && gpio_is_valid(boarddata->cd_gpio) && gpio_get_value(boarddata->cd_gpio)) /* no card, if a valid gpio says so... */ - val &= SDHCI_CARD_PRESENT; + val &= ~SDHCI_CARD_PRESENT; else /* ... 
in all other cases assume card is present */ val |= SDHCI_CARD_PRESENT; @@ -89,7 +87,7 @@ static void esdhc_writel_le(struct sdhci_host *host, u32 val, int reg) struct pltfm_imx_data *imx_data = pltfm_host->priv; if (unlikely((reg == SDHCI_INT_ENABLE || reg == SDHCI_SIGNAL_ENABLE) - && (imx_data->flags & ESDHC_FLAG_GPIO_FOR_CD_WP))) + && (imx_data->flags & ESDHC_FLAG_GPIO_FOR_CD))) /* * these interrupts won't work with a custom card_detect gpio * (only applied to mx25/35) @@ -191,16 +189,6 @@ static unsigned int esdhc_pltfm_get_min_clock(struct sdhci_host *host) return clk_get_rate(pltfm_host->clk) / 256 / 16; } -static unsigned int esdhc_pltfm_get_ro(struct sdhci_host *host) -{ - struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data; - - if (boarddata && gpio_is_valid(boarddata->wp_gpio)) - return gpio_get_value(boarddata->wp_gpio); - else - return -ENOSYS; -} - static struct sdhci_ops sdhci_esdhc_ops = { .read_l = esdhc_readl_le, .read_w = esdhc_readw_le, @@ -212,6 +200,24 @@ static struct sdhci_ops sdhci_esdhc_ops = { .get_min_clock = esdhc_pltfm_get_min_clock, }; +static struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = { + .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_ADMA + | SDHCI_QUIRK_BROKEN_CARD_DETECTION, + /* ADMA has issues. Might be fixable */ + .ops = &sdhci_esdhc_ops, +}; + +static unsigned int esdhc_pltfm_get_ro(struct sdhci_host *host) +{ + struct esdhc_platform_data *boarddata = + host->mmc->parent->platform_data; + + if (boarddata && gpio_is_valid(boarddata->wp_gpio)) + return gpio_get_value(boarddata->wp_gpio); + else + return -ENOSYS; +} + static irqreturn_t cd_irq(int irq, void *data) { struct sdhci_host *sdhost = (struct sdhci_host *)data; @@ -220,30 +226,35 @@ static irqreturn_t cd_irq(int irq, void *data) return IRQ_HANDLED; }; -static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pdata) +static int __devinit sdhci_esdhc_imx_probe(struct platform_device *pdev) { - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data; + struct sdhci_pltfm_host *pltfm_host; + struct sdhci_host *host; + struct esdhc_platform_data *boarddata; struct clk *clk; int err; struct pltfm_imx_data *imx_data; + host = sdhci_pltfm_init(pdev, &sdhci_esdhc_imx_pdata); + if (IS_ERR(host)) + return PTR_ERR(host); + + pltfm_host = sdhci_priv(host); + + imx_data = kzalloc(sizeof(struct pltfm_imx_data), GFP_KERNEL); + if (!imx_data) + return -ENOMEM; + pltfm_host->priv = imx_data; + clk = clk_get(mmc_dev(host->mmc), NULL); if (IS_ERR(clk)) { dev_err(mmc_dev(host->mmc), "clk err\n"); - return PTR_ERR(clk); + err = PTR_ERR(clk); + goto err_clk_get; } clk_enable(clk); pltfm_host->clk = clk; - imx_data = kzalloc(sizeof(struct pltfm_imx_data), GFP_KERNEL); - if (!imx_data) { - clk_disable(pltfm_host->clk); - clk_put(pltfm_host->clk); - return -ENOMEM; - } - pltfm_host->priv = imx_data; - if (!cpu_is_mx25()) host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; @@ -257,6 +268,7 @@ static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pd if (!(cpu_is_mx25() || cpu_is_mx35() || cpu_is_mx51())) imx_data->flags |= ESDHC_FLAG_MULTIBLK_NO_INT; + boarddata = host->mmc->parent->platform_data; if (boarddata) { err = gpio_request_one(boarddata->wp_gpio, GPIOF_IN, "ESDHC_WP"); if (err) { @@ -284,11 +296,15 @@ static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pd goto no_card_detect_irq; } - imx_data->flags |= ESDHC_FLAG_GPIO_FOR_CD_WP; + 
imx_data->flags |= ESDHC_FLAG_GPIO_FOR_CD; /* Now we have a working card_detect again */ host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION; } + err = sdhci_add_host(host); + if (err) + goto err_add_host; + return 0; no_card_detect_irq: @@ -297,14 +313,23 @@ static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pd boarddata->cd_gpio = err; not_supported: kfree(imx_data); - return 0; + err_add_host: + clk_disable(pltfm_host->clk); + clk_put(pltfm_host->clk); + err_clk_get: + sdhci_pltfm_free(pdev); + return err; } -static void esdhc_pltfm_exit(struct sdhci_host *host) +static int __devexit sdhci_esdhc_imx_remove(struct platform_device *pdev) { + struct sdhci_host *host = platform_get_drvdata(pdev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data; struct pltfm_imx_data *imx_data = pltfm_host->priv; + int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff); + + sdhci_remove_host(host, dead); if (boarddata && gpio_is_valid(boarddata->wp_gpio)) gpio_free(boarddata->wp_gpio); @@ -319,13 +344,37 @@ static void esdhc_pltfm_exit(struct sdhci_host *host) clk_disable(pltfm_host->clk); clk_put(pltfm_host->clk); kfree(imx_data); + + sdhci_pltfm_free(pdev); + + return 0; } -struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = { - .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_ADMA - | SDHCI_QUIRK_BROKEN_CARD_DETECTION, - /* ADMA has issues. Might be fixable */ - .ops = &sdhci_esdhc_ops, - .init = esdhc_pltfm_init, - .exit = esdhc_pltfm_exit, +static struct platform_driver sdhci_esdhc_imx_driver = { + .driver = { + .name = "sdhci-esdhc-imx", + .owner = THIS_MODULE, + }, + .probe = sdhci_esdhc_imx_probe, + .remove = __devexit_p(sdhci_esdhc_imx_remove), +#ifdef CONFIG_PM + .suspend = sdhci_pltfm_suspend, + .resume = sdhci_pltfm_resume, +#endif }; + +static int __init sdhci_esdhc_imx_init(void) +{ + return platform_driver_register(&sdhci_esdhc_imx_driver); +} +module_init(sdhci_esdhc_imx_init); + +static void __exit sdhci_esdhc_imx_exit(void) +{ + platform_driver_unregister(&sdhci_esdhc_imx_driver); +} +module_exit(sdhci_esdhc_imx_exit); + +MODULE_DESCRIPTION("SDHCI driver for Freescale i.MX eSDHC"); +MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mmc/host/sdhci-of-core.c b/drivers/mmc/host/sdhci-of-core.c deleted file mode 100644 index 60e4186a4345..000000000000 --- a/drivers/mmc/host/sdhci-of-core.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * OpenFirmware bindings for Secure Digital Host Controller Interface. - * - * Copyright (c) 2007 Freescale Semiconductor, Inc. - * Copyright (c) 2009 MontaVista Software, Inc. - * - * Authors: Xiaobo Xie <X.Xie@freescale.com> - * Anton Vorontsov <avorontsov@ru.mvista.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. 
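The reworked i.MX probe above also straightens out the error handling: the old init hook freed resources inline and even returned 0 from some failure paths (the old not_supported label), while the new probe unwinds through ordered goto labels (err_add_host, err_clk_get), each undoing exactly what was acquired before the failure. A runnable toy model of the idiom, with hypothetical resource names:

#include <stdio.h>

static int grab_clk(void)  { puts("clk acquired"); return 0; }
static void drop_clk(void) { puts("clk released"); }
static int add_host(void)  { puts("add_host fails"); return -1; }

/* hypothetical probe(): acquire in order, unwind in reverse on failure */
static int probe(void)
{
        int err;

        err = grab_clk();
        if (err)
                goto err_clk;
        err = add_host();
        if (err)
                goto err_add;
        return 0;

err_add:
        drop_clk(); /* undo only what was already acquired */
err_clk:
        return err;
}

int main(void)
{
        return probe() ? 1 : 0;
}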
- */ - -#include <linux/err.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/io.h> -#include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/of.h> -#include <linux/of_platform.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> -#include <linux/mmc/host.h> -#ifdef CONFIG_PPC -#include <asm/machdep.h> -#endif -#include "sdhci-of.h" -#include "sdhci.h" - -#ifdef CONFIG_MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER - -/* - * These accessors are designed for big endian hosts doing I/O to - * little endian controllers incorporating a 32-bit hardware byte swapper. - */ - -u32 sdhci_be32bs_readl(struct sdhci_host *host, int reg) -{ - return in_be32(host->ioaddr + reg); -} - -u16 sdhci_be32bs_readw(struct sdhci_host *host, int reg) -{ - return in_be16(host->ioaddr + (reg ^ 0x2)); -} - -u8 sdhci_be32bs_readb(struct sdhci_host *host, int reg) -{ - return in_8(host->ioaddr + (reg ^ 0x3)); -} - -void sdhci_be32bs_writel(struct sdhci_host *host, u32 val, int reg) -{ - out_be32(host->ioaddr + reg, val); -} - -void sdhci_be32bs_writew(struct sdhci_host *host, u16 val, int reg) -{ - struct sdhci_of_host *of_host = sdhci_priv(host); - int base = reg & ~0x3; - int shift = (reg & 0x2) * 8; - - switch (reg) { - case SDHCI_TRANSFER_MODE: - /* - * Postpone this write, we must do it together with a - * command write that is down below. - */ - of_host->xfer_mode_shadow = val; - return; - case SDHCI_COMMAND: - sdhci_be32bs_writel(host, val << 16 | of_host->xfer_mode_shadow, - SDHCI_TRANSFER_MODE); - return; - } - clrsetbits_be32(host->ioaddr + base, 0xffff << shift, val << shift); -} - -void sdhci_be32bs_writeb(struct sdhci_host *host, u8 val, int reg) -{ - int base = reg & ~0x3; - int shift = (reg & 0x3) * 8; - - clrsetbits_be32(host->ioaddr + base , 0xff << shift, val << shift); -} -#endif /* CONFIG_MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER */ - -#ifdef CONFIG_PM - -static int sdhci_of_suspend(struct platform_device *ofdev, pm_message_t state) -{ - struct sdhci_host *host = dev_get_drvdata(&ofdev->dev); - - return mmc_suspend_host(host->mmc); -} - -static int sdhci_of_resume(struct platform_device *ofdev) -{ - struct sdhci_host *host = dev_get_drvdata(&ofdev->dev); - - return mmc_resume_host(host->mmc); -} - -#else - -#define sdhci_of_suspend NULL -#define sdhci_of_resume NULL - -#endif - -static bool __devinit sdhci_of_wp_inverted(struct device_node *np) -{ - if (of_get_property(np, "sdhci,wp-inverted", NULL)) - return true; - - /* Old device trees don't have the wp-inverted property. 
*/ -#ifdef CONFIG_PPC - return machine_is(mpc837x_rdb) || machine_is(mpc837x_mds); -#else - return false; -#endif -} - -static const struct of_device_id sdhci_of_match[]; -static int __devinit sdhci_of_probe(struct platform_device *ofdev) -{ - const struct of_device_id *match; - struct device_node *np = ofdev->dev.of_node; - struct sdhci_of_data *sdhci_of_data; - struct sdhci_host *host; - struct sdhci_of_host *of_host; - const __be32 *clk; - int size; - int ret; - - match = of_match_device(sdhci_of_match, &ofdev->dev); - if (!match) - return -EINVAL; - sdhci_of_data = match->data; - - if (!of_device_is_available(np)) - return -ENODEV; - - host = sdhci_alloc_host(&ofdev->dev, sizeof(*of_host)); - if (IS_ERR(host)) - return -ENOMEM; - - of_host = sdhci_priv(host); - dev_set_drvdata(&ofdev->dev, host); - - host->ioaddr = of_iomap(np, 0); - if (!host->ioaddr) { - ret = -ENOMEM; - goto err_addr_map; - } - - host->irq = irq_of_parse_and_map(np, 0); - if (!host->irq) { - ret = -EINVAL; - goto err_no_irq; - } - - host->hw_name = dev_name(&ofdev->dev); - if (sdhci_of_data) { - host->quirks = sdhci_of_data->quirks; - host->ops = &sdhci_of_data->ops; - } - - if (of_get_property(np, "sdhci,auto-cmd12", NULL)) - host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12; - - - if (of_get_property(np, "sdhci,1-bit-only", NULL)) - host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA; - - if (sdhci_of_wp_inverted(np)) - host->quirks |= SDHCI_QUIRK_INVERTED_WRITE_PROTECT; - - clk = of_get_property(np, "clock-frequency", &size); - if (clk && size == sizeof(*clk) && *clk) - of_host->clock = be32_to_cpup(clk); - - ret = sdhci_add_host(host); - if (ret) - goto err_add_host; - - return 0; - -err_add_host: - irq_dispose_mapping(host->irq); -err_no_irq: - iounmap(host->ioaddr); -err_addr_map: - sdhci_free_host(host); - return ret; -} - -static int __devexit sdhci_of_remove(struct platform_device *ofdev) -{ - struct sdhci_host *host = dev_get_drvdata(&ofdev->dev); - - sdhci_remove_host(host, 0); - sdhci_free_host(host); - irq_dispose_mapping(host->irq); - iounmap(host->ioaddr); - return 0; -} - -static const struct of_device_id sdhci_of_match[] = { -#ifdef CONFIG_MMC_SDHCI_OF_ESDHC - { .compatible = "fsl,mpc8379-esdhc", .data = &sdhci_esdhc, }, - { .compatible = "fsl,mpc8536-esdhc", .data = &sdhci_esdhc, }, - { .compatible = "fsl,esdhc", .data = &sdhci_esdhc, }, -#endif -#ifdef CONFIG_MMC_SDHCI_OF_HLWD - { .compatible = "nintendo,hollywood-sdhci", .data = &sdhci_hlwd, }, -#endif - { .compatible = "generic-sdhci", }, - {}, -}; -MODULE_DEVICE_TABLE(of, sdhci_of_match); - -static struct platform_driver sdhci_of_driver = { - .driver = { - .name = "sdhci-of", - .owner = THIS_MODULE, - .of_match_table = sdhci_of_match, - }, - .probe = sdhci_of_probe, - .remove = __devexit_p(sdhci_of_remove), - .suspend = sdhci_of_suspend, - .resume = sdhci_of_resume, -}; - -static int __init sdhci_of_init(void) -{ - return platform_driver_register(&sdhci_of_driver); -} -module_init(sdhci_of_init); - -static void __exit sdhci_of_exit(void) -{ - platform_driver_unregister(&sdhci_of_driver); -} -module_exit(sdhci_of_exit); - -MODULE_DESCRIPTION("Secure Digital Host Controller Interface OF driver"); -MODULE_AUTHOR("Xiaobo Xie <X.Xie@freescale.com>, " - "Anton Vorontsov <avorontsov@ru.mvista.com>"); -MODULE_LICENSE("GPL"); diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index ba40d6d035c7..fe604df65011 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -16,8 +16,7 @@ #include 
<linux/io.h> #include <linux/delay.h> #include <linux/mmc/host.h> -#include "sdhci-of.h" -#include "sdhci.h" +#include "sdhci-pltfm.h" #include "sdhci-esdhc.h" static u16 esdhc_readw(struct sdhci_host *host, int reg) @@ -60,32 +59,83 @@ static int esdhc_of_enable_dma(struct sdhci_host *host) static unsigned int esdhc_of_get_max_clock(struct sdhci_host *host) { - struct sdhci_of_host *of_host = sdhci_priv(host); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - return of_host->clock; + return pltfm_host->clock; } static unsigned int esdhc_of_get_min_clock(struct sdhci_host *host) { - struct sdhci_of_host *of_host = sdhci_priv(host); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - return of_host->clock / 256 / 16; + return pltfm_host->clock / 256 / 16; } -struct sdhci_of_data sdhci_esdhc = { +static struct sdhci_ops sdhci_esdhc_ops = { + .read_l = sdhci_be32bs_readl, + .read_w = esdhc_readw, + .read_b = sdhci_be32bs_readb, + .write_l = sdhci_be32bs_writel, + .write_w = esdhc_writew, + .write_b = esdhc_writeb, + .set_clock = esdhc_set_clock, + .enable_dma = esdhc_of_enable_dma, + .get_max_clock = esdhc_of_get_max_clock, + .get_min_clock = esdhc_of_get_min_clock, +}; + +static struct sdhci_pltfm_data sdhci_esdhc_pdata = { /* card detection could be handled via GPIO */ .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_CARD_DETECTION | SDHCI_QUIRK_NO_CARD_NO_RESET, - .ops = { - .read_l = sdhci_be32bs_readl, - .read_w = esdhc_readw, - .read_b = sdhci_be32bs_readb, - .write_l = sdhci_be32bs_writel, - .write_w = esdhc_writew, - .write_b = esdhc_writeb, - .set_clock = esdhc_set_clock, - .enable_dma = esdhc_of_enable_dma, - .get_max_clock = esdhc_of_get_max_clock, - .get_min_clock = esdhc_of_get_min_clock, + .ops = &sdhci_esdhc_ops, +}; + +static int __devinit sdhci_esdhc_probe(struct platform_device *pdev) +{ + return sdhci_pltfm_register(pdev, &sdhci_esdhc_pdata); +} + +static int __devexit sdhci_esdhc_remove(struct platform_device *pdev) +{ + return sdhci_pltfm_unregister(pdev); +} + +static const struct of_device_id sdhci_esdhc_of_match[] = { + { .compatible = "fsl,mpc8379-esdhc" }, + { .compatible = "fsl,mpc8536-esdhc" }, + { .compatible = "fsl,esdhc" }, + { } +}; +MODULE_DEVICE_TABLE(of, sdhci_esdhc_of_match); + +static struct platform_driver sdhci_esdhc_driver = { + .driver = { + .name = "sdhci-esdhc", + .owner = THIS_MODULE, + .of_match_table = sdhci_esdhc_of_match, }, + .probe = sdhci_esdhc_probe, + .remove = __devexit_p(sdhci_esdhc_remove), +#ifdef CONFIG_PM + .suspend = sdhci_pltfm_suspend, + .resume = sdhci_pltfm_resume, +#endif }; + +static int __init sdhci_esdhc_init(void) +{ + return platform_driver_register(&sdhci_esdhc_driver); +} +module_init(sdhci_esdhc_init); + +static void __exit sdhci_esdhc_exit(void) +{ + platform_driver_unregister(&sdhci_esdhc_driver); +} +module_exit(sdhci_esdhc_exit); + +MODULE_DESCRIPTION("SDHCI OF driver for Freescale MPC eSDHC"); +MODULE_AUTHOR("Xiaobo Xie <X.Xie@freescale.com>, " + "Anton Vorontsov <avorontsov@ru.mvista.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mmc/host/sdhci-of-hlwd.c b/drivers/mmc/host/sdhci-of-hlwd.c index 68ddb7546ae2..735be131dca9 100644 --- a/drivers/mmc/host/sdhci-of-hlwd.c +++ b/drivers/mmc/host/sdhci-of-hlwd.c @@ -21,8 +21,7 @@ #include <linux/delay.h> #include <linux/mmc/host.h> -#include "sdhci-of.h" -#include "sdhci.h" +#include "sdhci-pltfm.h" /* * Ops and quirks for the Nintendo Wii SDHCI controllers. 
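Worth noting in the eSDHC conversion above: esdhc_of_get_min_clock() divides the device-tree-supplied platform clock by 256 and then by 16, i.e. by 4096 in total, the largest divisor chain this controller supports. A quick standalone sanity check, assuming a hypothetical 133 MHz platform clock:

#include <stdio.h>

int main(void)
{
        unsigned int clock = 133000000; /* assumed platform clock, in Hz */
        unsigned int min = clock / 256 / 16;

        /* 133000000 / 4096 = 32470 Hz, i.e. roughly 32 kHz */
        printf("minimum SD clock = %u Hz (~%u kHz)\n", min, min / 1000);
        return 0;
}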
@@ -51,15 +50,63 @@ static void sdhci_hlwd_writeb(struct sdhci_host *host, u8 val, int reg) udelay(SDHCI_HLWD_WRITE_DELAY); } -struct sdhci_of_data sdhci_hlwd = { +static struct sdhci_ops sdhci_hlwd_ops = { + .read_l = sdhci_be32bs_readl, + .read_w = sdhci_be32bs_readw, + .read_b = sdhci_be32bs_readb, + .write_l = sdhci_hlwd_writel, + .write_w = sdhci_hlwd_writew, + .write_b = sdhci_hlwd_writeb, +}; + +static struct sdhci_pltfm_data sdhci_hlwd_pdata = { .quirks = SDHCI_QUIRK_32BIT_DMA_ADDR | SDHCI_QUIRK_32BIT_DMA_SIZE, - .ops = { - .read_l = sdhci_be32bs_readl, - .read_w = sdhci_be32bs_readw, - .read_b = sdhci_be32bs_readb, - .write_l = sdhci_hlwd_writel, - .write_w = sdhci_hlwd_writew, - .write_b = sdhci_hlwd_writeb, + .ops = &sdhci_hlwd_ops, +}; + +static int __devinit sdhci_hlwd_probe(struct platform_device *pdev) +{ + return sdhci_pltfm_register(pdev, &sdhci_hlwd_pdata); +} + +static int __devexit sdhci_hlwd_remove(struct platform_device *pdev) +{ + return sdhci_pltfm_unregister(pdev); +} + +static const struct of_device_id sdhci_hlwd_of_match[] = { + { .compatible = "nintendo,hollywood-sdhci" }, + { } +}; +MODULE_DEVICE_TABLE(of, sdhci_hlwd_of_match); + +static struct platform_driver sdhci_hlwd_driver = { + .driver = { + .name = "sdhci-hlwd", + .owner = THIS_MODULE, + .of_match_table = sdhci_hlwd_of_match, }, + .probe = sdhci_hlwd_probe, + .remove = __devexit_p(sdhci_hlwd_remove), +#ifdef CONFIG_PM + .suspend = sdhci_pltfm_suspend, + .resume = sdhci_pltfm_resume, +#endif }; + +static int __init sdhci_hlwd_init(void) +{ + return platform_driver_register(&sdhci_hlwd_driver); +} +module_init(sdhci_hlwd_init); + +static void __exit sdhci_hlwd_exit(void) +{ + platform_driver_unregister(&sdhci_hlwd_driver); +} +module_exit(sdhci_hlwd_exit); + +MODULE_DESCRIPTION("Nintendo Wii SDHCI OF driver"); +MODULE_AUTHOR("The GameCube Linux Team, Albert Herranz"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mmc/host/sdhci-of.h b/drivers/mmc/host/sdhci-of.h deleted file mode 100644 index ad09ad9915d8..000000000000 --- a/drivers/mmc/host/sdhci-of.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * OpenFirmware bindings for Secure Digital Host Controller Interface. - * - * Copyright (c) 2007 Freescale Semiconductor, Inc. - * Copyright (c) 2009 MontaVista Software, Inc. - * - * Authors: Xiaobo Xie <X.Xie@freescale.com> - * Anton Vorontsov <avorontsov@ru.mvista.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. 
- */ - -#ifndef __SDHCI_OF_H -#define __SDHCI_OF_H - -#include <linux/types.h> -#include "sdhci.h" - -struct sdhci_of_data { - unsigned int quirks; - struct sdhci_ops ops; -}; - -struct sdhci_of_host { - unsigned int clock; - u16 xfer_mode_shadow; -}; - -extern u32 sdhci_be32bs_readl(struct sdhci_host *host, int reg); -extern u16 sdhci_be32bs_readw(struct sdhci_host *host, int reg); -extern u8 sdhci_be32bs_readb(struct sdhci_host *host, int reg); -extern void sdhci_be32bs_writel(struct sdhci_host *host, u32 val, int reg); -extern void sdhci_be32bs_writew(struct sdhci_host *host, u16 val, int reg); -extern void sdhci_be32bs_writeb(struct sdhci_host *host, u8 val, int reg); - -extern struct sdhci_of_data sdhci_esdhc; -extern struct sdhci_of_data sdhci_hlwd; - -#endif /* __SDHCI_OF_H */ diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 936bbca19c0a..26c528648f3c 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -143,6 +143,12 @@ static const struct sdhci_pci_fixes sdhci_cafe = { SDHCI_QUIRK_BROKEN_TIMEOUT_VAL, }; +static int mrst_hc_probe_slot(struct sdhci_pci_slot *slot) +{ + slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA; + return 0; +} + /* * ADMA operation is disabled for Moorestown platform due to * hardware bugs. @@ -157,8 +163,15 @@ static int mrst_hc_probe(struct sdhci_pci_chip *chip) return 0; } +static int mfd_emmc_probe_slot(struct sdhci_pci_slot *slot) +{ + slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA; + return 0; +} + static const struct sdhci_pci_fixes sdhci_intel_mrst_hc0 = { .quirks = SDHCI_QUIRK_BROKEN_ADMA | SDHCI_QUIRK_NO_HISPD_BIT, + .probe_slot = mrst_hc_probe_slot, }; static const struct sdhci_pci_fixes sdhci_intel_mrst_hc1_hc2 = { @@ -170,8 +183,13 @@ static const struct sdhci_pci_fixes sdhci_intel_mfd_sd = { .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, }; -static const struct sdhci_pci_fixes sdhci_intel_mfd_emmc_sdio = { +static const struct sdhci_pci_fixes sdhci_intel_mfd_sdio = { + .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, +}; + +static const struct sdhci_pci_fixes sdhci_intel_mfd_emmc = { .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, + .probe_slot = mfd_emmc_probe_slot, }; /* O2Micro extra registers */ @@ -682,7 +700,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_INTEL_MFD_SDIO1, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio, + .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_sdio, }, { @@ -690,7 +708,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_INTEL_MFD_SDIO2, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio, + .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_sdio, }, { @@ -698,7 +716,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_INTEL_MFD_EMMC0, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio, + .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_emmc, }, { @@ -706,7 +724,7 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_INTEL_MFD_EMMC1, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio, + .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_emmc, }, { @@ -789,8 +807,34 @@ static int sdhci_pci_enable_dma(struct sdhci_host *host) return 0; } +static int sdhci_pci_8bit_width(struct sdhci_host *host, int 
width) +{ + u8 ctrl; + + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); + + switch (width) { + case MMC_BUS_WIDTH_8: + ctrl |= SDHCI_CTRL_8BITBUS; + ctrl &= ~SDHCI_CTRL_4BITBUS; + break; + case MMC_BUS_WIDTH_4: + ctrl |= SDHCI_CTRL_4BITBUS; + ctrl &= ~SDHCI_CTRL_8BITBUS; + break; + default: + ctrl &= ~(SDHCI_CTRL_8BITBUS | SDHCI_CTRL_4BITBUS); + break; + } + + sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL); + + return 0; +} + static struct sdhci_ops sdhci_pci_ops = { .enable_dma = sdhci_pci_enable_dma, + .platform_8bit_width = sdhci_pci_8bit_width, }; /*****************************************************************************\ diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c index dbab0407f4b6..71c0ce1f6db0 100644 --- a/drivers/mmc/host/sdhci-pltfm.c +++ b/drivers/mmc/host/sdhci-pltfm.c @@ -2,6 +2,12 @@ * sdhci-pltfm.c Support for SDHCI platform devices * Copyright (c) 2009 Intel Corporation * + * Copyright (c) 2007 Freescale Semiconductor, Inc. + * Copyright (c) 2009 MontaVista Software, Inc. + * + * Authors: Xiaobo Xie <X.Xie@freescale.com> + * Anton Vorontsov <avorontsov@ru.mvista.com> + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -22,48 +28,66 @@ * Inspired by sdhci-pci.c, by Pierre Ossman */ -#include <linux/delay.h> -#include <linux/highmem.h> -#include <linux/mod_devicetable.h> -#include <linux/platform_device.h> +#include <linux/err.h> +#include <linux/of.h> +#ifdef CONFIG_PPC +#include <asm/machdep.h> +#endif +#include "sdhci-pltfm.h" -#include <linux/mmc/host.h> +static struct sdhci_ops sdhci_pltfm_ops = { +}; -#include <linux/io.h> -#include <linux/mmc/sdhci-pltfm.h> +#ifdef CONFIG_OF +static bool sdhci_of_wp_inverted(struct device_node *np) +{ + if (of_get_property(np, "sdhci,wp-inverted", NULL)) + return true; -#include "sdhci.h" -#include "sdhci-pltfm.h" + /* Old device trees don't have the wp-inverted property. 
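The new sdhci_pci_8bit_width() callback above follows the standard SDHCI pattern for bus-width selection: the 4-bit and 8-bit enables live in the same host-control byte and must be kept mutually exclusive. A standalone model of just the bit handling (the mask values 0x02 and 0x20 match the SDHCI spec era of this patch, but verify against sdhci.h before reusing):

#include <stdio.h>
#include <stdint.h>

#define CTRL_4BITBUS 0x02 /* assumed SDHCI_CTRL_4BITBUS */
#define CTRL_8BITBUS 0x20 /* assumed SDHCI_CTRL_8BITBUS */

enum bus_width { WIDTH_1, WIDTH_4, WIDTH_8 };

static uint8_t set_width(uint8_t ctrl, enum bus_width width)
{
        switch (width) {
        case WIDTH_8:
                ctrl |= CTRL_8BITBUS;
                ctrl &= ~CTRL_4BITBUS;
                break;
        case WIDTH_4:
                ctrl |= CTRL_4BITBUS;
                ctrl &= ~CTRL_8BITBUS;
                break;
        default: /* 1-bit: neither enable may be set */
                ctrl &= ~(CTRL_8BITBUS | CTRL_4BITBUS);
                break;
        }
        return ctrl;
}

int main(void)
{
        printf("%#x\n", set_width(0x00, WIDTH_8)); /* 0x20 */
        printf("%#x\n", set_width(0x20, WIDTH_4)); /* 0x2  */
        return 0;
}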
*/ +#ifdef CONFIG_PPC + return machine_is(mpc837x_rdb) || machine_is(mpc837x_mds); +#else + return false; +#endif /* CONFIG_PPC */ +} + +void sdhci_get_of_property(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct sdhci_host *host = platform_get_drvdata(pdev); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + const __be32 *clk; + int size; -/*****************************************************************************\ - * * - * SDHCI core callbacks * - * * -\*****************************************************************************/ + if (of_device_is_available(np)) { + if (of_get_property(np, "sdhci,auto-cmd12", NULL)) + host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12; -static struct sdhci_ops sdhci_pltfm_ops = { -}; + if (of_get_property(np, "sdhci,1-bit-only", NULL)) + host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA; -/*****************************************************************************\ - * * - * Device probing/removal * - * * -\*****************************************************************************/ + if (sdhci_of_wp_inverted(np)) + host->quirks |= SDHCI_QUIRK_INVERTED_WRITE_PROTECT; -static int __devinit sdhci_pltfm_probe(struct platform_device *pdev) + clk = of_get_property(np, "clock-frequency", &size); + if (clk && size == sizeof(*clk) && *clk) + pltfm_host->clock = be32_to_cpup(clk); + } +} +#else +void sdhci_get_of_property(struct platform_device *pdev) {} +#endif /* CONFIG_OF */ +EXPORT_SYMBOL_GPL(sdhci_get_of_property); + +struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev, + struct sdhci_pltfm_data *pdata) { - const struct platform_device_id *platid = platform_get_device_id(pdev); - struct sdhci_pltfm_data *pdata; struct sdhci_host *host; struct sdhci_pltfm_host *pltfm_host; struct resource *iomem; int ret; - if (platid && platid->driver_data) - pdata = (void *)platid->driver_data; - else - pdata = pdev->dev.platform_data; - iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!iomem) { ret = -ENOMEM; @@ -71,8 +95,7 @@ static int __devinit sdhci_pltfm_probe(struct platform_device *pdev) } if (resource_size(iomem) < 0x100) - dev_err(&pdev->dev, "Invalid iomem size. 
You may " - "experience problems.\n"); + dev_err(&pdev->dev, "Invalid iomem size!\n"); /* Some PCI-based MFD need the parent here */ if (pdev->dev.parent != &platform_bus) @@ -87,7 +110,7 @@ static int __devinit sdhci_pltfm_probe(struct platform_device *pdev) pltfm_host = sdhci_priv(host); - host->hw_name = "platform"; + host->hw_name = dev_name(&pdev->dev); if (pdata && pdata->ops) host->ops = pdata->ops; else @@ -110,126 +133,95 @@ static int __devinit sdhci_pltfm_probe(struct platform_device *pdev) goto err_remap; } - if (pdata && pdata->init) { - ret = pdata->init(host, pdata); - if (ret) - goto err_plat_init; - } - - ret = sdhci_add_host(host); - if (ret) - goto err_add_host; - platform_set_drvdata(pdev, host); - return 0; + return host; -err_add_host: - if (pdata && pdata->exit) - pdata->exit(host); -err_plat_init: - iounmap(host->ioaddr); err_remap: release_mem_region(iomem->start, resource_size(iomem)); err_request: sdhci_free_host(host); err: - printk(KERN_ERR"Probing of sdhci-pltfm failed: %d\n", ret); - return ret; + dev_err(&pdev->dev, "%s failed %d\n", __func__, ret); + return ERR_PTR(ret); } +EXPORT_SYMBOL_GPL(sdhci_pltfm_init); -static int __devexit sdhci_pltfm_remove(struct platform_device *pdev) +void sdhci_pltfm_free(struct platform_device *pdev) { - struct sdhci_pltfm_data *pdata = pdev->dev.platform_data; struct sdhci_host *host = platform_get_drvdata(pdev); struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - int dead; - u32 scratch; - - dead = 0; - scratch = readl(host->ioaddr + SDHCI_INT_STATUS); - if (scratch == (u32)-1) - dead = 1; - sdhci_remove_host(host, dead); - if (pdata && pdata->exit) - pdata->exit(host); iounmap(host->ioaddr); release_mem_region(iomem->start, resource_size(iomem)); sdhci_free_host(host); platform_set_drvdata(pdev, NULL); +} +EXPORT_SYMBOL_GPL(sdhci_pltfm_free); - return 0; +int sdhci_pltfm_register(struct platform_device *pdev, + struct sdhci_pltfm_data *pdata) +{ + struct sdhci_host *host; + int ret = 0; + + host = sdhci_pltfm_init(pdev, pdata); + if (IS_ERR(host)) + return PTR_ERR(host); + + sdhci_get_of_property(pdev); + + ret = sdhci_add_host(host); + if (ret) + sdhci_pltfm_free(pdev); + + return ret; } +EXPORT_SYMBOL_GPL(sdhci_pltfm_register); -static const struct platform_device_id sdhci_pltfm_ids[] = { - { "sdhci", }, -#ifdef CONFIG_MMC_SDHCI_CNS3XXX - { "sdhci-cns3xxx", (kernel_ulong_t)&sdhci_cns3xxx_pdata }, -#endif -#ifdef CONFIG_MMC_SDHCI_ESDHC_IMX - { "sdhci-esdhc-imx", (kernel_ulong_t)&sdhci_esdhc_imx_pdata }, -#endif -#ifdef CONFIG_MMC_SDHCI_DOVE - { "sdhci-dove", (kernel_ulong_t)&sdhci_dove_pdata }, -#endif -#ifdef CONFIG_MMC_SDHCI_TEGRA - { "sdhci-tegra", (kernel_ulong_t)&sdhci_tegra_pdata }, -#endif - { }, -}; -MODULE_DEVICE_TABLE(platform, sdhci_pltfm_ids); +int sdhci_pltfm_unregister(struct platform_device *pdev) +{ + struct sdhci_host *host = platform_get_drvdata(pdev); + int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff); + + sdhci_remove_host(host, dead); + sdhci_pltfm_free(pdev); + + return 0; +} +EXPORT_SYMBOL_GPL(sdhci_pltfm_unregister); #ifdef CONFIG_PM -static int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state) +int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state) { struct sdhci_host *host = platform_get_drvdata(dev); return sdhci_suspend_host(host, state); } +EXPORT_SYMBOL_GPL(sdhci_pltfm_suspend); -static int sdhci_pltfm_resume(struct platform_device *dev) +int sdhci_pltfm_resume(struct platform_device *dev) { struct sdhci_host *host = 
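The shape of the new helper API above is the important part: sdhci_pltfm_init() only maps resources and allocates the host, sdhci_add_host() registers it, and sdhci_pltfm_register() merely composes the two for drivers with no hardware setup of their own. Everyone else can now slot clock and quirk setup in between, which is exactly what the esdhc-imx and tegra conversions in this patch do. A hedged outline of a client driver built on the split API (kernel context, not compilable standalone; the "foo" names are hypothetical):

/* sketch of a driver using the split helper API */
static int foo_probe(struct platform_device *pdev)
{
        struct sdhci_host *host;
        int err;

        host = sdhci_pltfm_init(pdev, &foo_pdata);
        if (IS_ERR(host))
                return PTR_ERR(host);

        /* driver-specific setup goes here, between init and add:
         * clocks, extra quirks, mmc->caps tweaks, ... */

        err = sdhci_add_host(host);
        if (err)
                sdhci_pltfm_free(pdev);
        return err;
}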
platform_get_drvdata(dev); return sdhci_resume_host(host); } -#else -#define sdhci_pltfm_suspend NULL -#define sdhci_pltfm_resume NULL +EXPORT_SYMBOL_GPL(sdhci_pltfm_resume); #endif /* CONFIG_PM */ -static struct platform_driver sdhci_pltfm_driver = { - .driver = { - .name = "sdhci", - .owner = THIS_MODULE, - }, - .probe = sdhci_pltfm_probe, - .remove = __devexit_p(sdhci_pltfm_remove), - .id_table = sdhci_pltfm_ids, - .suspend = sdhci_pltfm_suspend, - .resume = sdhci_pltfm_resume, -}; - -/*****************************************************************************\ - * * - * Driver init/exit * - * * -\*****************************************************************************/ - -static int __init sdhci_drv_init(void) +static int __init sdhci_pltfm_drv_init(void) { - return platform_driver_register(&sdhci_pltfm_driver); + pr_info("sdhci-pltfm: SDHCI platform and OF driver helper\n"); + + return 0; } +module_init(sdhci_pltfm_drv_init); -static void __exit sdhci_drv_exit(void) +static void __exit sdhci_pltfm_drv_exit(void) { - platform_driver_unregister(&sdhci_pltfm_driver); } +module_exit(sdhci_pltfm_drv_exit); -module_init(sdhci_drv_init); -module_exit(sdhci_drv_exit); - -MODULE_DESCRIPTION("Secure Digital Host Controller Interface platform driver"); -MODULE_AUTHOR("Mocean Laboratories <info@mocean-labs.com>"); +MODULE_DESCRIPTION("SDHCI platform and OF driver helper"); +MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/mmc/host/sdhci-pltfm.h b/drivers/mmc/host/sdhci-pltfm.h index 2b37016ad0ac..3a9fc3f40840 100644 --- a/drivers/mmc/host/sdhci-pltfm.h +++ b/drivers/mmc/host/sdhci-pltfm.h @@ -12,17 +12,95 @@ #define _DRIVERS_MMC_SDHCI_PLTFM_H #include <linux/clk.h> -#include <linux/types.h> -#include <linux/mmc/sdhci-pltfm.h> +#include <linux/platform_device.h> +#include "sdhci.h" + +struct sdhci_pltfm_data { + struct sdhci_ops *ops; + unsigned int quirks; +}; struct sdhci_pltfm_host { struct clk *clk; void *priv; /* to handle quirks across io-accessor calls */ + + /* migrate from sdhci_of_host */ + unsigned int clock; + u16 xfer_mode_shadow; }; -extern struct sdhci_pltfm_data sdhci_cns3xxx_pdata; -extern struct sdhci_pltfm_data sdhci_esdhc_imx_pdata; -extern struct sdhci_pltfm_data sdhci_dove_pdata; -extern struct sdhci_pltfm_data sdhci_tegra_pdata; +#ifdef CONFIG_MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER +/* + * These accessors are designed for big endian hosts doing I/O to + * little endian controllers incorporating a 32-bit hardware byte swapper. + */ +static inline u32 sdhci_be32bs_readl(struct sdhci_host *host, int reg) +{ + return in_be32(host->ioaddr + reg); +} + +static inline u16 sdhci_be32bs_readw(struct sdhci_host *host, int reg) +{ + return in_be16(host->ioaddr + (reg ^ 0x2)); +} + +static inline u8 sdhci_be32bs_readb(struct sdhci_host *host, int reg) +{ + return in_8(host->ioaddr + (reg ^ 0x3)); +} + +static inline void sdhci_be32bs_writel(struct sdhci_host *host, + u32 val, int reg) +{ + out_be32(host->ioaddr + reg, val); +} + +static inline void sdhci_be32bs_writew(struct sdhci_host *host, + u16 val, int reg) +{ + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + int base = reg & ~0x3; + int shift = (reg & 0x2) * 8; + + switch (reg) { + case SDHCI_TRANSFER_MODE: + /* + * Postpone this write, we must do it together with a + * command write that is down below. 
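The sdhci_be32bs_* accessors relocated into the header above encode the classic trick for a 32-bit hardware byte swapper: within each 32-bit word the swapper reverses the byte lanes, so sub-word accesses must flip their low address bits (reg ^ 0x2 for 16-bit, reg ^ 0x3 for 8-bit) to land on the bytes the little-endian controller actually exposes. A standalone illustration of the resulting address mapping:

#include <stdio.h>

int main(void)
{
        int reg;

        /* 16-bit registers: the halfword at offset 0 appears at offset 2 */
        for (reg = 0; reg < 8; reg += 2)
                printf("readw(0x%02x) -> bus offset 0x%02x\n", reg, reg ^ 0x2);

        /* 8-bit registers: byte order within the word is fully reversed */
        for (reg = 0; reg < 4; reg++)
                printf("readb(0x%02x) -> bus offset 0x%02x\n", reg, reg ^ 0x3);
        return 0;
}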
+ */ + pltfm_host->xfer_mode_shadow = val; + return; + case SDHCI_COMMAND: + sdhci_be32bs_writel(host, + val << 16 | pltfm_host->xfer_mode_shadow, + SDHCI_TRANSFER_MODE); + return; + } + clrsetbits_be32(host->ioaddr + base, 0xffff << shift, val << shift); +} + +static inline void sdhci_be32bs_writeb(struct sdhci_host *host, u8 val, int reg) +{ + int base = reg & ~0x3; + int shift = (reg & 0x3) * 8; + + clrsetbits_be32(host->ioaddr + base , 0xff << shift, val << shift); +} +#endif /* CONFIG_MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER */ + +extern void sdhci_get_of_property(struct platform_device *pdev); + +extern struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev, + struct sdhci_pltfm_data *pdata); +extern void sdhci_pltfm_free(struct platform_device *pdev); + +extern int sdhci_pltfm_register(struct platform_device *pdev, + struct sdhci_pltfm_data *pdata); +extern int sdhci_pltfm_unregister(struct platform_device *pdev); + +#ifdef CONFIG_PM +extern int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state); +extern int sdhci_pltfm_resume(struct platform_device *dev); +#endif #endif /* _DRIVERS_MMC_SDHCI_PLTFM_H */ diff --git a/drivers/mmc/host/sdhci-pxa.c b/drivers/mmc/host/sdhci-pxa.c deleted file mode 100644 index 089c9a68b7b1..000000000000 --- a/drivers/mmc/host/sdhci-pxa.c +++ /dev/null @@ -1,303 +0,0 @@ -/* linux/drivers/mmc/host/sdhci-pxa.c - * - * Copyright (C) 2010 Marvell International Ltd. - * Zhangfei Gao <zhangfei.gao@marvell.com> - * Kevin Wang <dwang4@marvell.com> - * Mingwei Wang <mwwang@marvell.com> - * Philip Rakity <prakity@marvell.com> - * Mark Brown <markb@marvell.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* Supports: - * SDHCI support for MMP2/PXA910/PXA168 - * - * Refer to sdhci-s3c.c. - */ - -#include <linux/delay.h> -#include <linux/platform_device.h> -#include <linux/mmc/host.h> -#include <linux/clk.h> -#include <linux/io.h> -#include <linux/err.h> -#include <plat/sdhci.h> -#include "sdhci.h" - -#define DRIVER_NAME "sdhci-pxa" - -#define SD_FIFO_PARAM 0x104 -#define DIS_PAD_SD_CLK_GATE 0x400 - -struct sdhci_pxa { - struct sdhci_host *host; - struct sdhci_pxa_platdata *pdata; - struct clk *clk; - struct resource *res; - - u8 clk_enable; -}; - -/*****************************************************************************\ - * * - * SDHCI core callbacks * - * * -\*****************************************************************************/ -static void set_clock(struct sdhci_host *host, unsigned int clock) -{ - struct sdhci_pxa *pxa = sdhci_priv(host); - u32 tmp = 0; - - if (clock == 0) { - if (pxa->clk_enable) { - clk_disable(pxa->clk); - pxa->clk_enable = 0; - } - } else { - if (0 == pxa->clk_enable) { - if (pxa->pdata->flags & PXA_FLAG_DISABLE_CLOCK_GATING) { - tmp = readl(host->ioaddr + SD_FIFO_PARAM); - tmp |= DIS_PAD_SD_CLK_GATE; - writel(tmp, host->ioaddr + SD_FIFO_PARAM); - } - clk_enable(pxa->clk); - pxa->clk_enable = 1; - } - } -} - -static int set_uhs_signaling(struct sdhci_host *host, unsigned int uhs) -{ - u16 ctrl_2; - - /* - * Set V18_EN -- UHS modes do not work without this. 
- * does not change signaling voltage - */ - ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); - - /* Select Bus Speed Mode for host */ - ctrl_2 &= ~SDHCI_CTRL_UHS_MASK; - switch (uhs) { - case MMC_TIMING_UHS_SDR12: - ctrl_2 |= SDHCI_CTRL_UHS_SDR12; - break; - case MMC_TIMING_UHS_SDR25: - ctrl_2 |= SDHCI_CTRL_UHS_SDR25; - break; - case MMC_TIMING_UHS_SDR50: - ctrl_2 |= SDHCI_CTRL_UHS_SDR50 | SDHCI_CTRL_VDD_180; - break; - case MMC_TIMING_UHS_SDR104: - ctrl_2 |= SDHCI_CTRL_UHS_SDR104 | SDHCI_CTRL_VDD_180; - break; - case MMC_TIMING_UHS_DDR50: - ctrl_2 |= SDHCI_CTRL_UHS_DDR50 | SDHCI_CTRL_VDD_180; - break; - } - - sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); - pr_debug("%s:%s uhs = %d, ctrl_2 = %04X\n", - __func__, mmc_hostname(host->mmc), uhs, ctrl_2); - - return 0; -} - -static struct sdhci_ops sdhci_pxa_ops = { - .set_uhs_signaling = set_uhs_signaling, - .set_clock = set_clock, -}; - -/*****************************************************************************\ - * * - * Device probing/removal * - * * -\*****************************************************************************/ - -static int __devinit sdhci_pxa_probe(struct platform_device *pdev) -{ - struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data; - struct device *dev = &pdev->dev; - struct sdhci_host *host = NULL; - struct resource *iomem = NULL; - struct sdhci_pxa *pxa = NULL; - int ret, irq; - - irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(dev, "no irq specified\n"); - return irq; - } - - iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!iomem) { - dev_err(dev, "no memory specified\n"); - return -ENOENT; - } - - host = sdhci_alloc_host(&pdev->dev, sizeof(struct sdhci_pxa)); - if (IS_ERR(host)) { - dev_err(dev, "failed to alloc host\n"); - return PTR_ERR(host); - } - - pxa = sdhci_priv(host); - pxa->host = host; - pxa->pdata = pdata; - pxa->clk_enable = 0; - - pxa->clk = clk_get(dev, "PXA-SDHCLK"); - if (IS_ERR(pxa->clk)) { - dev_err(dev, "failed to get io clock\n"); - ret = PTR_ERR(pxa->clk); - goto out; - } - - pxa->res = request_mem_region(iomem->start, resource_size(iomem), - mmc_hostname(host->mmc)); - if (!pxa->res) { - dev_err(&pdev->dev, "cannot request region\n"); - ret = -EBUSY; - goto out; - } - - host->ioaddr = ioremap(iomem->start, resource_size(iomem)); - if (!host->ioaddr) { - dev_err(&pdev->dev, "failed to remap registers\n"); - ret = -ENOMEM; - goto out; - } - - host->hw_name = "MMC"; - host->ops = &sdhci_pxa_ops; - host->irq = irq; - host->quirks = SDHCI_QUIRK_BROKEN_ADMA - | SDHCI_QUIRK_BROKEN_TIMEOUT_VAL - | SDHCI_QUIRK_32BIT_DMA_ADDR - | SDHCI_QUIRK_32BIT_DMA_SIZE - | SDHCI_QUIRK_32BIT_ADMA_SIZE - | SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC; - - if (pdata->quirks) - host->quirks |= pdata->quirks; - - /* enable 1/8V DDR capable */ - host->mmc->caps |= MMC_CAP_1_8V_DDR; - - /* If slot design supports 8 bit data, indicate this to MMC. 
*/ - if (pdata->flags & PXA_FLAG_SD_8_BIT_CAPABLE_SLOT) - host->mmc->caps |= MMC_CAP_8_BIT_DATA; - - ret = sdhci_add_host(host); - if (ret) { - dev_err(&pdev->dev, "failed to add host\n"); - goto out; - } - - if (pxa->pdata->max_speed) - host->mmc->f_max = pxa->pdata->max_speed; - - platform_set_drvdata(pdev, host); - - return 0; -out: - if (host) { - clk_put(pxa->clk); - if (host->ioaddr) - iounmap(host->ioaddr); - if (pxa->res) - release_mem_region(pxa->res->start, - resource_size(pxa->res)); - sdhci_free_host(host); - } - - return ret; -} - -static int __devexit sdhci_pxa_remove(struct platform_device *pdev) -{ - struct sdhci_host *host = platform_get_drvdata(pdev); - struct sdhci_pxa *pxa = sdhci_priv(host); - int dead = 0; - u32 scratch; - - if (host) { - scratch = readl(host->ioaddr + SDHCI_INT_STATUS); - if (scratch == (u32)-1) - dead = 1; - - sdhci_remove_host(host, dead); - - if (host->ioaddr) - iounmap(host->ioaddr); - if (pxa->res) - release_mem_region(pxa->res->start, - resource_size(pxa->res)); - if (pxa->clk_enable) { - clk_disable(pxa->clk); - pxa->clk_enable = 0; - } - clk_put(pxa->clk); - - sdhci_free_host(host); - platform_set_drvdata(pdev, NULL); - } - - return 0; -} - -#ifdef CONFIG_PM -static int sdhci_pxa_suspend(struct platform_device *dev, pm_message_t state) -{ - struct sdhci_host *host = platform_get_drvdata(dev); - - return sdhci_suspend_host(host, state); -} - -static int sdhci_pxa_resume(struct platform_device *dev) -{ - struct sdhci_host *host = platform_get_drvdata(dev); - - return sdhci_resume_host(host); -} -#else -#define sdhci_pxa_suspend NULL -#define sdhci_pxa_resume NULL -#endif - -static struct platform_driver sdhci_pxa_driver = { - .probe = sdhci_pxa_probe, - .remove = __devexit_p(sdhci_pxa_remove), - .suspend = sdhci_pxa_suspend, - .resume = sdhci_pxa_resume, - .driver = { - .name = DRIVER_NAME, - .owner = THIS_MODULE, - }, -}; - -/*****************************************************************************\ - * * - * Driver init/exit * - * * -\*****************************************************************************/ - -static int __init sdhci_pxa_init(void) -{ - return platform_driver_register(&sdhci_pxa_driver); -} - -static void __exit sdhci_pxa_exit(void) -{ - platform_driver_unregister(&sdhci_pxa_driver); -} - -module_init(sdhci_pxa_init); -module_exit(sdhci_pxa_exit); - -MODULE_DESCRIPTION("SDH controller driver for PXA168/PXA910/MMP2"); -MODULE_AUTHOR("Zhangfei Gao <zhangfei.gao@marvell.com>"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/mmc/host/sdhci-pxav2.c b/drivers/mmc/host/sdhci-pxav2.c new file mode 100644 index 000000000000..38f58994f79a --- /dev/null +++ b/drivers/mmc/host/sdhci-pxav2.c @@ -0,0 +1,244 @@ +/* + * Copyright (C) 2010 Marvell International Ltd. + * Zhangfei Gao <zhangfei.gao@marvell.com> + * Kevin Wang <dwang4@marvell.com> + * Jun Nie <njun@marvell.com> + * Qiming Wu <wuqm@marvell.com> + * Philip Rakity <prakity@marvell.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/err.h> +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/clk.h> +#include <linux/io.h> +#include <linux/gpio.h> +#include <linux/mmc/card.h> +#include <linux/mmc/host.h> +#include <linux/platform_data/pxa_sdhci.h> +#include <linux/slab.h> +#include "sdhci.h" +#include "sdhci-pltfm.h" + +#define SD_FIFO_PARAM 0xe0 +#define DIS_PAD_SD_CLK_GATE 0x0400 /* Turn on/off Dynamic SD Clock Gating */ +#define CLK_GATE_ON 0x0200 /* Disable/enable Clock Gate */ +#define CLK_GATE_CTL 0x0100 /* Clock Gate Control */ +#define CLK_GATE_SETTING_BITS (DIS_PAD_SD_CLK_GATE | \ + CLK_GATE_ON | CLK_GATE_CTL) + +#define SD_CLOCK_BURST_SIZE_SETUP 0xe6 +#define SDCLK_SEL_SHIFT 8 +#define SDCLK_SEL_MASK 0x3 +#define SDCLK_DELAY_SHIFT 10 +#define SDCLK_DELAY_MASK 0x3c + +#define SD_CE_ATA_2 0xea +#define MMC_CARD 0x1000 +#define MMC_WIDTH 0x0100 + +static void pxav2_set_private_registers(struct sdhci_host *host, u8 mask) +{ + struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc)); + struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data; + + if (mask == SDHCI_RESET_ALL) { + u16 tmp = 0; + + /* + * tune the timing of read data/command when a CRC error happens; + * there is no performance impact + */ + if (pdata->clk_delay_sel == 1) { + tmp = readw(host->ioaddr + SD_CLOCK_BURST_SIZE_SETUP); + + tmp &= ~(SDCLK_DELAY_MASK << SDCLK_DELAY_SHIFT); + tmp |= (pdata->clk_delay_cycles & SDCLK_DELAY_MASK) + << SDCLK_DELAY_SHIFT; + tmp &= ~(SDCLK_SEL_MASK << SDCLK_SEL_SHIFT); + tmp |= (1 & SDCLK_SEL_MASK) << SDCLK_SEL_SHIFT; + + writew(tmp, host->ioaddr + SD_CLOCK_BURST_SIZE_SETUP); + } + + if (pdata->flags & PXA_FLAG_ENABLE_CLOCK_GATING) { + tmp = readw(host->ioaddr + SD_FIFO_PARAM); + tmp &= ~CLK_GATE_SETTING_BITS; + writew(tmp, host->ioaddr + SD_FIFO_PARAM); + } else { + tmp = readw(host->ioaddr + SD_FIFO_PARAM); + tmp &= ~CLK_GATE_SETTING_BITS; + tmp |= CLK_GATE_SETTING_BITS; + writew(tmp, host->ioaddr + SD_FIFO_PARAM); + } + } +} + +static int pxav2_mmc_set_width(struct sdhci_host *host, int width) +{ + u8 ctrl; + u16 tmp; + + ctrl = readb(host->ioaddr + SDHCI_HOST_CONTROL); + tmp = readw(host->ioaddr + SD_CE_ATA_2); + if (width == MMC_BUS_WIDTH_8) { + ctrl &= ~SDHCI_CTRL_4BITBUS; + tmp |= MMC_CARD | MMC_WIDTH; + } else { + tmp &= ~(MMC_CARD | MMC_WIDTH); + if (width == MMC_BUS_WIDTH_4) + ctrl |= SDHCI_CTRL_4BITBUS; + else + ctrl &= ~SDHCI_CTRL_4BITBUS; + } + writew(tmp, host->ioaddr + SD_CE_ATA_2); + writeb(ctrl, host->ioaddr + SDHCI_HOST_CONTROL); + + return 0; +} + +static u32 pxav2_get_max_clock(struct sdhci_host *host) +{ + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + + return clk_get_rate(pltfm_host->clk); +} + +static struct sdhci_ops pxav2_sdhci_ops = { + .get_max_clock = pxav2_get_max_clock, + .platform_reset_exit = pxav2_set_private_registers, + .platform_8bit_width = pxav2_mmc_set_width, +}; + +static int __devinit sdhci_pxav2_probe(struct platform_device *pdev) +{ + struct sdhci_pltfm_host *pltfm_host; + struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data; + struct device *dev = &pdev->dev; + struct sdhci_host *host = NULL; + struct sdhci_pxa *pxa = NULL; + int ret; + struct clk *clk; + + pxa = kzalloc(sizeof(struct sdhci_pxa), GFP_KERNEL); + if (!pxa) + return -ENOMEM; + + host = sdhci_pltfm_init(pdev, NULL); + if (IS_ERR(host)) { + kfree(pxa); + return PTR_ERR(host); + } + pltfm_host = sdhci_priv(host); + pltfm_host->priv = pxa; + + clk = clk_get(dev, "PXA-SDHCLK"); + if (IS_ERR(clk)) { + dev_err(dev, "failed to get io clock\n"); +
ret = PTR_ERR(clk); + goto err_clk_get; + } + pltfm_host->clk = clk; + clk_enable(clk); + + host->quirks = SDHCI_QUIRK_BROKEN_ADMA + | SDHCI_QUIRK_BROKEN_TIMEOUT_VAL + | SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN; + + if (pdata) { + if (pdata->flags & PXA_FLAG_CARD_PERMANENT) { + /* on-chip device */ + host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION; + host->mmc->caps |= MMC_CAP_NONREMOVABLE; + } + + /* If slot design supports 8 bit data, indicate this to MMC. */ + if (pdata->flags & PXA_FLAG_SD_8_BIT_CAPABLE_SLOT) + host->mmc->caps |= MMC_CAP_8_BIT_DATA; + + if (pdata->quirks) + host->quirks |= pdata->quirks; + if (pdata->host_caps) + host->mmc->caps |= pdata->host_caps; + if (pdata->pm_caps) + host->mmc->pm_caps |= pdata->pm_caps; + } + + host->ops = &pxav2_sdhci_ops; + + ret = sdhci_add_host(host); + if (ret) { + dev_err(&pdev->dev, "failed to add host\n"); + goto err_add_host; + } + + platform_set_drvdata(pdev, host); + + return 0; + +err_add_host: + clk_disable(clk); + clk_put(clk); +err_clk_get: + sdhci_pltfm_free(pdev); + kfree(pxa); + return ret; +} + +static int __devexit sdhci_pxav2_remove(struct platform_device *pdev) +{ + struct sdhci_host *host = platform_get_drvdata(pdev); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_pxa *pxa = pltfm_host->priv; + + sdhci_remove_host(host, 1); + + clk_disable(pltfm_host->clk); + clk_put(pltfm_host->clk); + sdhci_pltfm_free(pdev); + kfree(pxa); + + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static struct platform_driver sdhci_pxav2_driver = { + .driver = { + .name = "sdhci-pxav2", + .owner = THIS_MODULE, + }, + .probe = sdhci_pxav2_probe, + .remove = __devexit_p(sdhci_pxav2_remove), +#ifdef CONFIG_PM + .suspend = sdhci_pltfm_suspend, + .resume = sdhci_pltfm_resume, +#endif +}; +static int __init sdhci_pxav2_init(void) +{ + return platform_driver_register(&sdhci_pxav2_driver); +} + +static void __exit sdhci_pxav2_exit(void) +{ + platform_driver_unregister(&sdhci_pxav2_driver); +} + +module_init(sdhci_pxav2_init); +module_exit(sdhci_pxav2_exit); + +MODULE_DESCRIPTION("SDHCI driver for pxav2"); +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_LICENSE("GPL v2"); + diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c new file mode 100644 index 000000000000..4198dbbc5c20 --- /dev/null +++ b/drivers/mmc/host/sdhci-pxav3.c @@ -0,0 +1,289 @@ +/* + * Copyright (C) 2010 Marvell International Ltd. + * Zhangfei Gao <zhangfei.gao@marvell.com> + * Kevin Wang <dwang4@marvell.com> + * Mingwei Wang <mwwang@marvell.com> + * Philip Rakity <prakity@marvell.com> + * Mark Brown <markb@marvell.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
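One oddity in pxav2_set_private_registers() above: the non-gating branch clears CLK_GATE_SETTING_BITS and then immediately ors the same mask back in, so the clear is a no-op and the two branches differ only in whether the bits end up set. A two-line standalone check (the mask value is illustrative, not the real register layout):

#include <stdio.h>
#include <stdint.h>

#define BITS 0x0700u /* illustrative stand-in for CLK_GATE_SETTING_BITS */

int main(void)
{
        uint16_t tmp = 0x1234;
        uint16_t a = (uint16_t)((tmp & ~BITS) | BITS); /* clear, then set */
        uint16_t b = (uint16_t)(tmp | BITS);           /* just set */

        printf("%#x vs %#x -> %s\n", a, b, a == b ? "identical" : "differ");
        return 0;
}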
+ * + */ +#include <linux/err.h> +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/clk.h> +#include <linux/io.h> +#include <linux/gpio.h> +#include <linux/mmc/card.h> +#include <linux/mmc/host.h> +#include <linux/platform_data/pxa_sdhci.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include "sdhci.h" +#include "sdhci-pltfm.h" + +#define SD_CLOCK_BURST_SIZE_SETUP 0x10A +#define SDCLK_SEL 0x100 +#define SDCLK_DELAY_SHIFT 9 +#define SDCLK_DELAY_MASK 0x1f + +#define SD_CFG_FIFO_PARAM 0x100 +#define SDCFG_GEN_PAD_CLK_ON (1<<6) +#define SDCFG_GEN_PAD_CLK_CNT_MASK 0xFF +#define SDCFG_GEN_PAD_CLK_CNT_SHIFT 24 + +#define SD_SPI_MODE 0x108 +#define SD_CE_ATA_1 0x10C + +#define SD_CE_ATA_2 0x10E +#define SDCE_MISC_INT (1<<2) +#define SDCE_MISC_INT_EN (1<<1) + +static void pxav3_set_private_registers(struct sdhci_host *host, u8 mask) +{ + struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc)); + struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data; + + if (mask == SDHCI_RESET_ALL) { + /* + * tune the timing of read data/command when a CRC error happens; + * there is no performance impact + */ + if (pdata && 0 != pdata->clk_delay_cycles) { + u16 tmp; + + tmp = readw(host->ioaddr + SD_CLOCK_BURST_SIZE_SETUP); + tmp |= (pdata->clk_delay_cycles & SDCLK_DELAY_MASK) + << SDCLK_DELAY_SHIFT; + tmp |= SDCLK_SEL; + writew(tmp, host->ioaddr + SD_CLOCK_BURST_SIZE_SETUP); + } + } +} + +#define MAX_WAIT_COUNT 5 +static void pxav3_gen_init_74_clocks(struct sdhci_host *host, u8 power_mode) +{ + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_pxa *pxa = pltfm_host->priv; + u16 tmp; + int count; + + if (pxa->power_mode == MMC_POWER_UP + && power_mode == MMC_POWER_ON) { + + dev_dbg(mmc_dev(host->mmc), + "%s: slot->power_mode = %d, " + "ios->power_mode = %d\n", + __func__, + pxa->power_mode, + power_mode); + + /* say we want notice when the 74 clocks have been sent */ + tmp = readw(host->ioaddr + SD_CE_ATA_2); + tmp |= SDCE_MISC_INT_EN; + writew(tmp, host->ioaddr + SD_CE_ATA_2); + + /* start sending the 74 clocks */ + tmp = readw(host->ioaddr + SD_CFG_FIFO_PARAM); + tmp |= SDCFG_GEN_PAD_CLK_ON; + writew(tmp, host->ioaddr + SD_CFG_FIFO_PARAM); + + /* the slowest speed is about 100 kHz, i.e. 10 usec per clock */ + udelay(740); + count = 0; + + while (count++ < MAX_WAIT_COUNT) { + if ((readw(host->ioaddr + SD_CE_ATA_2) + & SDCE_MISC_INT) == 0) + break; + udelay(10); + } + + if (count == MAX_WAIT_COUNT) + dev_warn(mmc_dev(host->mmc), "74 clock interrupt not cleared\n"); + + /* clear the interrupt bit if posted */ + tmp = readw(host->ioaddr + SD_CE_ATA_2); + tmp |= SDCE_MISC_INT; + writew(tmp, host->ioaddr + SD_CE_ATA_2); + } + pxa->power_mode = power_mode; +} + +static int pxav3_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs) +{ + u16 ctrl_2; + + /* + * Set V18_EN -- UHS modes do not work without this. 
+ * does not change signaling voltage + */ + ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2); + + /* Select Bus Speed Mode for host */ + ctrl_2 &= ~SDHCI_CTRL_UHS_MASK; + switch (uhs) { + case MMC_TIMING_UHS_SDR12: + ctrl_2 |= SDHCI_CTRL_UHS_SDR12; + break; + case MMC_TIMING_UHS_SDR25: + ctrl_2 |= SDHCI_CTRL_UHS_SDR25; + break; + case MMC_TIMING_UHS_SDR50: + ctrl_2 |= SDHCI_CTRL_UHS_SDR50 | SDHCI_CTRL_VDD_180; + break; + case MMC_TIMING_UHS_SDR104: + ctrl_2 |= SDHCI_CTRL_UHS_SDR104 | SDHCI_CTRL_VDD_180; + break; + case MMC_TIMING_UHS_DDR50: + ctrl_2 |= SDHCI_CTRL_UHS_DDR50 | SDHCI_CTRL_VDD_180; + break; + } + + sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); + dev_dbg(mmc_dev(host->mmc), + "%s uhs = %d, ctrl_2 = %04X\n", + __func__, uhs, ctrl_2); + + return 0; +} + +static struct sdhci_ops pxav3_sdhci_ops = { + .platform_reset_exit = pxav3_set_private_registers, + .set_uhs_signaling = pxav3_set_uhs_signaling, + .platform_send_init_74_clocks = pxav3_gen_init_74_clocks, +}; + +static int __devinit sdhci_pxav3_probe(struct platform_device *pdev) +{ + struct sdhci_pltfm_host *pltfm_host; + struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data; + struct device *dev = &pdev->dev; + struct sdhci_host *host = NULL; + struct sdhci_pxa *pxa = NULL; + int ret; + struct clk *clk; + + pxa = kzalloc(sizeof(struct sdhci_pxa), GFP_KERNEL); + if (!pxa) + return -ENOMEM; + + host = sdhci_pltfm_init(pdev, NULL); + if (IS_ERR(host)) { + kfree(pxa); + return PTR_ERR(host); + } + pltfm_host = sdhci_priv(host); + pltfm_host->priv = pxa; + + clk = clk_get(dev, "PXA-SDHCLK"); + if (IS_ERR(clk)) { + dev_err(dev, "failed to get io clock\n"); + ret = PTR_ERR(clk); + goto err_clk_get; + } + pltfm_host->clk = clk; + clk_enable(clk); + + host->quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL + | SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC; + + /* enable 1/8V DDR capable */ + host->mmc->caps |= MMC_CAP_1_8V_DDR; + + if (pdata) { + if (pdata->flags & PXA_FLAG_CARD_PERMANENT) { + /* on-chip device */ + host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION; + host->mmc->caps |= MMC_CAP_NONREMOVABLE; + } + + /* If slot design supports 8 bit data, indicate this to MMC. 
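The udelay(740) in pxav3_gen_init_74_clocks() above comes straight from the worst case stated in its comment: at roughly 100 kHz the slowest initialization clock takes about 10 microseconds per cycle, so 74 cycles need about 740 microseconds before it is worth polling for completion. A quick check of the arithmetic:

#include <stdio.h>

int main(void)
{
        unsigned int f_min = 100000; /* assumed slowest init clock, Hz */
        unsigned int clocks = 74;
        unsigned int usec = clocks * (1000000u / f_min);

        /* 74 * 10 us = 740 us, matching udelay(740) */
        printf("%u clocks at %u Hz ~ %u us\n", clocks, f_min, usec);
        return 0;
}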
*/ + if (pdata->flags & PXA_FLAG_SD_8_BIT_CAPABLE_SLOT) + host->mmc->caps |= MMC_CAP_8_BIT_DATA; + + if (pdata->quirks) + host->quirks |= pdata->quirks; + if (pdata->host_caps) + host->mmc->caps |= pdata->host_caps; + if (pdata->pm_caps) + host->mmc->pm_caps |= pdata->pm_caps; + } + + host->ops = &pxav3_sdhci_ops; + + ret = sdhci_add_host(host); + if (ret) { + dev_err(&pdev->dev, "failed to add host\n"); + goto err_add_host; + } + + platform_set_drvdata(pdev, host); + + return 0; + +err_add_host: + clk_disable(clk); + clk_put(clk); +err_clk_get: + sdhci_pltfm_free(pdev); + kfree(pxa); + return ret; +} + +static int __devexit sdhci_pxav3_remove(struct platform_device *pdev) +{ + struct sdhci_host *host = platform_get_drvdata(pdev); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_pxa *pxa = pltfm_host->priv; + + sdhci_remove_host(host, 1); + + clk_disable(pltfm_host->clk); + clk_put(pltfm_host->clk); + sdhci_pltfm_free(pdev); + kfree(pxa); + + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static struct platform_driver sdhci_pxav3_driver = { + .driver = { + .name = "sdhci-pxav3", + .owner = THIS_MODULE, + }, + .probe = sdhci_pxav3_probe, + .remove = __devexit_p(sdhci_pxav3_remove), +#ifdef CONFIG_PM + .suspend = sdhci_pltfm_suspend, + .resume = sdhci_pltfm_resume, +#endif +}; +static int __init sdhci_pxav3_init(void) +{ + return platform_driver_register(&sdhci_pxav3_driver); +} + +static void __exit sdhci_pxav3_exit(void) +{ + platform_driver_unregister(&sdhci_pxav3_driver); +} + +module_init(sdhci_pxav3_init); +module_exit(sdhci_pxav3_exit); + +MODULE_DESCRIPTION("SDHCI driver for pxav3"); +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_LICENSE("GPL v2"); + diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c index 69e3ee321eb5..460ffaf0f6d7 100644 --- a/drivers/mmc/host/sdhci-s3c.c +++ b/drivers/mmc/host/sdhci-s3c.c @@ -612,16 +612,14 @@ static int sdhci_s3c_suspend(struct platform_device *dev, pm_message_t pm) { struct sdhci_host *host = platform_get_drvdata(dev); - sdhci_suspend_host(host, pm); - return 0; + return sdhci_suspend_host(host, pm); } static int sdhci_s3c_resume(struct platform_device *dev) { struct sdhci_host *host = platform_get_drvdata(dev); - sdhci_resume_host(host); - return 0; + return sdhci_resume_host(host); } #else diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index 343c97edba32..18b0bd31de78 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -24,7 +24,6 @@ #include <mach/gpio.h> #include <mach/sdhci.h> -#include "sdhci.h" #include "sdhci-pltfm.h" static u32 tegra_sdhci_readl(struct sdhci_host *host, int reg) @@ -116,20 +115,42 @@ static int tegra_sdhci_8bit(struct sdhci_host *host, int bus_width) return 0; } +static struct sdhci_ops tegra_sdhci_ops = { + .get_ro = tegra_sdhci_get_ro, + .read_l = tegra_sdhci_readl, + .read_w = tegra_sdhci_readw, + .write_l = tegra_sdhci_writel, + .platform_8bit_width = tegra_sdhci_8bit, +}; + +static struct sdhci_pltfm_data sdhci_tegra_pdata = { + .quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | + SDHCI_QUIRK_SINGLE_POWER_WRITE | + SDHCI_QUIRK_NO_HISPD_BIT | + SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC, + .ops = &tegra_sdhci_ops, +}; -static int tegra_sdhci_pltfm_init(struct sdhci_host *host, - struct sdhci_pltfm_data *pdata) +static int __devinit sdhci_tegra_probe(struct platform_device *pdev) { - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc)); 
+ struct sdhci_pltfm_host *pltfm_host; struct tegra_sdhci_platform_data *plat; + struct sdhci_host *host; struct clk *clk; int rc; + host = sdhci_pltfm_init(pdev, &sdhci_tegra_pdata); + if (IS_ERR(host)) + return PTR_ERR(host); + + pltfm_host = sdhci_priv(host); + plat = pdev->dev.platform_data; + if (plat == NULL) { dev_err(mmc_dev(host->mmc), "missing platform data\n"); - return -ENXIO; + rc = -ENXIO; + goto err_no_plat; } if (gpio_is_valid(plat->power_gpio)) { @@ -137,7 +158,7 @@ static int tegra_sdhci_pltfm_init(struct sdhci_host *host, if (rc) { dev_err(mmc_dev(host->mmc), "failed to allocate power gpio\n"); - goto out; + goto err_power_req; } tegra_gpio_enable(plat->power_gpio); gpio_direction_output(plat->power_gpio, 1); @@ -148,7 +169,7 @@ static int tegra_sdhci_pltfm_init(struct sdhci_host *host, if (rc) { dev_err(mmc_dev(host->mmc), "failed to allocate cd gpio\n"); - goto out_power; + goto err_cd_req; } tegra_gpio_enable(plat->cd_gpio); gpio_direction_input(plat->cd_gpio); @@ -159,7 +180,7 @@ static int tegra_sdhci_pltfm_init(struct sdhci_host *host, if (rc) { dev_err(mmc_dev(host->mmc), "request irq error\n"); - goto out_cd; + goto err_cd_irq_req; } } @@ -169,7 +190,7 @@ static int tegra_sdhci_pltfm_init(struct sdhci_host *host, if (rc) { dev_err(mmc_dev(host->mmc), "failed to allocate wp gpio\n"); - goto out_irq; + goto err_wp_req; } tegra_gpio_enable(plat->wp_gpio); gpio_direction_input(plat->wp_gpio); @@ -179,7 +200,7 @@ static int tegra_sdhci_pltfm_init(struct sdhci_host *host, if (IS_ERR(clk)) { dev_err(mmc_dev(host->mmc), "clk err\n"); rc = PTR_ERR(clk); - goto out_wp; + goto err_clk_get; } clk_enable(clk); pltfm_host->clk = clk; @@ -189,38 +210,47 @@ static int tegra_sdhci_pltfm_init(struct sdhci_host *host, if (plat->is_8bit) host->mmc->caps |= MMC_CAP_8_BIT_DATA; + rc = sdhci_add_host(host); + if (rc) + goto err_add_host; + return 0; -out_wp: +err_add_host: + clk_disable(pltfm_host->clk); + clk_put(pltfm_host->clk); +err_clk_get: if (gpio_is_valid(plat->wp_gpio)) { tegra_gpio_disable(plat->wp_gpio); gpio_free(plat->wp_gpio); } - -out_irq: +err_wp_req: if (gpio_is_valid(plat->cd_gpio)) free_irq(gpio_to_irq(plat->cd_gpio), host); -out_cd: +err_cd_irq_req: if (gpio_is_valid(plat->cd_gpio)) { tegra_gpio_disable(plat->cd_gpio); gpio_free(plat->cd_gpio); } - -out_power: +err_cd_req: if (gpio_is_valid(plat->power_gpio)) { tegra_gpio_disable(plat->power_gpio); gpio_free(plat->power_gpio); } - -out: +err_power_req: +err_no_plat: + sdhci_pltfm_free(pdev); return rc; } -static void tegra_sdhci_pltfm_exit(struct sdhci_host *host) +static int __devexit sdhci_tegra_remove(struct platform_device *pdev) { + struct sdhci_host *host = platform_get_drvdata(pdev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc)); struct tegra_sdhci_platform_data *plat; + int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff); + + sdhci_remove_host(host, dead); plat = pdev->dev.platform_data; @@ -242,22 +272,37 @@ static void tegra_sdhci_pltfm_exit(struct sdhci_host *host) clk_disable(pltfm_host->clk); clk_put(pltfm_host->clk); + + sdhci_pltfm_free(pdev); + + return 0; } -static struct sdhci_ops tegra_sdhci_ops = { - .get_ro = tegra_sdhci_get_ro, - .read_l = tegra_sdhci_readl, - .read_w = tegra_sdhci_readw, - .write_l = tegra_sdhci_writel, - .platform_8bit_width = tegra_sdhci_8bit, +static struct platform_driver sdhci_tegra_driver = { + .driver = { + .name = "sdhci-tegra", + .owner = THIS_MODULE, + }, + .probe = 
sdhci_tegra_probe, + .remove = __devexit_p(sdhci_tegra_remove), +#ifdef CONFIG_PM + .suspend = sdhci_pltfm_suspend, + .resume = sdhci_pltfm_resume, +#endif }; -struct sdhci_pltfm_data sdhci_tegra_pdata = { - .quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | - SDHCI_QUIRK_SINGLE_POWER_WRITE | - SDHCI_QUIRK_NO_HISPD_BIT | - SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC, - .ops = &tegra_sdhci_ops, - .init = tegra_sdhci_pltfm_init, - .exit = tegra_sdhci_pltfm_exit, -}; +static int __init sdhci_tegra_init(void) +{ + return platform_driver_register(&sdhci_tegra_driver); +} +module_init(sdhci_tegra_init); + +static void __exit sdhci_tegra_exit(void) +{ + platform_driver_unregister(&sdhci_tegra_driver); +} +module_exit(sdhci_tegra_exit); + +MODULE_DESCRIPTION("SDHCI driver for Tegra"); +MODULE_AUTHOR(" Google, Inc."); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 58d5436ff649..c31a3343340d 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -127,11 +127,15 @@ static void sdhci_mask_irqs(struct sdhci_host *host, u32 irqs) static void sdhci_set_card_detection(struct sdhci_host *host, bool enable) { - u32 irqs = SDHCI_INT_CARD_REMOVE | SDHCI_INT_CARD_INSERT; + u32 present, irqs; if (host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) return; + present = sdhci_readl(host, SDHCI_PRESENT_STATE) & + SDHCI_CARD_PRESENT; + irqs = present ? SDHCI_INT_CARD_REMOVE : SDHCI_INT_CARD_INSERT; + if (enable) sdhci_unmask_irqs(host, irqs); else @@ -2154,13 +2158,30 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id) mmc_hostname(host->mmc), intmask); if (intmask & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) { + u32 present = sdhci_readl(host, SDHCI_PRESENT_STATE) & + SDHCI_CARD_PRESENT; + + /* + * There is a observation on i.mx esdhc. INSERT bit will be + * immediately set again when it gets cleared, if a card is + * inserted. We have to mask the irq to prevent interrupt + * storm which will freeze the system. And the REMOVE gets + * the same situation. + * + * More testing are needed here to ensure it works for other + * platforms though. + */ + sdhci_mask_irqs(host, present ? SDHCI_INT_CARD_INSERT : + SDHCI_INT_CARD_REMOVE); + sdhci_unmask_irqs(host, present ? 
SDHCI_INT_CARD_REMOVE : + SDHCI_INT_CARD_INSERT); + sdhci_writel(host, intmask & (SDHCI_INT_CARD_INSERT | - SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS); + SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS); + intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE); tasklet_schedule(&host->card_tasklet); } - intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE); - if (intmask & SDHCI_INT_CMD_MASK) { sdhci_writel(host, intmask & SDHCI_INT_CMD_MASK, SDHCI_INT_STATUS); @@ -2488,6 +2509,11 @@ int sdhci_add_host(struct sdhci_host *host) } else mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200; + if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK) + mmc->max_discard_to = (1 << 27) / (mmc->f_max / 1000); + else + mmc->max_discard_to = (1 << 27) / host->timeout_clk; + mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23; if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12) diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index 14f8edbaa195..557886bee9ce 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -175,6 +175,7 @@ struct sh_mmcif_host { enum mmcif_state state; spinlock_t lock; bool power; + bool card_present; /* DMA support */ struct dma_chan *chan_rx; @@ -877,23 +878,23 @@ static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) spin_unlock_irqrestore(&host->lock, flags); if (ios->power_mode == MMC_POWER_UP) { - if (p->set_pwr) - p->set_pwr(host->pd, ios->power_mode); - if (!host->power) { + if (!host->card_present) { /* See if we also get DMA */ sh_mmcif_request_dma(host, host->pd->dev.platform_data); - pm_runtime_get_sync(&host->pd->dev); - host->power = true; + host->card_present = true; } } else if (ios->power_mode == MMC_POWER_OFF || !ios->clock) { /* clock stop */ sh_mmcif_clock_control(host, 0); if (ios->power_mode == MMC_POWER_OFF) { - if (host->power) { - pm_runtime_put(&host->pd->dev); + if (host->card_present) { sh_mmcif_release_dma(host); - host->power = false; + host->card_present = false; } + } + if (host->power) { + pm_runtime_put(&host->pd->dev); + host->power = false; if (p->down_pwr) p->down_pwr(host->pd); } @@ -901,8 +902,16 @@ static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) return; } - if (ios->clock) + if (ios->clock) { + if (!host->power) { + if (p->set_pwr) + p->set_pwr(host->pd, ios->power_mode); + pm_runtime_get_sync(&host->pd->dev); + host->power = true; + sh_mmcif_sync_reset(host); + } sh_mmcif_clock_control(host, ios->clock); + } host->bus_width = ios->bus_width; host->state = STATE_IDLE; diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index ce500f03df85..774f6439d7ce 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -26,6 +26,7 @@ #include <linux/mmc/sh_mobile_sdhi.h> #include <linux/mfd/tmio.h> #include <linux/sh_dma.h> +#include <linux/delay.h> #include "tmio_mmc.h" @@ -55,6 +56,39 @@ static int sh_mobile_sdhi_get_cd(struct platform_device *pdev) return -ENOSYS; } +static int sh_mobile_sdhi_wait_idle(struct tmio_mmc_host *host) +{ + int timeout = 1000; + + while (--timeout && !(sd_ctrl_read16(host, CTL_STATUS2) & (1 << 13))) + udelay(1); + + if (!timeout) { + dev_warn(host->pdata->dev, "timeout waiting for SD bus idle\n"); + return -EBUSY; + } + + return 0; +} + +static int sh_mobile_sdhi_write16_hook(struct tmio_mmc_host *host, int addr) +{ + switch (addr) + { + case CTL_SD_CMD: + case CTL_STOP_INTERNAL_ACTION: + case CTL_XFER_BLK_COUNT: + case CTL_SD_CARD_CLK_CTL: + case 
CTL_SD_XFER_LEN: + case CTL_SD_MEM_CARD_OPT: + case CTL_TRANSACTION_CTL: + case CTL_DMA_ENABLE: + return sh_mobile_sdhi_wait_idle(host); + } + + return 0; +} + static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev) { struct sh_mobile_sdhi *priv; @@ -86,6 +120,8 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev) mmc_data->hclk = clk_get_rate(priv->clk); mmc_data->set_pwr = sh_mobile_sdhi_set_pwr; mmc_data->get_cd = sh_mobile_sdhi_get_cd; + if (mmc_data->flags & TMIO_MMC_HAS_IDLE_WAIT) + mmc_data->write16_hook = sh_mobile_sdhi_write16_hook; mmc_data->capabilities = MMC_CAP_MMC_HIGHSPEED; if (p) { mmc_data->flags = p->tmio_flags; diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index 8260bc2c34e3..087d88023ba1 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -18,6 +18,7 @@ #include <linux/highmem.h> #include <linux/mmc/tmio.h> +#include <linux/mutex.h> #include <linux/pagemap.h> #include <linux/spinlock.h> @@ -52,6 +53,8 @@ struct tmio_mmc_host { void (*set_clk_div)(struct platform_device *host, int state); int pm_error; + /* recognise system-wide suspend in runtime PM methods */ + bool pm_global; /* pio related stuff */ struct scatterlist *sg_ptr; @@ -73,8 +76,11 @@ struct tmio_mmc_host { /* Track lost interrupts */ struct delayed_work delayed_reset_work; - spinlock_t lock; + struct work_struct done; + + spinlock_t lock; /* protect host private data */ unsigned long last_req_ts; + struct mutex ios_lock; /* protect set_ios() context */ }; int tmio_mmc_host_probe(struct tmio_mmc_host **host, @@ -103,6 +109,7 @@ static inline void tmio_mmc_kunmap_atomic(struct scatterlist *sg, #if defined(CONFIG_MMC_SDHI) || defined(CONFIG_MMC_SDHI_MODULE) void tmio_mmc_start_dma(struct tmio_mmc_host *host, struct mmc_data *data); +void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable); void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdata); void tmio_mmc_release_dma(struct tmio_mmc_host *host); #else @@ -111,6 +118,10 @@ static inline void tmio_mmc_start_dma(struct tmio_mmc_host *host, { } +static inline void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable) +{ +} + static inline void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdata) { @@ -134,4 +145,44 @@ int tmio_mmc_host_resume(struct device *dev); int tmio_mmc_host_runtime_suspend(struct device *dev); int tmio_mmc_host_runtime_resume(struct device *dev); +static inline u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr) +{ + return readw(host->ctl + (addr << host->bus_shift)); +} + +static inline void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr, + u16 *buf, int count) +{ + readsw(host->ctl + (addr << host->bus_shift), buf, count); +} + +static inline u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr) +{ + return readw(host->ctl + (addr << host->bus_shift)) | + readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16; +} + +static inline void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, u16 val) +{ + /* If there is a hook and it returns non-zero then there + * is an error and the write should be skipped + */ + if (host->pdata->write16_hook && host->pdata->write16_hook(host, addr)) + return; + writew(val, host->ctl + (addr << host->bus_shift)); +} + +static inline void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr, + u16 *buf, int count) +{ + writesw(host->ctl + (addr << host->bus_shift), buf, count); +} + +static inline void sd_ctrl_write32(struct 
tmio_mmc_host *host, int addr, u32 val) +{ + writew(val, host->ctl + (addr << host->bus_shift)); + writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift)); +} + + #endif diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c index bf12513d6297..86f259cdfcbc 100644 --- a/drivers/mmc/host/tmio_mmc_dma.c +++ b/drivers/mmc/host/tmio_mmc_dma.c @@ -23,11 +23,14 @@ #define TMIO_MMC_MIN_DMA_LEN 8 -static void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable) +void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable) { + if (!host->chan_tx || !host->chan_rx) + return; + #if defined(CONFIG_SUPERH) || defined(CONFIG_ARCH_SHMOBILE) /* Switch DMA mode on or off - SuperH specific? */ - writew(enable ? 2 : 0, host->ctl + (0xd8 << host->bus_shift)); + sd_ctrl_write16(host, CTL_DMA_ENABLE, enable ? 2 : 0); #endif } diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index 0b09e8239aa0..1f16357e7301 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -46,40 +46,6 @@ #include "tmio_mmc.h" -static u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr) -{ - return readw(host->ctl + (addr << host->bus_shift)); -} - -static void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr, - u16 *buf, int count) -{ - readsw(host->ctl + (addr << host->bus_shift), buf, count); -} - -static u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr) -{ - return readw(host->ctl + (addr << host->bus_shift)) | - readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16; -} - -static void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, u16 val) -{ - writew(val, host->ctl + (addr << host->bus_shift)); -} - -static void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr, - u16 *buf, int count) -{ - writesw(host->ctl + (addr << host->bus_shift), buf, count); -} - -static void sd_ctrl_write32(struct tmio_mmc_host *host, int addr, u32 val) -{ - writew(val, host->ctl + (addr << host->bus_shift)); - writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift)); -} - void tmio_mmc_enable_mmc_irqs(struct tmio_mmc_host *host, u32 i) { u32 mask = sd_ctrl_read32(host, CTL_IRQ_MASK) & ~(i & TMIO_MASK_IRQ); @@ -284,10 +250,16 @@ static void tmio_mmc_reset_work(struct work_struct *work) /* called with host->lock held, interrupts disabled */ static void tmio_mmc_finish_request(struct tmio_mmc_host *host) { - struct mmc_request *mrq = host->mrq; + struct mmc_request *mrq; + unsigned long flags; + + spin_lock_irqsave(&host->lock, flags); - if (!mrq) + mrq = host->mrq; + if (IS_ERR_OR_NULL(mrq)) { + spin_unlock_irqrestore(&host->lock, flags); return; + } host->cmd = NULL; host->data = NULL; @@ -296,11 +268,18 @@ static void tmio_mmc_finish_request(struct tmio_mmc_host *host) cancel_delayed_work(&host->delayed_reset_work); host->mrq = NULL; + spin_unlock_irqrestore(&host->lock, flags); - /* FIXME: mmc_request_done() can schedule! */ mmc_request_done(host->mmc, mrq); } +static void tmio_mmc_done_work(struct work_struct *work) +{ + struct tmio_mmc_host *host = container_of(work, struct tmio_mmc_host, + done); + tmio_mmc_finish_request(host); +} + /* These are the bitmasks the tmio chip requires to implement the MMC response * types. Note that R1 and R6 are the same in this scheme. 
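The sd_ctrl_* helpers moved into tmio_mmc.h above synthesize 32-bit register accesses out of paired 16-bit MMIO operations, with bus_shift scaling register offsets onto the bus. A self-contained sketch of the same arithmetic against a plain byte array; mmio, rd16/wr16 and the demo_* names are illustrative, not the real register map.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint8_t mmio[256];            /* stand-in for host->ctl */

static uint16_t rd16(unsigned off)
{
    uint16_t v;
    memcpy(&v, mmio + off, 2);
    return v;
}

static void wr16(unsigned off, uint16_t v)
{
    memcpy(mmio + off, &v, 2);
}

/* Same shape as sd_ctrl_read32()/sd_ctrl_write32() above: low half at
 * addr, high half at addr + 2, both scaled by bus_shift. */
static uint32_t demo_read32(unsigned addr, unsigned bus_shift)
{
    return (uint32_t)rd16(addr << bus_shift) |
           (uint32_t)rd16((addr + 2) << bus_shift) << 16;
}

static void demo_write32(unsigned addr, unsigned bus_shift, uint32_t val)
{
    wr16(addr << bus_shift, (uint16_t)val);
    wr16((addr + 2) << bus_shift, (uint16_t)(val >> 16));
}

int main(void)
{
    demo_write32(0x10, 1, 0xdeadbeef);        /* bus_shift=1: wider bus */
    printf("0x%08x\n", demo_read32(0x10, 1)); /* prints 0xdeadbeef */
    return 0;
}

Moving these accessors into the header is what lets sd_ctrl_write16() gate writes through the new pdata->write16_hook from more than one translation unit.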
*/ #define APP_CMD 0x0040 @@ -467,7 +446,7 @@ void tmio_mmc_do_data_irq(struct tmio_mmc_host *host) BUG(); } - tmio_mmc_finish_request(host); + schedule_work(&host->done); } static void tmio_mmc_data_irq(struct tmio_mmc_host *host) @@ -557,7 +536,7 @@ static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host, tasklet_schedule(&host->dma_issue); } } else { - tmio_mmc_finish_request(host); + schedule_work(&host->done); } out: @@ -567,6 +546,7 @@ out: irqreturn_t tmio_mmc_irq(int irq, void *devid) { struct tmio_mmc_host *host = devid; + struct mmc_host *mmc = host->mmc; struct tmio_mmc_data *pdata = host->pdata; unsigned int ireg, irq_mask, status; unsigned int sdio_ireg, sdio_irq_mask, sdio_status; @@ -588,13 +568,13 @@ irqreturn_t tmio_mmc_irq(int irq, void *devid) if (sdio_ireg && !host->sdio_irq_enabled) { pr_warning("tmio_mmc: Spurious SDIO IRQ, disabling! 0x%04x 0x%04x 0x%04x\n", sdio_status, sdio_irq_mask, sdio_ireg); - tmio_mmc_enable_sdio_irq(host->mmc, 0); + tmio_mmc_enable_sdio_irq(mmc, 0); goto out; } - if (host->mmc->caps & MMC_CAP_SDIO_IRQ && + if (mmc->caps & MMC_CAP_SDIO_IRQ && sdio_ireg & TMIO_SDIO_STAT_IOIRQ) - mmc_signal_sdio_irq(host->mmc); + mmc_signal_sdio_irq(mmc); if (sdio_ireg) goto out; @@ -603,58 +583,49 @@ irqreturn_t tmio_mmc_irq(int irq, void *devid) pr_debug_status(status); pr_debug_status(ireg); - if (!ireg) { - tmio_mmc_disable_mmc_irqs(host, status & ~irq_mask); - - pr_warning("tmio_mmc: Spurious irq, disabling! " - "0x%08x 0x%08x 0x%08x\n", status, irq_mask, ireg); - pr_debug_status(status); - + /* Card insert / remove attempts */ + if (ireg & (TMIO_STAT_CARD_INSERT | TMIO_STAT_CARD_REMOVE)) { + tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_CARD_INSERT | + TMIO_STAT_CARD_REMOVE); + if ((((ireg & TMIO_STAT_CARD_REMOVE) && mmc->card) || + ((ireg & TMIO_STAT_CARD_INSERT) && !mmc->card)) && + !work_pending(&mmc->detect.work)) + mmc_detect_change(host->mmc, msecs_to_jiffies(100)); goto out; } - while (ireg) { - /* Card insert / remove attempts */ - if (ireg & (TMIO_STAT_CARD_INSERT | TMIO_STAT_CARD_REMOVE)) { - tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_CARD_INSERT | - TMIO_STAT_CARD_REMOVE); - mmc_detect_change(host->mmc, msecs_to_jiffies(100)); - } - - /* CRC and other errors */ -/* if (ireg & TMIO_STAT_ERR_IRQ) - * handled |= tmio_error_irq(host, irq, stat); + /* CRC and other errors */ +/* if (ireg & TMIO_STAT_ERR_IRQ) + * handled |= tmio_error_irq(host, irq, stat); */ - /* Command completion */ - if (ireg & (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT)) { - tmio_mmc_ack_mmc_irqs(host, - TMIO_STAT_CMDRESPEND | - TMIO_STAT_CMDTIMEOUT); - tmio_mmc_cmd_irq(host, status); - } - - /* Data transfer */ - if (ireg & (TMIO_STAT_RXRDY | TMIO_STAT_TXRQ)) { - tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_RXRDY | TMIO_STAT_TXRQ); - tmio_mmc_pio_irq(host); - } - - /* Data transfer completion */ - if (ireg & TMIO_STAT_DATAEND) { - tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_DATAEND); - tmio_mmc_data_irq(host); - } + /* Command completion */ + if (ireg & (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT)) { + tmio_mmc_ack_mmc_irqs(host, + TMIO_STAT_CMDRESPEND | + TMIO_STAT_CMDTIMEOUT); + tmio_mmc_cmd_irq(host, status); + goto out; + } - /* Check status - keep going until we've handled it all */ - status = sd_ctrl_read32(host, CTL_STATUS); - irq_mask = sd_ctrl_read32(host, CTL_IRQ_MASK); - ireg = status & TMIO_MASK_IRQ & ~irq_mask; + /* Data transfer */ + if (ireg & (TMIO_STAT_RXRDY | TMIO_STAT_TXRQ)) { + tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_RXRDY | TMIO_STAT_TXRQ); + tmio_mmc_pio_irq(host); + goto out; + } 
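Both the sdhci and tmio hunks in this area rework card-detect interrupt handling. The sdhci side arms only the transition that can actually happen next: the INSERT status would otherwise be raised again as soon as it is cleared while a card sits in the slot, which is the i.MX interrupt storm described in the comment. The selection logic in isolation; the two bit values match the real SDHCI_INT_* register bits, everything else is a demo.

#include <stdio.h>

#define SDHCI_INT_CARD_INSERT   0x00000040   /* real register bits */
#define SDHCI_INT_CARD_REMOVE   0x00000080

/* Arm only the transition that can happen next, mirroring the logic
 * added to sdhci_set_card_detection() and sdhci_irq() above. */
static unsigned int pick_cd_irqs(int card_present)
{
    return card_present ? SDHCI_INT_CARD_REMOVE : SDHCI_INT_CARD_INSERT;
}

int main(void)
{
    printf("card in:  unmask 0x%08x (remove only)\n", pick_cd_irqs(1));
    printf("card out: unmask 0x%08x (insert only)\n", pick_cd_irqs(0));
    return 0;
}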
- pr_debug("Status at end of loop: %08x\n", status); - pr_debug_status(status); + /* Data transfer completion */ + if (ireg & TMIO_STAT_DATAEND) { + tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_DATAEND); + tmio_mmc_data_irq(host); + goto out; } - pr_debug("MMC IRQ end\n"); + + pr_warning("tmio_mmc: Spurious irq, disabling! " + "0x%08x 0x%08x 0x%08x\n", status, irq_mask, ireg); + pr_debug_status(status); + tmio_mmc_disable_mmc_irqs(host, status & ~irq_mask); out: return IRQ_HANDLED; @@ -749,6 +720,8 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) struct tmio_mmc_data *pdata = host->pdata; unsigned long flags; + mutex_lock(&host->ios_lock); + spin_lock_irqsave(&host->lock, flags); if (host->mrq) { if (IS_ERR(host->mrq)) { @@ -764,6 +737,8 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->mrq->cmd->opcode, host->last_req_ts, jiffies); } spin_unlock_irqrestore(&host->lock, flags); + + mutex_unlock(&host->ios_lock); return; } @@ -771,33 +746,30 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) spin_unlock_irqrestore(&host->lock, flags); - if (ios->clock) - tmio_mmc_set_clock(host, ios->clock); - - /* Power sequence - OFF -> UP -> ON */ - if (ios->power_mode == MMC_POWER_UP) { - if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) && !pdata->power) { + /* + * pdata->power == false only if COLD_CD is available, otherwise only + * in short time intervals during probing or resuming + */ + if (ios->power_mode == MMC_POWER_ON && ios->clock) { + if (!pdata->power) { pm_runtime_get_sync(&host->pdev->dev); pdata->power = true; } + tmio_mmc_set_clock(host, ios->clock); /* power up SD bus */ if (host->set_pwr) host->set_pwr(host->pdev, 1); - } else if (ios->power_mode == MMC_POWER_OFF || !ios->clock) { - /* power down SD bus */ - if (ios->power_mode == MMC_POWER_OFF) { - if (host->set_pwr) - host->set_pwr(host->pdev, 0); - if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) && - pdata->power) { - pdata->power = false; - pm_runtime_put(&host->pdev->dev); - } - } - tmio_mmc_clk_stop(host); - } else { /* start bus clock */ tmio_mmc_clk_start(host); + } else if (ios->power_mode != MMC_POWER_UP) { + if (host->set_pwr) + host->set_pwr(host->pdev, 0); + if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) && + pdata->power) { + pdata->power = false; + pm_runtime_put(&host->pdev->dev); + } + tmio_mmc_clk_stop(host); } switch (ios->bus_width) { @@ -817,6 +789,8 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) current->comm, task_pid_nr(current), ios->clock, ios->power_mode); host->mrq = NULL; + + mutex_unlock(&host->ios_lock); } static int tmio_mmc_get_ro(struct mmc_host *mmc) @@ -913,16 +887,20 @@ int __devinit tmio_mmc_host_probe(struct tmio_mmc_host **host, tmio_mmc_enable_sdio_irq(mmc, 0); spin_lock_init(&_host->lock); + mutex_init(&_host->ios_lock); /* Init delayed work for request timeouts */ INIT_DELAYED_WORK(&_host->delayed_reset_work, tmio_mmc_reset_work); + INIT_WORK(&_host->done, tmio_mmc_done_work); /* See if we also get DMA */ tmio_mmc_request_dma(_host, pdata); /* We have to keep the device powered for its card detection to work */ - if (!(pdata->flags & TMIO_MMC_HAS_COLD_CD)) + if (!(pdata->flags & TMIO_MMC_HAS_COLD_CD)) { + pdata->power = true; pm_runtime_get_noresume(&pdev->dev); + } mmc_add_host(mmc); @@ -963,6 +941,7 @@ void tmio_mmc_host_remove(struct tmio_mmc_host *host) pm_runtime_get_sync(&pdev->dev); mmc_remove_host(host->mmc); + cancel_work_sync(&host->done); cancel_delayed_work_sync(&host->delayed_reset_work); 
tmio_mmc_release_dma(host); @@ -998,11 +977,16 @@ int tmio_mmc_host_resume(struct device *dev) /* The MMC core will perform the complete set up */ host->pdata->power = false; + host->pm_global = true; if (!host->pm_error) pm_runtime_get_sync(dev); - tmio_mmc_reset(mmc_priv(mmc)); - tmio_mmc_request_dma(host, host->pdata); + if (host->pm_global) { + /* Runtime PM resume callback didn't run */ + tmio_mmc_reset(host); + tmio_mmc_enable_dma(host, true); + host->pm_global = false; + } return mmc_resume_host(mmc); } @@ -1023,12 +1007,15 @@ int tmio_mmc_host_runtime_resume(struct device *dev) struct tmio_mmc_data *pdata = host->pdata; tmio_mmc_reset(host); + tmio_mmc_enable_dma(host, true); if (pdata->power) { /* Only entered after a card-insert interrupt */ - tmio_mmc_set_ios(mmc, &mmc->ios); + if (!mmc->card) + tmio_mmc_set_ios(mmc, &mmc->ios); mmc_detect_change(mmc, msecs_to_jiffies(100)); } + host->pm_global = false; return 0; } diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 65626c1c446d..6c3fb5ab20f5 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -953,10 +953,14 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) if (!ubi->peb_buf2) goto out_free; + err = ubi_debugging_init_dev(ubi); + if (err) + goto out_free; + err = attach_by_scanning(ubi); if (err) { dbg_err("failed to attach by scanning, error %d", err); - goto out_free; + goto out_debugging; } if (ubi->autoresize_vol_id != -1) { @@ -969,12 +973,16 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) if (err) goto out_detach; + err = ubi_debugfs_init_dev(ubi); + if (err) + goto out_uif; + ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name); if (IS_ERR(ubi->bgt_thread)) { err = PTR_ERR(ubi->bgt_thread); ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name, err); - goto out_uif; + goto out_debugfs; } ubi_msg("attached mtd%d to ubi%d", mtd->index, ubi_num); @@ -1008,12 +1016,18 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL); return ubi_num; +out_debugfs: + ubi_debugfs_exit_dev(ubi); out_uif: + get_device(&ubi->dev); + ubi_assert(ref); uif_close(ubi); out_detach: ubi_wl_close(ubi); free_internal_volumes(ubi); vfree(ubi->vtbl); +out_debugging: + ubi_debugging_exit_dev(ubi); out_free: vfree(ubi->peb_buf1); vfree(ubi->peb_buf2); @@ -1080,11 +1094,13 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) */ get_device(&ubi->dev); + ubi_debugfs_exit_dev(ubi); uif_close(ubi); ubi_wl_close(ubi); free_internal_volumes(ubi); vfree(ubi->vtbl); put_mtd_device(ubi->mtd); + ubi_debugging_exit_dev(ubi); vfree(ubi->peb_buf1); vfree(ubi->peb_buf2); ubi_msg("mtd%d is detached from ubi%d", ubi->mtd->index, ubi->ubi_num); @@ -1199,6 +1215,11 @@ static int __init ubi_init(void) if (!ubi_wl_entry_slab) goto out_dev_unreg; + err = ubi_debugfs_init(); + if (err) + goto out_slab; + + /* Attach MTD devices */ for (i = 0; i < mtd_devs; i++) { struct mtd_dev_param *p = &mtd_dev_param[i]; @@ -1247,6 +1268,8 @@ out_detach: ubi_detach_mtd_dev(ubi_devices[k]->ubi_num, 1); mutex_unlock(&ubi_devices_mutex); } + ubi_debugfs_exit(); +out_slab: kmem_cache_destroy(ubi_wl_entry_slab); out_dev_unreg: misc_deregister(&ubi_ctrl_cdev); @@ -1270,6 +1293,7 @@ static void __exit ubi_exit(void) ubi_detach_mtd_dev(ubi_devices[i]->ubi_num, 1); mutex_unlock(&ubi_devices_mutex); } + ubi_debugfs_exit(); kmem_cache_destroy(ubi_wl_entry_slab); misc_deregister(&ubi_ctrl_cdev); class_remove_file(ubi_class, 
&ubi_version); diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c index 2224cbe41ddf..ab80c0debac8 100644 --- a/drivers/mtd/ubi/debug.c +++ b/drivers/mtd/ubi/debug.c @@ -27,17 +27,9 @@ #ifdef CONFIG_MTD_UBI_DEBUG #include "ubi.h" +#include <linux/debugfs.h> +#include <linux/uaccess.h> #include <linux/module.h> -#include <linux/moduleparam.h> - -unsigned int ubi_chk_flags; -unsigned int ubi_tst_flags; - -module_param_named(debug_chks, ubi_chk_flags, uint, S_IRUGO | S_IWUSR); -module_param_named(debug_tsts, ubi_chk_flags, uint, S_IRUGO | S_IWUSR); - -MODULE_PARM_DESC(debug_chks, "Debug check flags"); -MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); /** * ubi_dbg_dump_ec_hdr - dump an erase counter header. @@ -239,4 +231,261 @@ out: return; } +/** + * ubi_debugging_init_dev - initialize debugging for an UBI device. + * @ubi: UBI device description object + * + * This function initializes debugging-related data for UBI device @ubi. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubi_debugging_init_dev(struct ubi_device *ubi) +{ + ubi->dbg = kzalloc(sizeof(struct ubi_debug_info), GFP_KERNEL); + if (!ubi->dbg) + return -ENOMEM; + + return 0; +} + +/** + * ubi_debugging_exit_dev - free debugging data for an UBI device. + * @ubi: UBI device description object + */ +void ubi_debugging_exit_dev(struct ubi_device *ubi) +{ + kfree(ubi->dbg); +} + +/* + * Root directory for UBI stuff in debugfs. Contains sub-directories which + * contain the stuff specific to particular UBI devices. + */ +static struct dentry *dfs_rootdir; + +/** + * ubi_debugfs_init - create UBI debugfs directory. + * + * Create UBI debugfs directory. Returns zero in case of success and a negative + * error code in case of failure. + */ +int ubi_debugfs_init(void) +{ + dfs_rootdir = debugfs_create_dir("ubi", NULL); + if (IS_ERR_OR_NULL(dfs_rootdir)) { + int err = dfs_rootdir ? -ENODEV : PTR_ERR(dfs_rootdir); + + ubi_err("cannot create \"ubi\" debugfs directory, error %d\n", + err); + return err; + } + + return 0; +} + +/** + * ubi_debugfs_exit - remove UBI debugfs directory. 
+ */ +void ubi_debugfs_exit(void) +{ + debugfs_remove(dfs_rootdir); +} + +/* Read an UBI debugfs file */ +static ssize_t dfs_file_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + unsigned long ubi_num = (unsigned long)file->private_data; + struct dentry *dent = file->f_path.dentry; + struct ubi_device *ubi; + struct ubi_debug_info *d; + char buf[3]; + int val; + + ubi = ubi_get_device(ubi_num); + if (!ubi) + return -ENODEV; + d = ubi->dbg; + + if (dent == d->dfs_chk_gen) + val = d->chk_gen; + else if (dent == d->dfs_chk_io) + val = d->chk_io; + else if (dent == d->dfs_disable_bgt) + val = d->disable_bgt; + else if (dent == d->dfs_emulate_bitflips) + val = d->emulate_bitflips; + else if (dent == d->dfs_emulate_io_failures) + val = d->emulate_io_failures; + else { + count = -EINVAL; + goto out; + } + + if (val) + buf[0] = '1'; + else + buf[0] = '0'; + buf[1] = '\n'; + buf[2] = 0x00; + + count = simple_read_from_buffer(user_buf, count, ppos, buf, 2); + +out: + ubi_put_device(ubi); + return count; +} + +/* Write an UBI debugfs file */ +static ssize_t dfs_file_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + unsigned long ubi_num = (unsigned long)file->private_data; + struct dentry *dent = file->f_path.dentry; + struct ubi_device *ubi; + struct ubi_debug_info *d; + size_t buf_size; + char buf[8]; + int val; + + ubi = ubi_get_device(ubi_num); + if (!ubi) + return -ENODEV; + d = ubi->dbg; + + buf_size = min_t(size_t, count, (sizeof(buf) - 1)); + if (copy_from_user(buf, user_buf, buf_size)) { + count = -EFAULT; + goto out; + } + + if (buf[0] == '1') + val = 1; + else if (buf[0] == '0') + val = 0; + else { + count = -EINVAL; + goto out; + } + + if (dent == d->dfs_chk_gen) + d->chk_gen = val; + else if (dent == d->dfs_chk_io) + d->chk_io = val; + else if (dent == d->dfs_disable_bgt) + d->disable_bgt = val; + else if (dent == d->dfs_emulate_bitflips) + d->emulate_bitflips = val; + else if (dent == d->dfs_emulate_io_failures) + d->emulate_io_failures = val; + else + count = -EINVAL; + +out: + ubi_put_device(ubi); + return count; +} + +static int default_open(struct inode *inode, struct file *file) +{ + if (inode->i_private) + file->private_data = inode->i_private; + + return 0; +} + +/* File operations for all UBI debugfs files */ +static const struct file_operations dfs_fops = { + .read = dfs_file_read, + .write = dfs_file_write, + .open = default_open, + .llseek = no_llseek, + .owner = THIS_MODULE, +}; + +/** + * ubi_debugfs_init_dev - initialize debugfs for an UBI device. + * @ubi: UBI device description object + * + * This function creates all debugfs files for UBI device @ubi. Returns zero in + * case of success and a negative error code in case of failure. 
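dfs_file_read() and dfs_file_write() above funnel five boolean knobs through a single file_operations, dispatching on the dentry. Stripped of that dispatch, the per-knob I/O pattern looks like the sketch below; it assumes only the stock debugfs/fs APIs, and demo_flag, demo_read, demo_write and demo_fops are invented names for a single-flag version.

#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/uaccess.h>

static int demo_flag;

/* Render the flag as "0\n" or "1\n", exactly as the UBI knobs do. */
static ssize_t demo_read(struct file *file, char __user *ubuf,
                         size_t count, loff_t *ppos)
{
    char buf[3];

    buf[0] = demo_flag ? '1' : '0';
    buf[1] = '\n';
    buf[2] = 0x00;
    return simple_read_from_buffer(ubuf, count, ppos, buf, 2);
}

static ssize_t demo_write(struct file *file, const char __user *ubuf,
                          size_t count, loff_t *ppos)
{
    char buf[8];
    size_t len = min_t(size_t, count, sizeof(buf) - 1);

    if (copy_from_user(buf, ubuf, len))
        return -EFAULT;

    if (buf[0] == '1')
        demo_flag = 1;
    else if (buf[0] == '0')
        demo_flag = 0;
    else
        return -EINVAL;

    return count;
}

static const struct file_operations demo_fops = {
    .owner  = THIS_MODULE,
    .read   = demo_read,
    .write  = demo_write,
    .llseek = no_llseek,
};

/* Wired up with something like:
 * debugfs_create_file("demo_flag", S_IWUSR, parent, NULL, &demo_fops);
 */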
+ */ +int ubi_debugfs_init_dev(struct ubi_device *ubi) +{ + int err, n; + unsigned long ubi_num = ubi->ubi_num; + const char *fname; + struct dentry *dent; + struct ubi_debug_info *d = ubi->dbg; + + n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN + 1, UBI_DFS_DIR_NAME, + ubi->ubi_num); + if (n == UBI_DFS_DIR_LEN) { + /* The array size is too small */ + fname = UBI_DFS_DIR_NAME; + dent = ERR_PTR(-EINVAL); + goto out; + } + + fname = d->dfs_dir_name; + dent = debugfs_create_dir(fname, dfs_rootdir); + if (IS_ERR_OR_NULL(dent)) + goto out; + d->dfs_dir = dent; + + fname = "chk_gen"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_gen = dent; + + fname = "chk_io"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_io = dent; + + fname = "tst_disable_bgt"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_disable_bgt = dent; + + fname = "tst_emulate_bitflips"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_emulate_bitflips = dent; + + fname = "tst_emulate_io_failures"; + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_emulate_io_failures = dent; + + return 0; + +out_remove: + debugfs_remove_recursive(d->dfs_dir); +out: + err = dent ? PTR_ERR(dent) : -ENODEV; + ubi_err("cannot create \"%s\" debugfs file or directory, error %d\n", + fname, err); + return err; +} + +/** + * dbg_debug_exit_dev - free all debugfs files corresponding to device @ubi + * @ubi: UBI device description object + */ +void ubi_debugfs_exit_dev(struct ubi_device *ubi) +{ + debugfs_remove_recursive(ubi->dbg->dfs_dir); +} + #endif /* CONFIG_MTD_UBI_DEBUG */ diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h index 3f1a09c5c438..65b5b76cc379 100644 --- a/drivers/mtd/ubi/debug.h +++ b/drivers/mtd/ubi/debug.h @@ -21,14 +21,6 @@ #ifndef __UBI_DEBUG_H__ #define __UBI_DEBUG_H__ -struct ubi_ec_hdr; -struct ubi_vid_hdr; -struct ubi_volume; -struct ubi_vtbl_record; -struct ubi_scan_volume; -struct ubi_scan_leb; -struct ubi_mkvol_req; - #ifdef CONFIG_MTD_UBI_DEBUG #include <linux/random.h> @@ -71,86 +63,103 @@ void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv); void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type); void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req); void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len); - -extern unsigned int ubi_chk_flags; - -/* - * Debugging check flags. - * - * UBI_CHK_GEN: general checks - * UBI_CHK_IO: check writes and erases - */ -enum { - UBI_CHK_GEN = 0x1, - UBI_CHK_IO = 0x2, -}; - int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len); int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, int len); - -extern unsigned int ubi_tst_flags; +int ubi_debugging_init_dev(struct ubi_device *ubi); +void ubi_debugging_exit_dev(struct ubi_device *ubi); +int ubi_debugfs_init(void); +void ubi_debugfs_exit(void); +int ubi_debugfs_init_dev(struct ubi_device *ubi); +void ubi_debugfs_exit_dev(struct ubi_device *ubi); /* - * Special testing flags. 
+ * The UBI debugfs directory name pattern and maximum name length (3 for "ubi" + * + 2 for the number plus 1 for the trailing zero byte. + */ +#define UBI_DFS_DIR_NAME "ubi%d" +#define UBI_DFS_DIR_LEN (3 + 2 + 1) + +/** + * struct ubi_debug_info - debugging information for an UBI device. * - * UBIFS_TST_DISABLE_BGT: disable the background thread - * UBI_TST_EMULATE_BITFLIPS: emulate bit-flips - * UBI_TST_EMULATE_WRITE_FAILURES: emulate write failures - * UBI_TST_EMULATE_ERASE_FAILURES: emulate erase failures + * @chk_gen: if UBI general extra checks are enabled + * @chk_io: if UBI I/O extra checks are enabled + * @disable_bgt: disable the background task for testing purposes + * @emulate_bitflips: emulate bit-flips for testing purposes + * @emulate_io_failures: emulate write/erase failures for testing purposes + * @dfs_dir_name: name of debugfs directory containing files of this UBI device + * @dfs_dir: direntry object of the UBI device debugfs directory + * @dfs_chk_gen: debugfs knob to enable UBI general extra checks + * @dfs_chk_io: debugfs knob to enable UBI I/O extra checks + * @dfs_disable_bgt: debugfs knob to disable the background task + * @dfs_emulate_bitflips: debugfs knob to emulate bit-flips + * @dfs_emulate_io_failures: debugfs knob to emulate write/erase failures */ -enum { - UBI_TST_DISABLE_BGT = 0x1, - UBI_TST_EMULATE_BITFLIPS = 0x2, - UBI_TST_EMULATE_WRITE_FAILURES = 0x4, - UBI_TST_EMULATE_ERASE_FAILURES = 0x8, +struct ubi_debug_info { + unsigned int chk_gen:1; + unsigned int chk_io:1; + unsigned int disable_bgt:1; + unsigned int emulate_bitflips:1; + unsigned int emulate_io_failures:1; + char dfs_dir_name[UBI_DFS_DIR_LEN + 1]; + struct dentry *dfs_dir; + struct dentry *dfs_chk_gen; + struct dentry *dfs_chk_io; + struct dentry *dfs_disable_bgt; + struct dentry *dfs_emulate_bitflips; + struct dentry *dfs_emulate_io_failures; }; /** * ubi_dbg_is_bgt_disabled - if the background thread is disabled. + * @ubi: UBI device description object * * Returns non-zero if the UBI background thread is disabled for testing * purposes. */ -static inline int ubi_dbg_is_bgt_disabled(void) +static inline int ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi) { - return ubi_tst_flags & UBI_TST_DISABLE_BGT; + return ubi->dbg->disable_bgt; } /** * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip. + * @ubi: UBI device description object * * Returns non-zero if a bit-flip should be emulated, otherwise returns zero. */ -static inline int ubi_dbg_is_bitflip(void) +static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) { - if (ubi_tst_flags & UBI_TST_EMULATE_BITFLIPS) + if (ubi->dbg->emulate_bitflips) return !(random32() % 200); return 0; } /** * ubi_dbg_is_write_failure - if it is time to emulate a write failure. + * @ubi: UBI device description object * * Returns non-zero if a write failure should be emulated, otherwise returns * zero. */ -static inline int ubi_dbg_is_write_failure(void) +static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi) { - if (ubi_tst_flags & UBI_TST_EMULATE_WRITE_FAILURES) + if (ubi->dbg->emulate_io_failures) return !(random32() % 500); return 0; } /** * ubi_dbg_is_erase_failure - if its time to emulate an erase failure. + * @ubi: UBI device description object * * Returns non-zero if an erase failure should be emulated, otherwise returns * zero. 
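The emulation predicates above turn a per-device bit into a low-probability fault: roughly 1 in 200 reads sees an emulated bit-flip, 1 in 500 writes and 1 in 400 erasures fail. A quick userspace check of that arithmetic; rand() stands in for the kernel's random32(), and the names are illustrative.

#include <stdio.h>
#include <stdlib.h>

/* Mirrors ubi_dbg_is_bitflip(): fires on average once per 200 calls. */
static int maybe_emulate_bitflip(void)
{
    return !(rand() % 200);
}

int main(void)
{
    int i, hits = 0;

    srand(1);
    for (i = 0; i < 1000000; i++)
        hits += maybe_emulate_bitflip();

    /* Expect roughly 1000000 / 200 = 5000 emulated bit-flips. */
    printf("emulated %d bit-flips in 1000000 reads\n", hits);
    return 0;
}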
*/ -static inline int ubi_dbg_is_erase_failure(void) +static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi) { - if (ubi_tst_flags & UBI_TST_EMULATE_ERASE_FAILURES) + if (ubi->dbg->emulate_io_failures) return !(random32() % 400); return 0; } @@ -201,11 +210,6 @@ static inline void ubi_dbg_dump_flash(struct ubi_device *ubi, static inline void ubi_dbg_print_hex_dump(const char *l, const char *ps, int pt, int r, int g, const void *b, size_t len, bool a) { return; } - -static inline int ubi_dbg_is_bgt_disabled(void) { return 0; } -static inline int ubi_dbg_is_bitflip(void) { return 0; } -static inline int ubi_dbg_is_write_failure(void) { return 0; } -static inline int ubi_dbg_is_erase_failure(void) { return 0; } static inline int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) { return 0; } @@ -213,5 +217,20 @@ static inline int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, int len) { return 0; } +static inline int ubi_debugging_init_dev(struct ubi_device *ubi) { return 0; } +static inline void ubi_debugging_exit_dev(struct ubi_device *ubi) { return; } +static inline int ubi_debugfs_init(void) { return 0; } +static inline void ubi_debugfs_exit(void) { return; } +static inline int ubi_debugfs_init_dev(struct ubi_device *ubi) { return 0; } +static inline void ubi_debugfs_exit_dev(struct ubi_device *ubi) { return; } + +static inline int +ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi) { return 0; } +static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) { return 0; } +static inline int +ubi_dbg_is_write_failure(const struct ubi_device *ubi) { return 0; } +static inline int +ubi_dbg_is_erase_failure(const struct ubi_device *ubi) { return 0; } + #endif /* !CONFIG_MTD_UBI_DEBUG */ #endif /* !__UBI_DEBUG_H__ */ diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 8c1b1c7bc4a7..6ba55c235873 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -212,7 +212,7 @@ retry: } else { ubi_assert(len == read); - if (ubi_dbg_is_bitflip()) { + if (ubi_dbg_is_bitflip(ubi)) { dbg_gen("bit-flip (emulated)"); err = UBI_IO_BITFLIPS; } @@ -281,7 +281,7 @@ int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, return err; } - if (ubi_dbg_is_write_failure()) { + if (ubi_dbg_is_write_failure(ubi)) { dbg_err("cannot write %d bytes to PEB %d:%d " "(emulated)", len, pnum, offset); ubi_dbg_dump_stack(); @@ -396,7 +396,7 @@ retry: if (err) return err; - if (ubi_dbg_is_erase_failure()) { + if (ubi_dbg_is_erase_failure(ubi)) { dbg_err("cannot erase PEB %d (emulated)", pnum); return -EIO; } @@ -1146,7 +1146,7 @@ static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum) { int err; - if (!(ubi_chk_flags & UBI_CHK_IO)) + if (!ubi->dbg->chk_io) return 0; err = ubi_io_is_bad(ubi, pnum); @@ -1173,7 +1173,7 @@ static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum, int err; uint32_t magic; - if (!(ubi_chk_flags & UBI_CHK_IO)) + if (!ubi->dbg->chk_io) return 0; magic = be32_to_cpu(ec_hdr->magic); @@ -1211,7 +1211,7 @@ static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum) uint32_t crc, hdr_crc; struct ubi_ec_hdr *ec_hdr; - if (!(ubi_chk_flags & UBI_CHK_IO)) + if (!ubi->dbg->chk_io) return 0; ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); @@ -1255,7 +1255,7 @@ static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum, int err; uint32_t magic; - if (!(ubi_chk_flags & UBI_CHK_IO)) + if (!ubi->dbg->chk_io) return 0; magic = 
be32_to_cpu(vid_hdr->magic); @@ -1296,7 +1296,7 @@ static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) struct ubi_vid_hdr *vid_hdr; void *p; - if (!(ubi_chk_flags & UBI_CHK_IO)) + if (!ubi->dbg->chk_io) return 0; vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); @@ -1348,7 +1348,7 @@ int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum, void *buf1; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; - if (!(ubi_chk_flags & UBI_CHK_IO)) + if (!ubi->dbg->chk_io) return 0; buf1 = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); @@ -1412,7 +1412,7 @@ int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) void *buf; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; - if (!(ubi_chk_flags & UBI_CHK_IO)) + if (!ubi->dbg->chk_io) return 0; buf = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 2135a53732ff..a3a198f9b98d 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -1347,7 +1347,7 @@ static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si) struct ubi_scan_leb *seb, *last_seb; uint8_t *buf; - if (!(ubi_chk_flags & UBI_CHK_GEN)) + if (!ubi->dbg->chk_gen) return 0; /* diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index c6c22295898e..dc64c767fd21 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -44,7 +44,6 @@ #include "ubi-media.h" #include "scan.h" -#include "debug.h" /* Maximum number of supported UBI devices */ #define UBI_MAX_DEVICES 32 @@ -390,6 +389,8 @@ struct ubi_wl_entry; * @peb_buf2: another buffer of PEB size used for different purposes * @buf_mutex: protects @peb_buf1 and @peb_buf2 * @ckvol_mutex: serializes static volume checking when opening + * + * @dbg: debugging information for this UBI device */ struct ubi_device { struct cdev cdev; @@ -472,8 +473,12 @@ struct ubi_device { void *peb_buf2; struct mutex buf_mutex; struct mutex ckvol_mutex; + + struct ubi_debug_info *dbg; }; +#include "debug.h" + extern struct kmem_cache *ubi_wl_entry_slab; extern const struct file_operations ubi_ctrl_cdev_operations; extern const struct file_operations ubi_cdev_operations; @@ -662,6 +667,7 @@ static inline void ubi_ro_mode(struct ubi_device *ubi) if (!ubi->ro_mode) { ubi->ro_mode = 1; ubi_warn("switch to read-only mode"); + ubi_dbg_dump_stack(); } } diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 366eb70219a6..97e093d19672 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -871,7 +871,7 @@ static int paranoid_check_volumes(struct ubi_device *ubi) { int i, err = 0; - if (!(ubi_chk_flags & UBI_CHK_GEN)) + if (!ubi->dbg->chk_gen) return 0; for (i = 0; i < ubi->vtbl_slots; i++) { diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index fd3bf770f518..4b50a3029b84 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -307,8 +307,7 @@ static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si, { int err, tries = 0; static struct ubi_vid_hdr *vid_hdr; - struct ubi_scan_volume *sv; - struct ubi_scan_leb *new_seb, *old_seb = NULL; + struct ubi_scan_leb *new_seb; ubi_msg("create volume table (copy #%d)", copy + 1); @@ -316,15 +315,6 @@ static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si, if (!vid_hdr) return -ENOMEM; - /* - * Check if there is a logical eraseblock which would have to contain - * this volume table copy was found during scanning. It has to be wiped - * out. 
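The io.c, scan.c, vmt.c and vtbl.c hunks all make the same mechanical substitution: a test of the global ubi_chk_flags becomes a test of the per-device ubi->dbg->chk_io or chk_gen bit, so the expensive paranoid checks can be enabled per UBI device rather than module-wide. The shape of the conversion, reduced to a runnable sketch with invented demo_* types:

#include <stdio.h>

/* Per-device debug switches, standing in for struct ubi_debug_info. */
struct demo_debug_info {
    unsigned int chk_gen:1;
    unsigned int chk_io:1;
};

struct demo_device {
    int id;
    struct demo_debug_info *dbg;
};

static int demo_paranoid_check(const struct demo_device *dev)
{
    if (!dev->dbg->chk_io)  /* was: if (!(ubi_chk_flags & UBI_CHK_IO)) */
        return 0;

    /* ...the expensive verification would run here... */
    return 0;
}

int main(void)
{
    struct demo_debug_info dbg = { .chk_io = 1 };
    struct demo_device dev = { .id = 0, .dbg = &dbg };

    printf("check result: %d\n", demo_paranoid_check(&dev));
    return 0;
}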
- */ - sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID); - if (sv) - old_seb = ubi_scan_find_seb(sv, copy); - retry: new_seb = ubi_scan_get_free_peb(ubi, si); if (IS_ERR(new_seb)) { @@ -351,8 +341,8 @@ retry: goto write_error; /* - * And add it to the scanning information. Don't delete the old - * @old_seb as it will be deleted and freed in 'ubi_scan_add_used()'. + * And add it to the scanning information. Don't delete the old version + * of this LEB as it will be deleted and freed in 'ubi_scan_add_used()'. */ err = ubi_scan_add_used(ubi, si, new_seb->pnum, new_seb->ec, vid_hdr, 0); @@ -876,7 +866,7 @@ out_free: */ static void paranoid_vtbl_check(const struct ubi_device *ubi) { - if (!(ubi_chk_flags & UBI_CHK_GEN)) + if (!ubi->dbg->chk_gen) return; if (vtbl_check(ubi, ubi->vtbl)) { diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index ff2c4956eeff..42c684cf3688 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1,4 +1,5 @@ /* + * @ubi: UBI device description object * Copyright (c) International Business Machines Corp., 2006 * * This program is free software; you can redistribute it and/or modify @@ -163,12 +164,14 @@ struct ubi_work { #ifdef CONFIG_MTD_UBI_DEBUG static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec); -static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, +static int paranoid_check_in_wl_tree(const struct ubi_device *ubi, + struct ubi_wl_entry *e, struct rb_root *root); -static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e); +static int paranoid_check_in_pq(const struct ubi_device *ubi, + struct ubi_wl_entry *e); #else #define paranoid_check_ec(ubi, pnum, ec) 0 -#define paranoid_check_in_wl_tree(e, root) +#define paranoid_check_in_wl_tree(ubi, e, root) #define paranoid_check_in_pq(ubi, e) 0 #endif @@ -449,7 +452,7 @@ retry: BUG(); } - paranoid_check_in_wl_tree(e, &ubi->free); + paranoid_check_in_wl_tree(ubi, e, &ubi->free); /* * Move the physical eraseblock to the protection queue where it will @@ -613,7 +616,7 @@ static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) list_add_tail(&wrk->list, &ubi->works); ubi_assert(ubi->works_count >= 0); ubi->works_count += 1; - if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled()) + if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi)) wake_up_process(ubi->bgt_thread); spin_unlock(&ubi->wl_lock); } @@ -712,7 +715,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, e1->ec, e2->ec); goto out_cancel; } - paranoid_check_in_wl_tree(e1, &ubi->used); + paranoid_check_in_wl_tree(ubi, e1, &ubi->used); rb_erase(&e1->u.rb, &ubi->used); dbg_wl("move PEB %d EC %d to PEB %d EC %d", e1->pnum, e1->ec, e2->pnum, e2->ec); @@ -721,12 +724,12 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, scrubbing = 1; e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb); e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); - paranoid_check_in_wl_tree(e1, &ubi->scrub); + paranoid_check_in_wl_tree(ubi, e1, &ubi->scrub); rb_erase(&e1->u.rb, &ubi->scrub); dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); } - paranoid_check_in_wl_tree(e2, &ubi->free); + paranoid_check_in_wl_tree(ubi, e2, &ubi->free); rb_erase(&e2->u.rb, &ubi->free); ubi->move_from = e1; ubi->move_to = e2; @@ -1169,13 +1172,13 @@ retry: return 0; } else { if (in_wl_tree(e, &ubi->used)) { - paranoid_check_in_wl_tree(e, &ubi->used); + paranoid_check_in_wl_tree(ubi, e, &ubi->used); rb_erase(&e->u.rb, &ubi->used); } else if (in_wl_tree(e, 
&ubi->scrub)) { - paranoid_check_in_wl_tree(e, &ubi->scrub); + paranoid_check_in_wl_tree(ubi, e, &ubi->scrub); rb_erase(&e->u.rb, &ubi->scrub); } else if (in_wl_tree(e, &ubi->erroneous)) { - paranoid_check_in_wl_tree(e, &ubi->erroneous); + paranoid_check_in_wl_tree(ubi, e, &ubi->erroneous); rb_erase(&e->u.rb, &ubi->erroneous); ubi->erroneous_peb_count -= 1; ubi_assert(ubi->erroneous_peb_count >= 0); @@ -1242,7 +1245,7 @@ retry: } if (in_wl_tree(e, &ubi->used)) { - paranoid_check_in_wl_tree(e, &ubi->used); + paranoid_check_in_wl_tree(ubi, e, &ubi->used); rb_erase(&e->u.rb, &ubi->used); } else { int err; @@ -1364,7 +1367,7 @@ int ubi_thread(void *u) spin_lock(&ubi->wl_lock); if (list_empty(&ubi->works) || ubi->ro_mode || - !ubi->thread_enabled || ubi_dbg_is_bgt_disabled()) { + !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) { set_current_state(TASK_INTERRUPTIBLE); spin_unlock(&ubi->wl_lock); schedule(); @@ -1579,7 +1582,7 @@ static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec) long long read_ec; struct ubi_ec_hdr *ec_hdr; - if (!(ubi_chk_flags & UBI_CHK_GEN)) + if (!ubi->dbg->chk_gen) return 0; ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); @@ -1609,16 +1612,18 @@ out_free: /** * paranoid_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree. + * @ubi: UBI device description object * @e: the wear-leveling entry to check * @root: the root of the tree * * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it * is not. */ -static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, +static int paranoid_check_in_wl_tree(const struct ubi_device *ubi, + struct ubi_wl_entry *e, struct rb_root *root) { - if (!(ubi_chk_flags & UBI_CHK_GEN)) + if (!ubi->dbg->chk_gen) return 0; if (in_wl_tree(e, root)) @@ -1638,12 +1643,13 @@ static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, * * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not. */ -static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e) +static int paranoid_check_in_pq(const struct ubi_device *ubi, + struct ubi_wl_entry *e) { struct ubi_wl_entry *p; int i; - if (!(ubi_chk_flags & UBI_CHK_GEN)) + if (!ubi->dbg->chk_gen) return 0; for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 02145e9697a9..1196f61a4ab6 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2758,6 +2758,29 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev) dev_notice(&dev->dev, "proprietary Ricoh MMC controller disabled (via firewire function)\n"); dev_notice(&dev->dev, "MMC cards are now supported by standard SDHCI controller\n"); + + /* + * RICOH 0xe823 SD/MMC card reader fails to recognize + * certain types of SD/MMC cards. Lowering the SD base + * clock frequency from 200Mhz to 50Mhz fixes this issue. 
+ * + * 0x150 - SD2.0 mode enable for changing base clock + * frequency to 50Mhz + * 0xe1 - Base clock frequency + * 0x32 - 50Mhz new clock frequency + * 0xf9 - Key register for 0x150 + * 0xfc - key register for 0xe1 + */ + if (dev->device == PCI_DEVICE_ID_RICOH_R5CE823) { + pci_write_config_byte(dev, 0xf9, 0xfc); + pci_write_config_byte(dev, 0x150, 0x10); + pci_write_config_byte(dev, 0xf9, 0x00); + pci_write_config_byte(dev, 0xfc, 0x01); + pci_write_config_byte(dev, 0xe1, 0x32); + pci_write_config_byte(dev, 0xfc, 0x00); + + dev_notice(&dev->dev, "MMC controller base frequency changed to 50Mhz.\n"); + } } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832); DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832); diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index a59638b37c1a..03bc471c3eed 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -9,6 +9,23 @@ config XEN_BALLOON the system to expand the domain's memory allocation, or alternatively return unneeded memory to the system. +config XEN_SELFBALLOONING + bool "Dynamically self-balloon kernel memory to target" + depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP + default n + help + Self-ballooning dynamically balloons available kernel memory driven + by the current usage of anonymous memory ("committed AS") and + controlled by various sysfs-settable parameters. Configuring + FRONTSWAP is highly recommended; if it is not configured, self- + ballooning is disabled by default but can be enabled with the + 'selfballooning' kernel boot parameter. If FRONTSWAP is configured, + frontswap-selfshrinking is enabled by default but can be disabled + with the 'noselfshrink' kernel boot parameter; and self-ballooning + is enabled by default but can be disabled with the 'noselfballooning' + kernel boot parameter. Note that systems without a sufficiently + large swap device should not enable self-ballooning. + config XEN_SCRUB_PAGES bool "Scrub pages before returning them to system" depends on XEN_BALLOON @@ -105,4 +122,33 @@ config SWIOTLB_XEN depends on PCI select SWIOTLB +config XEN_TMEM + bool + default y if (CLEANCACHE || FRONTSWAP) + help + Shim to interface in-kernel Transcendent Memory hooks + (e.g. cleancache and frontswap) to Xen tmem hypercalls. + +config XEN_PCIDEV_BACKEND + tristate "Xen PCI-device backend driver" + depends on PCI && X86 && XEN + depends on XEN_BACKEND + default m + help + The PCI device backend driver allows the kernel to export arbitrary + PCI devices to other guests. If you select this to be a module, you + will need to make sure no other driver has bound to the device(s) + you want to make visible to other guests. + + The parameter "passthrough" allows you specify how you want the PCI + devices to appear in the guest. You can choose the default (0) where + PCI topology starts at 00.00.0, or (1) for passthrough if you want + the PCI devices topology appear the same as in the host. + + The "hide" parameter (only applicable if backend driver is compiled + into the kernel) allows you to bind the PCI devices to this module + from the default device drivers. The argument is the list of PCI BDFs: + xen-pciback.hide=(03:00.0)(04:00.0) + + If in doubt, say m. 
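The Ricoh change rides on the kernel's PCI fixup machinery: DECLARE_PCI_FIXUP_EARLY runs the hook during enumeration, and DECLARE_PCI_FIXUP_RESUME_EARLY repeats it after resume, when config registers rewritten by the fixup may have been lost. A hedged sketch of how such a hook is declared; the vendor/device IDs below are placeholders, not a real quirk.

#include <linux/pci.h>

#define DEMO_VENDOR_ID  0x1234   /* placeholder, not a real device */
#define DEMO_DEVICE_ID  0x5678

static void demo_fixup(struct pci_dev *dev)
{
    dev_notice(&dev->dev, "demo fixup ran for %04x:%04x\n",
               dev->vendor, dev->device);
}
/* Run once while the device is enumerated... */
DECLARE_PCI_FIXUP_EARLY(DEMO_VENDOR_ID, DEMO_DEVICE_ID, demo_fixup);
/* ...and again on resume, as the Ricoh quirk does. */
DECLARE_PCI_FIXUP_RESUME_EARLY(DEMO_VENDOR_ID, DEMO_DEVICE_ID, demo_fixup);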
endmenu diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index bbc18258ecc5..72bbb27d7a68 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,6 +1,5 @@ obj-y += grant-table.o features.o events.o manage.o balloon.o obj-y += xenbus/ -obj-y += tmem.o nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_features.o := $(nostackp) @@ -9,14 +8,17 @@ obj-$(CONFIG_BLOCK) += biomerge.o obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o +obj-$(CONFIG_XEN_SELFBALLOONING) += xen-selfballoon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o +obj-$(CONFIG_XEN_TMEM) += tmem.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_DOM0) += pci.o +obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 30df85d8fca8..da70f5c32eb9 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -615,11 +615,6 @@ static int find_irq_by_gsi(unsigned gsi) return -1; } -int xen_allocate_pirq_gsi(unsigned gsi) -{ - return gsi; -} - /* * Do not make any assumptions regarding the relationship between the * IRQ number returned here and the Xen pirq argument. @@ -1693,6 +1688,6 @@ void __init xen_init_IRQ(void) } else { irq_ctx_init(smp_processor_id()); if (xen_initial_domain()) - xen_setup_pirqs(); + pci_xen_initial_domain(); } } diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 816a44959ef0..d369965e8f8a 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -1,7 +1,7 @@ /* * Xen implementation for transcendent memory (tmem) * - * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. + * Copyright (C) 2009-2011 Oracle Corp. All rights reserved. 
* Author: Dan Magenheimer */ @@ -9,8 +9,14 @@ #include <linux/types.h> #include <linux/init.h> #include <linux/pagemap.h> +#include <linux/module.h> #include <linux/cleancache.h> +/* temporary ifdef until include/linux/frontswap.h is upstream */ +#ifdef CONFIG_FRONTSWAP +#include <linux/frontswap.h> +#endif + #include <xen/xen.h> #include <xen/interface/xen.h> #include <asm/xen/hypercall.h> @@ -122,14 +128,8 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid) return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); } -static int xen_tmem_destroy_pool(u32 pool_id) -{ - struct tmem_oid oid = { { 0 } }; - - return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); -} - -int tmem_enabled; +int tmem_enabled __read_mostly; +EXPORT_SYMBOL(tmem_enabled); static int __init enable_tmem(char *s) { @@ -139,6 +139,14 @@ static int __init enable_tmem(char *s) __setup("tmem", enable_tmem); +#ifdef CONFIG_CLEANCACHE +static int xen_tmem_destroy_pool(u32 pool_id) +{ + struct tmem_oid oid = { { 0 } }; + + return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); +} + /* cleancache ops */ static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, @@ -240,18 +248,156 @@ static struct cleancache_ops tmem_cleancache_ops = { .init_shared_fs = tmem_cleancache_init_shared_fs, .init_fs = tmem_cleancache_init_fs }; +#endif -static int __init xen_tmem_init(void) +#ifdef CONFIG_FRONTSWAP +/* frontswap tmem operations */ + +/* a single tmem poolid is used for all frontswap "types" (swapfiles) */ +static int tmem_frontswap_poolid; + +/* + * Swizzling increases objects per swaptype, increasing tmem concurrency + * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS + */ +#define SWIZ_BITS 4 +#define SWIZ_MASK ((1 << SWIZ_BITS) - 1) +#define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK)) +#define iswiz(_ind) (_ind >> SWIZ_BITS) + +static inline struct tmem_oid oswiz(unsigned type, u32 ind) { - struct cleancache_ops old_ops; + struct tmem_oid oid = { .oid = { 0 } }; + oid.oid[0] = _oswiz(type, ind); + return oid; +} +/* returns 0 if the page was successfully put into frontswap, -1 if not */ +static int tmem_frontswap_put_page(unsigned type, pgoff_t offset, + struct page *page) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + unsigned long pfn = page_to_pfn(page); + int pool = tmem_frontswap_poolid; + int ret; + + if (pool < 0) + return -1; + if (ind64 != ind) + return -1; + mb(); /* ensure page is quiescent; tmem may address it with an alias */ + ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn); + /* translate Xen tmem return values to linux semantics */ + if (ret == 1) + return 0; + else + return -1; +} + +/* + * returns 0 if the page was successfully gotten from frontswap, -1 if + * was not present (should never happen!) 
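The SWIZ_BITS machinery above shards each swap area across 2^4 = 16 tmem objects: the low four bits of the page offset select the object, the remaining bits index within it, which raises tmem concurrency under heavy swap. The packing in isolation, with OSWIZ/ISWIZ as illustrative stand-ins for the _oswiz()/iswiz() macros:

#include <stdint.h>
#include <stdio.h>

#define SWIZ_BITS 4
#define SWIZ_MASK ((1 << SWIZ_BITS) - 1)
#define OSWIZ(type, ind) (((type) << SWIZ_BITS) | ((ind) & SWIZ_MASK))
#define ISWIZ(ind)       ((ind) >> SWIZ_BITS)

int main(void)
{
    unsigned type = 1;      /* swap "type" (swapfile number) */
    uint32_t ind = 0x1234;  /* page offset within the swapfile */

    /* Low SWIZ_BITS of the offset pick one of 16 objects per type;
     * the remaining bits become the index within that object. */
    printf("object id = %u, index in object = %u\n",
           OSWIZ(type, ind), ISWIZ(ind));   /* prints 20 and 291 */
    return 0;
}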
+ */ +static int tmem_frontswap_get_page(unsigned type, pgoff_t offset, + struct page *page) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + unsigned long pfn = page_to_pfn(page); + int pool = tmem_frontswap_poolid; + int ret; + + if (pool < 0) + return -1; + if (ind64 != ind) + return -1; + ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn); + /* translate Xen tmem return values to linux semantics */ + if (ret == 1) + return 0; + else + return -1; +} + +/* flush a single page from frontswap */ +static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + int pool = tmem_frontswap_poolid; + + if (pool < 0) + return; + if (ind64 != ind) + return; + (void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind)); +} + +/* flush all pages from the passed swaptype */ +static void tmem_frontswap_flush_area(unsigned type) +{ + int pool = tmem_frontswap_poolid; + int ind; + + if (pool < 0) + return; + for (ind = SWIZ_MASK; ind >= 0; ind--) + (void)xen_tmem_flush_object(pool, oswiz(type, ind)); +} + +static void tmem_frontswap_init(unsigned ignored) +{ + struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID; + + /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ + if (tmem_frontswap_poolid < 0) + tmem_frontswap_poolid = + xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE); +} + +static int __initdata use_frontswap = 1; + +static int __init no_frontswap(char *s) +{ + use_frontswap = 0; + return 1; +} + +__setup("nofrontswap", no_frontswap); + +static struct frontswap_ops tmem_frontswap_ops = { + .put_page = tmem_frontswap_put_page, + .get_page = tmem_frontswap_get_page, + .flush_page = tmem_frontswap_flush_page, + .flush_area = tmem_frontswap_flush_area, + .init = tmem_frontswap_init +}; +#endif + +static int __init xen_tmem_init(void) +{ if (!xen_domain()) return 0; +#ifdef CONFIG_FRONTSWAP + if (tmem_enabled && use_frontswap) { + char *s = ""; + struct frontswap_ops old_ops = + frontswap_register_ops(&tmem_frontswap_ops); + + tmem_frontswap_poolid = -1; + if (old_ops.init != NULL) + s = " (WARNING: frontswap_ops overridden)"; + printk(KERN_INFO "frontswap enabled, RAM provided by " + "Xen Transcendent Memory\n"); + } +#endif #ifdef CONFIG_CLEANCACHE BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); if (tmem_enabled && use_cleancache) { char *s = ""; - old_ops = cleancache_register_ops(&tmem_cleancache_ops); + struct cleancache_ops old_ops = + cleancache_register_ops(&tmem_cleancache_ops); if (old_ops.init_fs != NULL) s = " (WARNING: cleancache_ops overridden)"; printk(KERN_INFO "cleancache enabled, RAM provided by " diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index a4ff225ee868..5c9dc43c1e94 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -98,6 +98,8 @@ static int __init balloon_init(void) register_balloon(&balloon_sysdev); + register_xen_selfballooning(&balloon_sysdev); + target_watch.callback = watch_target; xenstore_notifier.notifier_call = balloon_init_watcher; diff --git a/drivers/xen/xen-pciback/Makefile b/drivers/xen/xen-pciback/Makefile new file mode 100644 index 000000000000..ffe0ad3438bd --- /dev/null +++ b/drivers/xen/xen-pciback/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o + +xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o +xen-pciback-y += conf_space.o conf_space_header.o \ + conf_space_capability.o \ + conf_space_quirks.o vpci.o \ + passthrough.o diff --git 
a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c new file mode 100644 index 000000000000..a8031445d94e --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space.c @@ -0,0 +1,438 @@ +/* + * PCI Backend - Functions for creating a virtual configuration space for + * exported PCI Devices. + * It's dangerous to allow PCI Driver Domains to change their + * device's resources (memory, i/o ports, interrupts). We need to + * restrict changes to certain PCI Configuration registers: + * BARs, INTERRUPT_PIN, most registers in the header... + * + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include "pciback.h" +#include "conf_space.h" +#include "conf_space_quirks.h" + +#define DRV_NAME "xen-pciback" +static int permissive; +module_param(permissive, bool, 0644); + +/* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word, + * xen_pcibk_write_config_word, and xen_pcibk_write_config_byte are created. */ +#define DEFINE_PCI_CONFIG(op, size, type) \ +int xen_pcibk_##op##_config_##size \ +(struct pci_dev *dev, int offset, type value, void *data) \ +{ \ + return pci_##op##_config_##size(dev, offset, value); \ +} + +DEFINE_PCI_CONFIG(read, byte, u8 *) +DEFINE_PCI_CONFIG(read, word, u16 *) +DEFINE_PCI_CONFIG(read, dword, u32 *) + +DEFINE_PCI_CONFIG(write, byte, u8) +DEFINE_PCI_CONFIG(write, word, u16) +DEFINE_PCI_CONFIG(write, dword, u32) + +static int conf_space_read(struct pci_dev *dev, + const struct config_field_entry *entry, + int offset, u32 *value) +{ + int ret = 0; + const struct config_field *field = entry->field; + + *value = 0; + + switch (field->size) { + case 1: + if (field->u.b.read) + ret = field->u.b.read(dev, offset, (u8 *) value, + entry->data); + break; + case 2: + if (field->u.w.read) + ret = field->u.w.read(dev, offset, (u16 *) value, + entry->data); + break; + case 4: + if (field->u.dw.read) + ret = field->u.dw.read(dev, offset, value, entry->data); + break; + } + return ret; +} + +static int conf_space_write(struct pci_dev *dev, + const struct config_field_entry *entry, + int offset, u32 value) +{ + int ret = 0; + const struct config_field *field = entry->field; + + switch (field->size) { + case 1: + if (field->u.b.write) + ret = field->u.b.write(dev, offset, (u8) value, + entry->data); + break; + case 2: + if (field->u.w.write) + ret = field->u.w.write(dev, offset, (u16) value, + entry->data); + break; + case 4: + if (field->u.dw.write) + ret = field->u.dw.write(dev, offset, value, + entry->data); + break; + } + return ret; +} + +static inline u32 get_mask(int size) +{ + if (size == 1) + return 0xff; + else if (size == 2) + return 0xffff; + else + return 0xffffffff; +} + +static inline int valid_request(int offset, int size) +{ + /* Validate request (no un-aligned requests) */ + if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0) + return 1; + return 0; +} + +static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask, + int offset) +{ + if (offset >= 0) { + new_val_mask <<= (offset * 8); + new_val <<= (offset * 8); + } else { + new_val_mask >>= (offset * -8); + new_val >>= (offset * -8); + } + val = (val & ~new_val_mask) | (new_val & new_val_mask); + + return val; +} + +static int pcibios_err_to_errno(int err) +{ + switch (err) { + case PCIBIOS_SUCCESSFUL: + return XEN_PCI_ERR_success; + case PCIBIOS_DEVICE_NOT_FOUND: + return XEN_PCI_ERR_dev_not_found; + case PCIBIOS_BAD_REGISTER_NUMBER: + return XEN_PCI_ERR_invalid_offset; + case PCIBIOS_FUNC_NOT_SUPPORTED: + 
return XEN_PCI_ERR_not_implemented; + case PCIBIOS_SET_FAILED: + return XEN_PCI_ERR_access_denied; + } + return err; +} + +int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size, + u32 *ret_val) +{ + int err = 0; + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); + const struct config_field_entry *cfg_entry; + const struct config_field *field; + int req_start, req_end, field_start, field_end; + /* if read fails for any reason, return 0 + * (as if device didn't respond) */ + u32 value = 0, tmp_val; + + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x\n", + pci_name(dev), size, offset); + + if (!valid_request(offset, size)) { + err = XEN_PCI_ERR_invalid_offset; + goto out; + } + + /* Get the real value first, then modify as appropriate */ + switch (size) { + case 1: + err = pci_read_config_byte(dev, offset, (u8 *) &value); + break; + case 2: + err = pci_read_config_word(dev, offset, (u16 *) &value); + break; + case 4: + err = pci_read_config_dword(dev, offset, &value); + break; + } + + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { + field = cfg_entry->field; + + req_start = offset; + req_end = offset + size; + field_start = OFFSET(cfg_entry); + field_end = OFFSET(cfg_entry) + field->size; + + if ((req_start >= field_start && req_start < field_end) + || (req_end > field_start && req_end <= field_end)) { + err = conf_space_read(dev, cfg_entry, field_start, + &tmp_val); + if (err) + goto out; + + value = merge_value(value, tmp_val, + get_mask(field->size), + field_start - req_start); + } + } + +out: + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x = %x\n", + pci_name(dev), size, offset, value); + + *ret_val = value; + return pcibios_err_to_errno(err); +} + +int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value) +{ + int err = 0, handled = 0; + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); + const struct config_field_entry *cfg_entry; + const struct config_field *field; + u32 tmp_val; + int req_start, req_end, field_start, field_end; + + if (unlikely(verbose_request)) + printk(KERN_DEBUG + DRV_NAME ": %s: write request %d bytes at 0x%x = %x\n", + pci_name(dev), size, offset, value); + + if (!valid_request(offset, size)) + return XEN_PCI_ERR_invalid_offset; + + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { + field = cfg_entry->field; + + req_start = offset; + req_end = offset + size; + field_start = OFFSET(cfg_entry); + field_end = OFFSET(cfg_entry) + field->size; + + if ((req_start >= field_start && req_start < field_end) + || (req_end > field_start && req_end <= field_end)) { + tmp_val = 0; + + err = xen_pcibk_config_read(dev, field_start, + field->size, &tmp_val); + if (err) + break; + + tmp_val = merge_value(tmp_val, value, get_mask(size), + req_start - field_start); + + err = conf_space_write(dev, cfg_entry, field_start, + tmp_val); + + /* handled is set true here, but not every byte + * may have been written! Properly detecting if + * every byte is handled is unnecessary as the + * flag is used to detect devices that need + * special helpers to work correctly. + */ + handled = 1; + } + } + + if (!handled && !err) { + /* By default, anything not specificially handled above is + * read-only. The permissive flag changes this behavior so + * that anything not specifically handled above is writable. 
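
To see what merge_value() and get_mask() accomplish in the read path above, here is a minimal userspace model (the register values are invented): a 2-byte virtual field at offset 0x06 is overlaid onto a 4-byte hardware read at 0x04.

#include <stdio.h>
#include <stdint.h>

static uint32_t get_mask(int size)
{
	if (size == 1)
		return 0xff;
	if (size == 2)
		return 0xffff;
	return 0xffffffff;
}

/* same logic as merge_value() above: overlay new_val onto val, shifted
 * by the byte distance between the request and the field */
static uint32_t merge_value(uint32_t val, uint32_t new_val,
			    uint32_t new_val_mask, int offset)
{
	if (offset >= 0) {
		new_val_mask <<= (offset * 8);
		new_val <<= (offset * 8);
	} else {
		new_val_mask >>= (offset * -8);
		new_val >>= (offset * -8);
	}
	return (val & ~new_val_mask) | (new_val & new_val_mask);
}

int main(void)
{
	/* a 4-byte read at 0x04 returned 0x11223344 from real hardware;
	 * a virtual 2-byte field at 0x06 wants to report 0xbeef */
	uint32_t raw = 0x11223344;
	uint32_t merged = merge_value(raw, 0xbeef, get_mask(2), 0x06 - 0x04);

	printf("0x%08x\n", (unsigned)merged);	/* prints 0xbeef3344 */
	return 0;
}
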
+ * This means that some fields may still be read-only because + * they have entries in the config_field list that intercept + * the write and do nothing. */ + if (dev_data->permissive || permissive) { + switch (size) { + case 1: + err = pci_write_config_byte(dev, offset, + (u8) value); + break; + case 2: + err = pci_write_config_word(dev, offset, + (u16) value); + break; + case 4: + err = pci_write_config_dword(dev, offset, + (u32) value); + break; + } + } else if (!dev_data->warned_on_write) { + dev_data->warned_on_write = 1; + dev_warn(&dev->dev, "Driver tried to write to a " + "read-only configuration space field at offset" + " 0x%x, size %d. This may be harmless, but if " + "you have problems with your device:\n" + "1) see permissive attribute in sysfs\n" + "2) report problems to the xen-devel " + "mailing list along with details of your " + "device obtained from lspci.\n", offset, size); + } + } + + return pcibios_err_to_errno(err); +} + +void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev) +{ + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); + struct config_field_entry *cfg_entry, *t; + const struct config_field *field; + + dev_dbg(&dev->dev, "free-ing dynamically allocated virtual " + "configuration space fields\n"); + if (!dev_data) + return; + + list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) { + field = cfg_entry->field; + + if (field->clean) { + field->clean((struct config_field *)field); + + kfree(cfg_entry->data); + + list_del(&cfg_entry->list); + kfree(cfg_entry); + } + + } +} + +void xen_pcibk_config_reset_dev(struct pci_dev *dev) +{ + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); + const struct config_field_entry *cfg_entry; + const struct config_field *field; + + dev_dbg(&dev->dev, "resetting virtual configuration space\n"); + if (!dev_data) + return; + + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { + field = cfg_entry->field; + + if (field->reset) + field->reset(dev, OFFSET(cfg_entry), cfg_entry->data); + } +} + +void xen_pcibk_config_free_dev(struct pci_dev *dev) +{ + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); + struct config_field_entry *cfg_entry, *t; + const struct config_field *field; + + dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n"); + if (!dev_data) + return; + + list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) { + list_del(&cfg_entry->list); + + field = cfg_entry->field; + + if (field->release) + field->release(dev, OFFSET(cfg_entry), cfg_entry->data); + + kfree(cfg_entry); + } +} + +int xen_pcibk_config_add_field_offset(struct pci_dev *dev, + const struct config_field *field, + unsigned int base_offset) +{ + int err = 0; + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); + struct config_field_entry *cfg_entry; + void *tmp; + + cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL); + if (!cfg_entry) { + err = -ENOMEM; + goto out; + } + + cfg_entry->data = NULL; + cfg_entry->field = field; + cfg_entry->base_offset = base_offset; + + /* silently ignore duplicate fields */ + err = xen_pcibk_field_is_dup(dev, OFFSET(cfg_entry)); + if (err) + goto out; + + if (field->init) { + tmp = field->init(dev, OFFSET(cfg_entry)); + + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto out; + } + + cfg_entry->data = tmp; + } + + dev_dbg(&dev->dev, "added config field at offset 0x%02x\n", + OFFSET(cfg_entry)); + list_add_tail(&cfg_entry->list, &dev_data->config_fields); + +out: + if (err) + kfree(cfg_entry); + + return err; +} + +/* This sets 
up the device's virtual configuration space to keep track of + * certain registers (like the base address registers (BARs) so that we can + * keep the client from manipulating them directly. + */ +int xen_pcibk_config_init_dev(struct pci_dev *dev) +{ + int err = 0; + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); + + dev_dbg(&dev->dev, "initializing virtual configuration space\n"); + + INIT_LIST_HEAD(&dev_data->config_fields); + + err = xen_pcibk_config_header_add_fields(dev); + if (err) + goto out; + + err = xen_pcibk_config_capability_add_fields(dev); + if (err) + goto out; + + err = xen_pcibk_config_quirks_init(dev); + +out: + return err; +} + +int xen_pcibk_config_init(void) +{ + return xen_pcibk_config_capability_init(); +} diff --git a/drivers/xen/xen-pciback/conf_space.h b/drivers/xen/xen-pciback/conf_space.h new file mode 100644 index 000000000000..e56c934ad137 --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space.h @@ -0,0 +1,126 @@ +/* + * PCI Backend - Common data structures for overriding the configuration space + * + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> + */ + +#ifndef __XEN_PCIBACK_CONF_SPACE_H__ +#define __XEN_PCIBACK_CONF_SPACE_H__ + +#include <linux/list.h> +#include <linux/err.h> + +/* conf_field_init can return an errno in a ptr with ERR_PTR() */ +typedef void *(*conf_field_init) (struct pci_dev *dev, int offset); +typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data); +typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data); + +typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value, + void *data); +typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value, + void *data); +typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value, + void *data); +typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value, + void *data); +typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value, + void *data); +typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value, + void *data); + +/* These are the fields within the configuration space which we + * are interested in intercepting reads/writes to and changing their + * values. 
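
The handler typedefs above are easiest to read with a concrete instance. A toy model follows, compilable outside the kernel; struct pci_dev here, masked_byte_read and fake_dev are all stand-ins, not part of the patch.

#include <stdio.h>
#include <stdint.h>
typedef uint8_t u8;

struct pci_dev { u8 config[256]; };	/* stand-in, not the kernel's */

typedef int (*conf_byte_read)(struct pci_dev *dev, int offset, u8 *value,
			      void *data);

/* a read handler in the same shape as conf_byte_read: return the raw
 * config byte, but force bit 7 clear as a (made-up) virtualization */
static int masked_byte_read(struct pci_dev *dev, int offset, u8 *value,
			    void *data)
{
	*value = dev->config[offset] & 0x7f;
	return 0;
}

int main(void)
{
	struct pci_dev fake_dev = { .config = { [0x3c] = 0x8a } };
	conf_byte_read read = masked_byte_read;
	u8 v;

	read(&fake_dev, 0x3c, &v, NULL);
	printf("0x%02x\n", v);	/* 0x0a: bit 7 stripped */
	return 0;
}
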
+ */ +struct config_field { + unsigned int offset; + unsigned int size; + unsigned int mask; + conf_field_init init; + conf_field_reset reset; + conf_field_free release; + void (*clean) (struct config_field *field); + union { + struct { + conf_dword_write write; + conf_dword_read read; + } dw; + struct { + conf_word_write write; + conf_word_read read; + } w; + struct { + conf_byte_write write; + conf_byte_read read; + } b; + } u; + struct list_head list; +}; + +struct config_field_entry { + struct list_head list; + const struct config_field *field; + unsigned int base_offset; + void *data; +}; + +#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset) + +/* Add fields to a device - the add_fields macro expects to get a pointer to + * the first entry in an array (of which the ending is marked by size==0) + */ +int xen_pcibk_config_add_field_offset(struct pci_dev *dev, + const struct config_field *field, + unsigned int offset); + +static inline int xen_pcibk_config_add_field(struct pci_dev *dev, + const struct config_field *field) +{ + return xen_pcibk_config_add_field_offset(dev, field, 0); +} + +static inline int xen_pcibk_config_add_fields(struct pci_dev *dev, + const struct config_field *field) +{ + int i, err = 0; + for (i = 0; field[i].size != 0; i++) { + err = xen_pcibk_config_add_field(dev, &field[i]); + if (err) + break; + } + return err; +} + +static inline int xen_pcibk_config_add_fields_offset(struct pci_dev *dev, + const struct config_field *field, + unsigned int offset) +{ + int i, err = 0; + for (i = 0; field[i].size != 0; i++) { + err = xen_pcibk_config_add_field_offset(dev, &field[i], offset); + if (err) + break; + } + return err; +} + +/* Read/Write the real configuration space */ +int xen_pcibk_read_config_byte(struct pci_dev *dev, int offset, u8 *value, + void *data); +int xen_pcibk_read_config_word(struct pci_dev *dev, int offset, u16 *value, + void *data); +int xen_pcibk_read_config_dword(struct pci_dev *dev, int offset, u32 *value, + void *data); +int xen_pcibk_write_config_byte(struct pci_dev *dev, int offset, u8 value, + void *data); +int xen_pcibk_write_config_word(struct pci_dev *dev, int offset, u16 value, + void *data); +int xen_pcibk_write_config_dword(struct pci_dev *dev, int offset, u32 value, + void *data); + +int xen_pcibk_config_capability_init(void); + +int xen_pcibk_config_header_add_fields(struct pci_dev *dev); +int xen_pcibk_config_capability_add_fields(struct pci_dev *dev); + +#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */ diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c new file mode 100644 index 000000000000..7f83e9083e9d --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space_capability.c @@ -0,0 +1,207 @@ +/* + * PCI Backend - Handles the virtual fields found on the capability lists + * in the configuration space. 
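
The field tables built on struct config_field are sentinel-terminated: iteration stops at the first entry with size == 0, exactly as in xen_pcibk_config_add_fields() above. A compilable sketch of that convention (the demo array is invented):

#include <stdio.h>

struct config_field { unsigned int offset; unsigned int size; };

/* same termination convention as xen_pcibk_config_add_fields():
 * walk the array until an entry with size == 0 */
static int count_fields(const struct config_field *field)
{
	int i;
	for (i = 0; field[i].size != 0; i++)
		;
	return i;
}

int main(void)
{
	static const struct config_field demo[] = {
		{ .offset = 0x00, .size = 2 },	/* vendor id */
		{ .offset = 0x04, .size = 2 },	/* command */
		{}				/* sentinel */
	};
	printf("%d fields\n", count_fields(demo));	/* 2 */
	return 0;
}
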
+ * + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include "pciback.h" +#include "conf_space.h" + +static LIST_HEAD(capabilities); +struct xen_pcibk_config_capability { + struct list_head cap_list; + + int capability; + + /* If the device has the capability found above, add these fields */ + const struct config_field *fields; +}; + +static const struct config_field caplist_header[] = { + { + .offset = PCI_CAP_LIST_ID, + .size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */ + .u.w.read = xen_pcibk_read_config_word, + .u.w.write = NULL, + }, + {} +}; + +static inline void register_capability(struct xen_pcibk_config_capability *cap) +{ + list_add_tail(&cap->cap_list, &capabilities); +} + +int xen_pcibk_config_capability_add_fields(struct pci_dev *dev) +{ + int err = 0; + struct xen_pcibk_config_capability *cap; + int cap_offset; + + list_for_each_entry(cap, &capabilities, cap_list) { + cap_offset = pci_find_capability(dev, cap->capability); + if (cap_offset) { + dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n", + cap->capability, cap_offset); + + err = xen_pcibk_config_add_fields_offset(dev, + caplist_header, + cap_offset); + if (err) + goto out; + err = xen_pcibk_config_add_fields_offset(dev, + cap->fields, + cap_offset); + if (err) + goto out; + } + } + +out: + return err; +} + +static int vpd_address_write(struct pci_dev *dev, int offset, u16 value, + void *data) +{ + /* Disallow writes to the vital product data */ + if (value & PCI_VPD_ADDR_F) + return PCIBIOS_SET_FAILED; + else + return pci_write_config_word(dev, offset, value); +} + +static const struct config_field caplist_vpd[] = { + { + .offset = PCI_VPD_ADDR, + .size = 2, + .u.w.read = xen_pcibk_read_config_word, + .u.w.write = vpd_address_write, + }, + { + .offset = PCI_VPD_DATA, + .size = 4, + .u.dw.read = xen_pcibk_read_config_dword, + .u.dw.write = NULL, + }, + {} +}; + +static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value, + void *data) +{ + int err; + u16 real_value; + + err = pci_read_config_word(dev, offset, &real_value); + if (err) + goto out; + + *value = real_value & ~PCI_PM_CAP_PME_MASK; + +out: + return err; +} + +/* PM_OK_BITS specifies the bits that the driver domain is allowed to change. 
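
Capability fields are declared relative to the capability and rebased once pci_find_capability() reports where the capability actually lives; OFFSET() then yields the absolute config-space offset. A small model (the 0x50 location is made up; PCI_PM_CTRL really is offset 4 within the PM capability):

#include <stdio.h>

/* model of OFFSET() from conf_space.h: capability fields are declared
 * relative to the capability, then rebased at the discovered offset */
struct config_field_entry { unsigned int base_offset, field_offset; };
#define OFFSET(e) ((e).base_offset + (e).field_offset)

int main(void)
{
	/* suppose pci_find_capability() located the PM capability at 0x50
	 * (a typical but made-up location) */
	struct config_field_entry pm_ctrl = {
		.base_offset = 0x50,
		.field_offset = 4	/* PCI_PM_CTRL */
	};
	printf("PM control register lives at 0x%02x\n", OFFSET(pm_ctrl));
	return 0;	/* prints 0x54 */
}
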
+ * Can't allow driver domain to enable PMEs - they're shared */ +#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK) + +static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value, + void *data) +{ + int err; + u16 old_value; + pci_power_t new_state, old_state; + + err = pci_read_config_word(dev, offset, &old_value); + if (err) + goto out; + + old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK); + new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); + + new_value &= PM_OK_BITS; + if ((old_value & PM_OK_BITS) != new_value) { + new_value = (old_value & ~PM_OK_BITS) | new_value; + err = pci_write_config_word(dev, offset, new_value); + if (err) + goto out; + } + + /* Let pci core handle the power management change */ + dev_dbg(&dev->dev, "set power state to %x\n", new_state); + err = pci_set_power_state(dev, new_state); + if (err) { + err = PCIBIOS_SET_FAILED; + goto out; + } + + out: + return err; +} + +/* Ensure PMEs are disabled */ +static void *pm_ctrl_init(struct pci_dev *dev, int offset) +{ + int err; + u16 value; + + err = pci_read_config_word(dev, offset, &value); + if (err) + goto out; + + if (value & PCI_PM_CTRL_PME_ENABLE) { + value &= ~PCI_PM_CTRL_PME_ENABLE; + err = pci_write_config_word(dev, offset, value); + } + +out: + return ERR_PTR(err); +} + +static const struct config_field caplist_pm[] = { + { + .offset = PCI_PM_PMC, + .size = 2, + .u.w.read = pm_caps_read, + }, + { + .offset = PCI_PM_CTRL, + .size = 2, + .init = pm_ctrl_init, + .u.w.read = xen_pcibk_read_config_word, + .u.w.write = pm_ctrl_write, + }, + { + .offset = PCI_PM_PPB_EXTENSIONS, + .size = 1, + .u.b.read = xen_pcibk_read_config_byte, + }, + { + .offset = PCI_PM_DATA_REGISTER, + .size = 1, + .u.b.read = xen_pcibk_read_config_byte, + }, + {} +}; + +static struct xen_pcibk_config_capability xen_pcibk_config_capability_pm = { + .capability = PCI_CAP_ID_PM, + .fields = caplist_pm, +}; +static struct xen_pcibk_config_capability xen_pcibk_config_capability_vpd = { + .capability = PCI_CAP_ID_VPD, + .fields = caplist_vpd, +}; + +int xen_pcibk_config_capability_init(void) +{ + register_capability(&xen_pcibk_config_capability_vpd); + register_capability(&xen_pcibk_config_capability_pm); + + return 0; +} diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c new file mode 100644 index 000000000000..da3cbdfcb5dc --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -0,0 +1,386 @@ +/* + * PCI Backend - Handles the virtual fields in the configuration space headers. 
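
The masking in pm_ctrl_write() is worth tracing once with concrete bits. In the userspace sketch below (constants copied from pci_regs.h, the register values invented), a guest's attempt to set PCI_PM_CTRL_PME_ENABLE is silently dropped while its PME-status write passes through:

#include <stdio.h>
#include <stdint.h>

#define PCI_PM_CTRL_PME_STATUS    0x8000
#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00
#define PCI_PM_CTRL_PME_ENABLE    0x0100
#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS | PCI_PM_CTRL_DATA_SEL_MASK)

int main(void)
{
	uint16_t old_value = 0x0103;	/* PME enabled, state D3hot */
	/* guest tries to ack PME status and (re-)enable PME: */
	uint16_t new_value = PCI_PM_CTRL_PME_ENABLE | PCI_PM_CTRL_PME_STATUS;

	new_value &= PM_OK_BITS;	/* strip everything but the OK bits */
	new_value = (old_value & ~PM_OK_BITS) | new_value;

	printf("written value: 0x%04x\n", (unsigned)new_value);
	/* 0x8103: PME status write-through allowed, the PME_ENABLE request
	 * dropped, existing enable/state bits preserved */
	return 0;
}
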
+ * + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include "pciback.h" +#include "conf_space.h" + +struct pci_bar_info { + u32 val; + u32 len_val; + int which; +}; + +#define DRV_NAME "xen-pciback" +#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO)) +#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER) + +static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data) +{ + int i; + int ret; + + ret = xen_pcibk_read_config_word(dev, offset, value, data); + if (!atomic_read(&dev->enable_cnt)) + return ret; + + for (i = 0; i < PCI_ROM_RESOURCE; i++) { + if (dev->resource[i].flags & IORESOURCE_IO) + *value |= PCI_COMMAND_IO; + if (dev->resource[i].flags & IORESOURCE_MEM) + *value |= PCI_COMMAND_MEMORY; + } + + return ret; +} + +static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) +{ + struct xen_pcibk_dev_data *dev_data; + int err; + + dev_data = pci_get_drvdata(dev); + if (!pci_is_enabled(dev) && is_enable_cmd(value)) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: enable\n", + pci_name(dev)); + err = pci_enable_device(dev); + if (err) + return err; + if (dev_data) + dev_data->enable_intx = 1; + } else if (pci_is_enabled(dev) && !is_enable_cmd(value)) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: disable\n", + pci_name(dev)); + pci_disable_device(dev); + if (dev_data) + dev_data->enable_intx = 0; + } + + if (!dev->is_busmaster && is_master_cmd(value)) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: set bus master\n", + pci_name(dev)); + pci_set_master(dev); + } + + if (value & PCI_COMMAND_INVALIDATE) { + if (unlikely(verbose_request)) + printk(KERN_DEBUG + DRV_NAME ": %s: enable memory-write-invalidate\n", + pci_name(dev)); + err = pci_set_mwi(dev); + if (err) { + printk(KERN_WARNING + DRV_NAME ": %s: cannot enable " + "memory-write-invalidate (%d)\n", + pci_name(dev), err); + value &= ~PCI_COMMAND_INVALIDATE; + } + } + + return pci_write_config_word(dev, offset, value); +} + +static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data) +{ + struct pci_bar_info *bar = data; + + if (unlikely(!bar)) { + printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n", + pci_name(dev)); + return XEN_PCI_ERR_op_failed; + } + + /* A write to obtain the length must happen as a 32-bit write. + * This does not (yet) support writing individual bytes + */ + if (value == ~PCI_ROM_ADDRESS_ENABLE) + bar->which = 1; + else { + u32 tmpval; + pci_read_config_dword(dev, offset, &tmpval); + if (tmpval != bar->val && value == bar->val) { + /* Allow restoration of bar value. */ + pci_write_config_dword(dev, offset, bar->val); + } + bar->which = 0; + } + + /* Do we need to support enabling/disabling the rom address here? */ + + return 0; +} + +/* For the BARs, only allow writes which write ~0 or + * the correct resource information + * (Needed for when the driver probes the resource usage) + */ +static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data) +{ + struct pci_bar_info *bar = data; + + if (unlikely(!bar)) { + printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n", + pci_name(dev)); + return XEN_PCI_ERR_op_failed; + } + + /* A write to obtain the length must happen as a 32-bit write. 
+ * This does not (yet) support writing individual bytes + */ + if (value == ~0) + bar->which = 1; + else { + u32 tmpval; + pci_read_config_dword(dev, offset, &tmpval); + if (tmpval != bar->val && value == bar->val) { + /* Allow restoration of bar value. */ + pci_write_config_dword(dev, offset, bar->val); + } + bar->which = 0; + } + + return 0; +} + +static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data) +{ + struct pci_bar_info *bar = data; + + if (unlikely(!bar)) { + printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n", + pci_name(dev)); + return XEN_PCI_ERR_op_failed; + } + + *value = bar->which ? bar->len_val : bar->val; + + return 0; +} + +static inline void read_dev_bar(struct pci_dev *dev, + struct pci_bar_info *bar_info, int offset, + u32 len_mask) +{ + int pos; + struct resource *res = dev->resource; + + if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1) + pos = PCI_ROM_RESOURCE; + else { + pos = (offset - PCI_BASE_ADDRESS_0) / 4; + if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE | + PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == + (PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64))) { + bar_info->val = res[pos - 1].start >> 32; + bar_info->len_val = res[pos - 1].end >> 32; + return; + } + } + + bar_info->val = res[pos].start | + (res[pos].flags & PCI_REGION_FLAG_MASK); + bar_info->len_val = res[pos].end - res[pos].start + 1; +} + +static void *bar_init(struct pci_dev *dev, int offset) +{ + struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL); + + if (!bar) + return ERR_PTR(-ENOMEM); + + read_dev_bar(dev, bar, offset, ~0); + bar->which = 0; + + return bar; +} + +static void *rom_init(struct pci_dev *dev, int offset) +{ + struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL); + + if (!bar) + return ERR_PTR(-ENOMEM); + + read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE); + bar->which = 0; + + return bar; +} + +static void bar_reset(struct pci_dev *dev, int offset, void *data) +{ + struct pci_bar_info *bar = data; + + bar->which = 0; +} + +static void bar_release(struct pci_dev *dev, int offset, void *data) +{ + kfree(data); +} + +static int xen_pcibk_read_vendor(struct pci_dev *dev, int offset, + u16 *value, void *data) +{ + *value = dev->vendor; + + return 0; +} + +static int xen_pcibk_read_device(struct pci_dev *dev, int offset, + u16 *value, void *data) +{ + *value = dev->device; + + return 0; +} + +static int interrupt_read(struct pci_dev *dev, int offset, u8 * value, + void *data) +{ + *value = (u8) dev->irq; + + return 0; +} + +static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data) +{ + u8 cur_value; + int err; + + err = pci_read_config_byte(dev, offset, &cur_value); + if (err) + goto out; + + if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START) + || value == PCI_BIST_START) + err = pci_write_config_byte(dev, offset, value); + +out: + return err; +} + +static const struct config_field header_common[] = { + { + .offset = PCI_VENDOR_ID, + .size = 2, + .u.w.read = xen_pcibk_read_vendor, + }, + { + .offset = PCI_DEVICE_ID, + .size = 2, + .u.w.read = xen_pcibk_read_device, + }, + { + .offset = PCI_COMMAND, + .size = 2, + .u.w.read = command_read, + .u.w.write = command_write, + }, + { + .offset = PCI_INTERRUPT_LINE, + .size = 1, + .u.b.read = interrupt_read, + }, + { + .offset = PCI_INTERRUPT_PIN, + .size = 1, + .u.b.read = xen_pcibk_read_config_byte, + }, + { + /* Any side effects of letting driver domain control cache line? 
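
From the guest's point of view, bar_read()/bar_write() implement the usual BAR sizing handshake against purely virtual state. A compilable model follows (BAR address and length are made up; note the backend reports the resource length in len_val rather than the raw hardware sizing mask):

#include <stdio.h>
#include <stdint.h>

/* toy model of bar_read()/bar_write() above: writing ~0 flips the BAR
 * into "report length" mode, any other value flips it back */
struct pci_bar_info { uint32_t val; uint32_t len_val; int which; };

static void bar_write(struct pci_bar_info *bar, uint32_t value)
{
	bar->which = (value == ~(uint32_t)0);
}

static uint32_t bar_read(const struct pci_bar_info *bar)
{
	return bar->which ? bar->len_val : bar->val;
}

int main(void)
{
	/* a 64 KiB memory BAR at 0xfebc0000 (made-up values) */
	struct pci_bar_info bar = { .val = 0xfebc0000, .len_val = 0x10000 };

	bar_write(&bar, ~(uint32_t)0);		/* sizing write */
	printf("sizing read: 0x%08x\n", (unsigned)bar_read(&bar));
	bar_write(&bar, bar.val);		/* restore */
	printf("normal read: 0x%08x\n", (unsigned)bar_read(&bar));
	return 0;
}
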
*/ + .offset = PCI_CACHE_LINE_SIZE, + .size = 1, + .u.b.read = xen_pcibk_read_config_byte, + .u.b.write = xen_pcibk_write_config_byte, + }, + { + .offset = PCI_LATENCY_TIMER, + .size = 1, + .u.b.read = xen_pcibk_read_config_byte, + }, + { + .offset = PCI_BIST, + .size = 1, + .u.b.read = xen_pcibk_read_config_byte, + .u.b.write = bist_write, + }, + {} +}; + +#define CFG_FIELD_BAR(reg_offset) \ + { \ + .offset = reg_offset, \ + .size = 4, \ + .init = bar_init, \ + .reset = bar_reset, \ + .release = bar_release, \ + .u.dw.read = bar_read, \ + .u.dw.write = bar_write, \ + } + +#define CFG_FIELD_ROM(reg_offset) \ + { \ + .offset = reg_offset, \ + .size = 4, \ + .init = rom_init, \ + .reset = bar_reset, \ + .release = bar_release, \ + .u.dw.read = bar_read, \ + .u.dw.write = rom_write, \ + } + +static const struct config_field header_0[] = { + CFG_FIELD_BAR(PCI_BASE_ADDRESS_0), + CFG_FIELD_BAR(PCI_BASE_ADDRESS_1), + CFG_FIELD_BAR(PCI_BASE_ADDRESS_2), + CFG_FIELD_BAR(PCI_BASE_ADDRESS_3), + CFG_FIELD_BAR(PCI_BASE_ADDRESS_4), + CFG_FIELD_BAR(PCI_BASE_ADDRESS_5), + CFG_FIELD_ROM(PCI_ROM_ADDRESS), + {} +}; + +static const struct config_field header_1[] = { + CFG_FIELD_BAR(PCI_BASE_ADDRESS_0), + CFG_FIELD_BAR(PCI_BASE_ADDRESS_1), + CFG_FIELD_ROM(PCI_ROM_ADDRESS1), + {} +}; + +int xen_pcibk_config_header_add_fields(struct pci_dev *dev) +{ + int err; + + err = xen_pcibk_config_add_fields(dev, header_common); + if (err) + goto out; + + switch (dev->hdr_type) { + case PCI_HEADER_TYPE_NORMAL: + err = xen_pcibk_config_add_fields(dev, header_0); + break; + + case PCI_HEADER_TYPE_BRIDGE: + err = xen_pcibk_config_add_fields(dev, header_1); + break; + + default: + err = -EINVAL; + printk(KERN_ERR DRV_NAME ": %s: Unsupported header type %d!\n", + pci_name(dev), dev->hdr_type); + break; + } + +out: + return err; +} diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c new file mode 100644 index 000000000000..921a889e65eb --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space_quirks.c @@ -0,0 +1,140 @@ +/* + * PCI Backend - Handle special overlays for broken devices. 
+ * + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> + * Author: Chris Bookholt <hap10@epoch.ncsc.mil> + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include "pciback.h" +#include "conf_space.h" +#include "conf_space_quirks.h" + +LIST_HEAD(xen_pcibk_quirks); +#define DRV_NAME "xen-pciback" +static inline const struct pci_device_id * +match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) +{ + if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) && + (id->device == PCI_ANY_ID || id->device == dev->device) && + (id->subvendor == PCI_ANY_ID || + id->subvendor == dev->subsystem_vendor) && + (id->subdevice == PCI_ANY_ID || + id->subdevice == dev->subsystem_device) && + !((id->class ^ dev->class) & id->class_mask)) + return id; + return NULL; +} + +static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev) +{ + struct xen_pcibk_config_quirk *tmp_quirk; + + list_for_each_entry(tmp_quirk, &xen_pcibk_quirks, quirks_list) + if (match_one_device(&tmp_quirk->devid, dev) != NULL) + goto out; + tmp_quirk = NULL; + printk(KERN_DEBUG DRV_NAME + ":quirk didn't match any device xen_pciback knows about\n"); +out: + return tmp_quirk; +} + +static inline void register_quirk(struct xen_pcibk_config_quirk *quirk) +{ + list_add_tail(&quirk->quirks_list, &xen_pcibk_quirks); +} + +int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg) +{ + int ret = 0; + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); + struct config_field_entry *cfg_entry; + + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { + if (OFFSET(cfg_entry) == reg) { + ret = 1; + break; + } + } + return ret; +} + +int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field + *field) +{ + int err = 0; + + switch (field->size) { + case 1: + field->u.b.read = xen_pcibk_read_config_byte; + field->u.b.write = xen_pcibk_write_config_byte; + break; + case 2: + field->u.w.read = xen_pcibk_read_config_word; + field->u.w.write = xen_pcibk_write_config_word; + break; + case 4: + field->u.dw.read = xen_pcibk_read_config_dword; + field->u.dw.write = xen_pcibk_write_config_dword; + break; + default: + err = -EINVAL; + goto out; + } + + xen_pcibk_config_add_field(dev, field); + +out: + return err; +} + +int xen_pcibk_config_quirks_init(struct pci_dev *dev) +{ + struct xen_pcibk_config_quirk *quirk; + int ret = 0; + + quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC); + if (!quirk) { + ret = -ENOMEM; + goto out; + } + + quirk->devid.vendor = dev->vendor; + quirk->devid.device = dev->device; + quirk->devid.subvendor = dev->subsystem_vendor; + quirk->devid.subdevice = dev->subsystem_device; + quirk->devid.class = 0; + quirk->devid.class_mask = 0; + quirk->devid.driver_data = 0UL; + + quirk->pdev = dev; + + register_quirk(quirk); +out: + return ret; +} + +void xen_pcibk_config_field_free(struct config_field *field) +{ + kfree(field); +} + +int xen_pcibk_config_quirk_release(struct pci_dev *dev) +{ + struct xen_pcibk_config_quirk *quirk; + int ret = 0; + + quirk = xen_pcibk_find_quirk(dev); + if (!quirk) { + ret = -ENXIO; + goto out; + } + + list_del(&quirk->quirks_list); + kfree(quirk); + +out: + return ret; +} diff --git a/drivers/xen/xen-pciback/conf_space_quirks.h b/drivers/xen/xen-pciback/conf_space_quirks.h new file mode 100644 index 000000000000..cfcc517e4570 --- /dev/null +++ b/drivers/xen/xen-pciback/conf_space_quirks.h @@ -0,0 +1,33 @@ +/* + * PCI Backend - Data structures for special overlays for broken devices. 
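
match_one_device() above is the standard PCI wildcard match. The userspace rendition below (device ids invented) shows a quirk pinned to one vendor matching an arbitrary device of that vendor:

#include <stdio.h>
#include <stdint.h>

#define PCI_ANY_ID (~0u)

struct pci_device_id { uint32_t vendor, device, subvendor, subdevice;
		       uint32_t class, class_mask; };
struct pci_dev { uint32_t vendor, device, subsystem_vendor,
		 subsystem_device, class; };

/* same wildcard logic as match_one_device() */
static int matches(const struct pci_device_id *id, const struct pci_dev *dev)
{
	return (id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
	       (id->device == PCI_ANY_ID || id->device == dev->device) &&
	       (id->subvendor == PCI_ANY_ID ||
		id->subvendor == dev->subsystem_vendor) &&
	       (id->subdevice == PCI_ANY_ID ||
		id->subdevice == dev->subsystem_device) &&
	       !((id->class ^ dev->class) & id->class_mask);
}

int main(void)
{
	/* quirk for any 8086:xxxx device (vendor pinned, rest wildcarded) */
	struct pci_device_id quirk = { 0x8086, PCI_ANY_ID, PCI_ANY_ID,
				       PCI_ANY_ID, 0, 0 };
	struct pci_dev dev = { 0x8086, 0x10d3, 0x8086, 0xa01f, 0x020000 };

	printf("match: %d\n", matches(&quirk, &dev));	/* 1 */
	return 0;
}
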
+ * + * Ryan Wilson <hap9@epoch.ncsc.mil> + * Chris Bookholt <hap10@epoch.ncsc.mil> + */ + +#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__ +#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__ + +#include <linux/pci.h> +#include <linux/list.h> + +struct xen_pcibk_config_quirk { + struct list_head quirks_list; + struct pci_device_id devid; + struct pci_dev *pdev; +}; + +int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field + *field); + +int xen_pcibk_config_quirks_remove_field(struct pci_dev *dev, int reg); + +int xen_pcibk_config_quirks_init(struct pci_dev *dev); + +void xen_pcibk_config_field_free(struct config_field *field); + +int xen_pcibk_config_quirk_release(struct pci_dev *dev); + +int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg); + +#endif diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c new file mode 100644 index 000000000000..1d32a9a42c01 --- /dev/null +++ b/drivers/xen/xen-pciback/passthrough.c @@ -0,0 +1,194 @@ +/* + * PCI Backend - Provides restricted access to the real PCI bus topology + * to the frontend + * + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> + */ + +#include <linux/list.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include "pciback.h" + +struct passthrough_dev_data { + /* Access to dev_list must be protected by lock */ + struct list_head dev_list; + spinlock_t lock; +}; + +static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, + unsigned int domain, + unsigned int bus, + unsigned int devfn) +{ + struct passthrough_dev_data *dev_data = pdev->pci_dev_data; + struct pci_dev_entry *dev_entry; + struct pci_dev *dev = NULL; + unsigned long flags; + + spin_lock_irqsave(&dev_data->lock, flags); + + list_for_each_entry(dev_entry, &dev_data->dev_list, list) { + if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus) + && bus == (unsigned int)dev_entry->dev->bus->number + && devfn == dev_entry->dev->devfn) { + dev = dev_entry->dev; + break; + } + } + + spin_unlock_irqrestore(&dev_data->lock, flags); + + return dev; +} + +static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, + struct pci_dev *dev, + int devid, publish_pci_dev_cb publish_cb) +{ + struct passthrough_dev_data *dev_data = pdev->pci_dev_data; + struct pci_dev_entry *dev_entry; + unsigned long flags; + unsigned int domain, bus, devfn; + int err; + + dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL); + if (!dev_entry) + return -ENOMEM; + dev_entry->dev = dev; + + spin_lock_irqsave(&dev_data->lock, flags); + list_add_tail(&dev_entry->list, &dev_data->dev_list); + spin_unlock_irqrestore(&dev_data->lock, flags); + + /* Publish this device. 
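
The backend identifies devices by (domain, bus, devfn) triples; devfn packs slot and function as in <linux/pci.h>. A quick demonstration of the encoding (slot and function values made up):

#include <stdio.h>

/* devfn packing as in <linux/pci.h>; the passthrough backend compares
 * frontend (domain, bus, devfn) triples against this encoding */
#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)       (((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)       ((devfn) & 0x07)

int main(void)
{
	unsigned int devfn = PCI_DEVFN(0x03, 2);	/* 00:03.2 */
	printf("devfn=0x%02x slot=%u func=%u\n",
	       devfn, PCI_SLOT(devfn), PCI_FUNC(devfn));	/* 0x1a 3 2 */
	return 0;
}
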
*/ + domain = (unsigned int)pci_domain_nr(dev->bus); + bus = (unsigned int)dev->bus->number; + devfn = dev->devfn; + err = publish_cb(pdev, domain, bus, devfn, devid); + + return err; +} + +static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, + struct pci_dev *dev) +{ + struct passthrough_dev_data *dev_data = pdev->pci_dev_data; + struct pci_dev_entry *dev_entry, *t; + struct pci_dev *found_dev = NULL; + unsigned long flags; + + spin_lock_irqsave(&dev_data->lock, flags); + + list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) { + if (dev_entry->dev == dev) { + list_del(&dev_entry->list); + found_dev = dev_entry->dev; + kfree(dev_entry); + } + } + + spin_unlock_irqrestore(&dev_data->lock, flags); + + if (found_dev) + pcistub_put_pci_dev(found_dev); +} + +static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev) +{ + struct passthrough_dev_data *dev_data; + + dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL); + if (!dev_data) + return -ENOMEM; + + spin_lock_init(&dev_data->lock); + + INIT_LIST_HEAD(&dev_data->dev_list); + + pdev->pci_dev_data = dev_data; + + return 0; +} + +static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, + publish_pci_root_cb publish_root_cb) +{ + int err = 0; + struct passthrough_dev_data *dev_data = pdev->pci_dev_data; + struct pci_dev_entry *dev_entry, *e, *tmp; + struct pci_dev *dev; + int found; + unsigned int domain, bus; + + spin_lock(&dev_data->lock); + + list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) { + /* Only publish this device as a root if none of its + * parent bridges are exported + */ + found = 0; + dev = dev_entry->dev->bus->self; + for (; !found && dev != NULL; dev = dev->bus->self) { + list_for_each_entry(e, &dev_data->dev_list, list) { + if (dev == e->dev) { + found = 1; + break; + } + } + } + + domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus); + bus = (unsigned int)dev_entry->dev->bus->number; + + if (!found) { + spin_unlock(&dev_data->lock); + err = publish_root_cb(pdev, domain, bus); + if (err) + break; + spin_lock(&dev_data->lock); + } + } + + if (!err) + spin_unlock(&dev_data->lock); + + return err; +} + +static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev) +{ + struct passthrough_dev_data *dev_data = pdev->pci_dev_data; + struct pci_dev_entry *dev_entry, *t; + + list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) { + list_del(&dev_entry->list); + pcistub_put_pci_dev(dev_entry->dev); + kfree(dev_entry); + } + + kfree(dev_data); + pdev->pci_dev_data = NULL; +} + +static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, + struct xen_pcibk_device *pdev, + unsigned int *domain, unsigned int *bus, + unsigned int *devfn) +{ + *domain = pci_domain_nr(pcidev->bus); + *bus = pcidev->bus->number; + *devfn = pcidev->devfn; + return 1; +} + +struct xen_pcibk_backend xen_pcibk_passthrough_backend = { + .name = "passthrough", + .init = __xen_pcibk_init_devices, + .free = __xen_pcibk_release_devices, + .find = __xen_pcibk_get_pcifront_dev, + .publish = __xen_pcibk_publish_pci_roots, + .release = __xen_pcibk_release_pci_dev, + .add = __xen_pcibk_add_pci_dev, + .get = __xen_pcibk_get_pci_dev, +}; diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c new file mode 100644 index 000000000000..aec214ac0a14 --- /dev/null +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -0,0 +1,1376 @@ +/* + * PCI Stub Driver - Grabs devices in backend to be exported later + * + * Ryan Wilson <hap9@epoch.ncsc.mil> + * Chris 
Bookholt <hap10@epoch.ncsc.mil>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rwsem.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/kref.h>
+#include <linux/pci.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/atomic.h>
+#include <xen/events.h>
+#include <asm/xen/pci.h>
+#include <asm/xen/hypervisor.h>
+#include "pciback.h"
+#include "conf_space.h"
+#include "conf_space_quirks.h"
+
+#define DRV_NAME "xen-pciback"
+
+static char *pci_devs_to_hide;
+wait_queue_head_t xen_pcibk_aer_wait_queue;
+/* A semaphore that serializes AER handling against xen_pcibk
+ * remove/reconfigure ops: a device must not be removed in the middle of
+ * an AER operation.
+ */
+static DECLARE_RWSEM(pcistub_sem);
+module_param_named(hide, pci_devs_to_hide, charp, 0444);
+
+struct pcistub_device_id {
+	struct list_head slot_list;
+	int domain;
+	unsigned char bus;
+	unsigned int devfn;
+};
+static LIST_HEAD(pcistub_device_ids);
+static DEFINE_SPINLOCK(device_ids_lock);
+
+struct pcistub_device {
+	struct kref kref;
+	struct list_head dev_list;
+	spinlock_t lock;
+
+	struct pci_dev *dev;
+	struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */
+};
+
+/* Access to pcistub_devices & seized_devices lists and the initialize_devices
+ * flag must be locked with pcistub_devices_lock
+ */
+static DEFINE_SPINLOCK(pcistub_devices_lock);
+static LIST_HEAD(pcistub_devices);
+
+/* wait for device_initcall before initializing our devices
+ * (see pcistub_init_devices_late)
+ */
+static int initialize_devices;
+static LIST_HEAD(seized_devices);
+
+static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
+{
+	struct pcistub_device *psdev;
+
+	dev_dbg(&dev->dev, "pcistub_device_alloc\n");
+
+	psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC);
+	if (!psdev)
+		return NULL;
+
+	psdev->dev = pci_dev_get(dev);
+	if (!psdev->dev) {
+		kfree(psdev);
+		return NULL;
+	}
+
+	kref_init(&psdev->kref);
+	spin_lock_init(&psdev->lock);
+
+	return psdev;
+}
+
+/* Don't call this directly as it's called by pcistub_device_put */
+static void pcistub_device_release(struct kref *kref)
+{
+	struct pcistub_device *psdev;
+
+	psdev = container_of(kref, struct pcistub_device, kref);
+
+	dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
+
+	xen_unregister_device_domain_owner(psdev->dev);
+
+	/* Clean-up the device */
+	xen_pcibk_reset_device(psdev->dev);
+	xen_pcibk_config_free_dyn_fields(psdev->dev);
+	xen_pcibk_config_free_dev(psdev->dev);
+	kfree(pci_get_drvdata(psdev->dev));
+	pci_set_drvdata(psdev->dev, NULL);
+
+	pci_dev_put(psdev->dev);
+
+	kfree(psdev);
+}
+
+static inline void pcistub_device_get(struct pcistub_device *psdev)
+{
+	kref_get(&psdev->kref);
+}
+
+static inline void pcistub_device_put(struct pcistub_device *psdev)
+{
+	kref_put(&psdev->kref, pcistub_device_release);
+}
+
+static struct pcistub_device *pcistub_device_find(int domain, int bus,
+						  int slot, int func)
+{
+	struct pcistub_device *psdev = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+		if (psdev->dev != NULL
+		    && domain == pci_domain_nr(psdev->dev->bus)
+		    && bus == psdev->dev->bus->number
+		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
+			pcistub_device_get(psdev);
+			goto out;
+		}
+	}
+
+	/* didn't find it */
+	psdev = NULL;
+
+out:
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+	return psdev;
+}
+
+static struct pci_dev *pcistub_device_get_pci_dev(struct xen_pcibk_device *pdev,
+ struct pcistub_device *psdev) +{ + struct pci_dev *pci_dev = NULL; + unsigned long flags; + + pcistub_device_get(psdev); + + spin_lock_irqsave(&psdev->lock, flags); + if (!psdev->pdev) { + psdev->pdev = pdev; + pci_dev = psdev->dev; + } + spin_unlock_irqrestore(&psdev->lock, flags); + + if (!pci_dev) + pcistub_device_put(psdev); + + return pci_dev; +} + +struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev, + int domain, int bus, + int slot, int func) +{ + struct pcistub_device *psdev; + struct pci_dev *found_dev = NULL; + unsigned long flags; + + spin_lock_irqsave(&pcistub_devices_lock, flags); + + list_for_each_entry(psdev, &pcistub_devices, dev_list) { + if (psdev->dev != NULL + && domain == pci_domain_nr(psdev->dev->bus) + && bus == psdev->dev->bus->number + && PCI_DEVFN(slot, func) == psdev->dev->devfn) { + found_dev = pcistub_device_get_pci_dev(pdev, psdev); + break; + } + } + + spin_unlock_irqrestore(&pcistub_devices_lock, flags); + return found_dev; +} + +struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev, + struct pci_dev *dev) +{ + struct pcistub_device *psdev; + struct pci_dev *found_dev = NULL; + unsigned long flags; + + spin_lock_irqsave(&pcistub_devices_lock, flags); + + list_for_each_entry(psdev, &pcistub_devices, dev_list) { + if (psdev->dev == dev) { + found_dev = pcistub_device_get_pci_dev(pdev, psdev); + break; + } + } + + spin_unlock_irqrestore(&pcistub_devices_lock, flags); + return found_dev; +} + +void pcistub_put_pci_dev(struct pci_dev *dev) +{ + struct pcistub_device *psdev, *found_psdev = NULL; + unsigned long flags; + + spin_lock_irqsave(&pcistub_devices_lock, flags); + + list_for_each_entry(psdev, &pcistub_devices, dev_list) { + if (psdev->dev == dev) { + found_psdev = psdev; + break; + } + } + + spin_unlock_irqrestore(&pcistub_devices_lock, flags); + + /*hold this lock for avoiding breaking link between + * pcistub and xen_pcibk when AER is in processing + */ + down_write(&pcistub_sem); + /* Cleanup our device + * (so it's ready for the next domain) + */ + xen_pcibk_reset_device(found_psdev->dev); + xen_pcibk_config_free_dyn_fields(found_psdev->dev); + xen_pcibk_config_reset_dev(found_psdev->dev); + + spin_lock_irqsave(&found_psdev->lock, flags); + found_psdev->pdev = NULL; + spin_unlock_irqrestore(&found_psdev->lock, flags); + + pcistub_device_put(found_psdev); + up_write(&pcistub_sem); +} + +static int __devinit pcistub_match_one(struct pci_dev *dev, + struct pcistub_device_id *pdev_id) +{ + /* Match the specified device by domain, bus, slot, func and also if + * any of the device's parent bridges match. + */ + for (; dev != NULL; dev = dev->bus->self) { + if (pci_domain_nr(dev->bus) == pdev_id->domain + && dev->bus->number == pdev_id->bus + && dev->devfn == pdev_id->devfn) + return 1; + + /* Sometimes topmost bridge links to itself. 
*/ + if (dev == dev->bus->self) + break; + } + + return 0; +} + +static int __devinit pcistub_match(struct pci_dev *dev) +{ + struct pcistub_device_id *pdev_id; + unsigned long flags; + int found = 0; + + spin_lock_irqsave(&device_ids_lock, flags); + list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) { + if (pcistub_match_one(dev, pdev_id)) { + found = 1; + break; + } + } + spin_unlock_irqrestore(&device_ids_lock, flags); + + return found; +} + +static int __devinit pcistub_init_device(struct pci_dev *dev) +{ + struct xen_pcibk_dev_data *dev_data; + int err = 0; + + dev_dbg(&dev->dev, "initializing...\n"); + + /* The PCI backend is not intended to be a module (or to work with + * removable PCI devices (yet). If it were, xen_pcibk_config_free() + * would need to be called somewhere to free the memory allocated + * here and then to call kfree(pci_get_drvdata(psdev->dev)). + */ + dev_data = kzalloc(sizeof(*dev_data) + strlen(DRV_NAME "[]") + + strlen(pci_name(dev)) + 1, GFP_ATOMIC); + if (!dev_data) { + err = -ENOMEM; + goto out; + } + pci_set_drvdata(dev, dev_data); + + /* + * Setup name for fake IRQ handler. It will only be enabled + * once the device is turned on by the guest. + */ + sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev)); + + dev_dbg(&dev->dev, "initializing config\n"); + + init_waitqueue_head(&xen_pcibk_aer_wait_queue); + err = xen_pcibk_config_init_dev(dev); + if (err) + goto out; + + /* HACK: Force device (& ACPI) to determine what IRQ it's on - we + * must do this here because pcibios_enable_device may specify + * the pci device's true irq (and possibly its other resources) + * if they differ from what's in the configuration space. + * This makes the assumption that the device's resources won't + * change after this point (otherwise this code may break!) + */ + dev_dbg(&dev->dev, "enabling device\n"); + err = pci_enable_device(dev); + if (err) + goto config_release; + + /* Now disable the device (this also ensures some private device + * data is setup before we export) + */ + dev_dbg(&dev->dev, "reset device\n"); + xen_pcibk_reset_device(dev); + + return 0; + +config_release: + xen_pcibk_config_free_dev(dev); + +out: + pci_set_drvdata(dev, NULL); + kfree(dev_data); + return err; +} + +/* + * Because some initialization still happens on + * devices during fs_initcall, we need to defer + * full initialization of our devices until + * device_initcall. 
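
The odd-looking kzalloc size in pcistub_init_device() reserves room for the trailing irq_name buffer. The sketch below redoes that arithmetic in userspace (the PCI slot name is made up) to show the buffer is sized exactly for DRV_NAME, the brackets, the device name and the NUL:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DRV_NAME "xen-pciback"

/* same sizing trick as pcistub_init_device(): the irq_name buffer must
 * hold DRV_NAME, "[", the pci name, "]" and the terminating NUL,
 * e.g. "xen-pciback[0000:00:03.2]" */
int main(void)
{
	const char *pci_name = "0000:00:03.2";	/* made-up slot */
	size_t need = strlen(DRV_NAME "[]") + strlen(pci_name) + 1;
	char *irq_name = malloc(need);

	if (!irq_name)
		return 1;
	sprintf(irq_name, DRV_NAME "[%s]", pci_name);
	printf("%s (uses %zu of %zu bytes)\n",
	       irq_name, strlen(irq_name) + 1, need);
	free(irq_name);
	return 0;
}
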
+ */ +static int __init pcistub_init_devices_late(void) +{ + struct pcistub_device *psdev; + unsigned long flags; + int err = 0; + + pr_debug(DRV_NAME ": pcistub_init_devices_late\n"); + + spin_lock_irqsave(&pcistub_devices_lock, flags); + + while (!list_empty(&seized_devices)) { + psdev = container_of(seized_devices.next, + struct pcistub_device, dev_list); + list_del(&psdev->dev_list); + + spin_unlock_irqrestore(&pcistub_devices_lock, flags); + + err = pcistub_init_device(psdev->dev); + if (err) { + dev_err(&psdev->dev->dev, + "error %d initializing device\n", err); + kfree(psdev); + psdev = NULL; + } + + spin_lock_irqsave(&pcistub_devices_lock, flags); + + if (psdev) + list_add_tail(&psdev->dev_list, &pcistub_devices); + } + + initialize_devices = 1; + + spin_unlock_irqrestore(&pcistub_devices_lock, flags); + + return 0; +} + +static int __devinit pcistub_seize(struct pci_dev *dev) +{ + struct pcistub_device *psdev; + unsigned long flags; + int err = 0; + + psdev = pcistub_device_alloc(dev); + if (!psdev) + return -ENOMEM; + + spin_lock_irqsave(&pcistub_devices_lock, flags); + + if (initialize_devices) { + spin_unlock_irqrestore(&pcistub_devices_lock, flags); + + /* don't want irqs disabled when calling pcistub_init_device */ + err = pcistub_init_device(psdev->dev); + + spin_lock_irqsave(&pcistub_devices_lock, flags); + + if (!err) + list_add(&psdev->dev_list, &pcistub_devices); + } else { + dev_dbg(&dev->dev, "deferring initialization\n"); + list_add(&psdev->dev_list, &seized_devices); + } + + spin_unlock_irqrestore(&pcistub_devices_lock, flags); + + if (err) + pcistub_device_put(psdev); + + return err; +} + +static int __devinit pcistub_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + int err = 0; + + dev_dbg(&dev->dev, "probing...\n"); + + if (pcistub_match(dev)) { + + if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL + && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { + dev_err(&dev->dev, "can't export pci devices that " + "don't have a normal (0) or bridge (1) " + "header type!\n"); + err = -ENODEV; + goto out; + } + + dev_info(&dev->dev, "seizing device\n"); + err = pcistub_seize(dev); + } else + /* Didn't find the device */ + err = -ENODEV; + +out: + return err; +} + +static void pcistub_remove(struct pci_dev *dev) +{ + struct pcistub_device *psdev, *found_psdev = NULL; + unsigned long flags; + + dev_dbg(&dev->dev, "removing\n"); + + spin_lock_irqsave(&pcistub_devices_lock, flags); + + xen_pcibk_config_quirk_release(dev); + + list_for_each_entry(psdev, &pcistub_devices, dev_list) { + if (psdev->dev == dev) { + found_psdev = psdev; + break; + } + } + + spin_unlock_irqrestore(&pcistub_devices_lock, flags); + + if (found_psdev) { + dev_dbg(&dev->dev, "found device to remove - in use? %p\n", + found_psdev->pdev); + + if (found_psdev->pdev) { + printk(KERN_WARNING DRV_NAME ": ****** removing device " + "%s while still in-use! 
******\n",
+			       pci_name(found_psdev->dev));
+			printk(KERN_WARNING DRV_NAME ": ****** driver domain may"
+			       " still access this device's i/o resources!\n");
+			printk(KERN_WARNING DRV_NAME ": ****** shutdown driver "
+			       "domain before binding device\n");
+			printk(KERN_WARNING DRV_NAME ": ****** to other drivers "
+			       "or domains\n");
+
+			xen_pcibk_release_pci_dev(found_psdev->pdev,
+						  found_psdev->dev);
+		}
+
+		spin_lock_irqsave(&pcistub_devices_lock, flags);
+		list_del(&found_psdev->dev_list);
+		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+		/* the final put for releasing from the list */
+		pcistub_device_put(found_psdev);
+	}
+}
+
+static DEFINE_PCI_DEVICE_TABLE(pcistub_ids) = {
+	{
+	 .vendor = PCI_ANY_ID,
+	 .device = PCI_ANY_ID,
+	 .subvendor = PCI_ANY_ID,
+	 .subdevice = PCI_ANY_ID,
+	 },
+	{0,},
+};
+
+#define PCI_NODENAME_MAX 40
+static void kill_domain_by_device(struct pcistub_device *psdev)
+{
+	struct xenbus_transaction xbt;
+	int err;
+	char nodename[PCI_NODENAME_MAX];
+
+	if (!psdev) {
+		/* was: dev_err(&psdev->dev->dev, ...), which dereferenced the
+		 * NULL pointer it had just tested for; report and bail out */
+		pr_err(DRV_NAME ": device is NULL when doing AER recovery/kill_domain\n");
+		return;
+	}
+	snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
+		 psdev->pdev->xdev->otherend_id);
+	nodename[strlen(nodename)] = '\0';
+
+again:
+	err = xenbus_transaction_start(&xbt);
+	if (err) {
+		dev_err(&psdev->dev->dev,
+			"error %d when starting xenbus transaction\n", err);
+		return;
+	}
+	/* PV AER handlers will set this flag */
+	xenbus_printf(xbt, nodename, "aerState", "aerfail");
+	err = xenbus_transaction_end(xbt, 0);
+	if (err) {
+		if (err == -EAGAIN)
+			goto again;
+		dev_err(&psdev->dev->dev,
+			"error %d when ending xenbus transaction\n", err);
+		return;
+	}
+}
+
+/* For each AER recovery step (error_detected, mmio_enabled, etc.) the frontend
+ * and the backend need to cooperate. In xen_pcibk these steps all do a similar
+ * job: send a service request and wait for the frontend's response.
+ */
+static pci_ers_result_t common_process(struct pcistub_device *psdev,
+				       pci_channel_state_t state, int aer_cmd,
+				       pci_ers_result_t result)
+{
+	pci_ers_result_t res = result;
+	struct xen_pcie_aer_op *aer_op;
+	int ret;
+
+	/* with PV AER drivers */
+	aer_op = &(psdev->pdev->sh_info->aer_op);
+	aer_op->cmd = aer_cmd;
+	/* useful for the error_detected callback */
+	aer_op->err = state;
+	/* pcifront_end BDF */
+	ret = xen_pcibk_get_pcifront_dev(psdev->dev, psdev->pdev,
+					 &aer_op->domain, &aer_op->bus,
+					 &aer_op->devfn);
+	if (!ret) {
+		dev_err(&psdev->dev->dev,
+			DRV_NAME ": failed to get pcifront device\n");
+		return PCI_ERS_RESULT_NONE;
+	}
+	wmb();
+
+	dev_dbg(&psdev->dev->dev,
+		DRV_NAME ": aer_op %x dom %x bus %x devfn %x\n",
+		aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
+	/* local flag to mark that an AER request is outstanding; the xen_pcibk
+	 * callback uses it to decide whether to check for pci-front's AER
+	 * service ack signal
+	 */
+	set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
+
+	/* It is possible that a pcifront conf_read_write ops request invokes
+	 * the callback, which causes a spurious execution of wake_up.
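
The again: label in kill_domain_by_device() above is the standard xenbus pattern of retrying a whole transaction when the commit returns -EAGAIN. A self-contained model of just that control flow (fake_transaction_end stands in for xenbus_transaction_end and fails once on purpose):

#include <stdio.h>
#include <errno.h>

static int attempts;

/* stand-in for xenbus_transaction_end(): fail once, succeed on retry */
static int fake_transaction_end(void)
{
	return (++attempts < 2) ? -EAGAIN : 0;
}

int main(void)
{
	int err;
again:
	/* xenbus_transaction_start()/xenbus_printf() would go here */
	err = fake_transaction_end();
	if (err == -EAGAIN)
		goto again;	/* conflicting update, redo the transaction */
	printf("committed after %d attempt(s), err=%d\n", attempts, err);
	return 0;
}
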
+ * Yet it is harmless and better than a spinlock here + */ + set_bit(_XEN_PCIB_active, + (unsigned long *)&psdev->pdev->sh_info->flags); + wmb(); + notify_remote_via_irq(psdev->pdev->evtchn_irq); + + ret = wait_event_timeout(xen_pcibk_aer_wait_queue, + !(test_bit(_XEN_PCIB_active, (unsigned long *) + &psdev->pdev->sh_info->flags)), 300*HZ); + + if (!ret) { + if (test_bit(_XEN_PCIB_active, + (unsigned long *)&psdev->pdev->sh_info->flags)) { + dev_err(&psdev->dev->dev, + "pcifront aer process not responding!\n"); + clear_bit(_XEN_PCIB_active, + (unsigned long *)&psdev->pdev->sh_info->flags); + aer_op->err = PCI_ERS_RESULT_NONE; + return res; + } + } + clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags); + + if (test_bit(_XEN_PCIF_active, + (unsigned long *)&psdev->pdev->sh_info->flags)) { + dev_dbg(&psdev->dev->dev, + "schedule pci_conf service in xen_pcibk\n"); + xen_pcibk_test_and_schedule_op(psdev->pdev); + } + + res = (pci_ers_result_t)aer_op->err; + return res; +} + +/* +* xen_pcibk_slot_reset: it will send the slot_reset request to pcifront in case +* of the device driver could provide this service, and then wait for pcifront +* ack. +* @dev: pointer to PCI devices +* return value is used by aer_core do_recovery policy +*/ +static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev) +{ + struct pcistub_device *psdev; + pci_ers_result_t result; + + result = PCI_ERS_RESULT_RECOVERED; + dev_dbg(&dev->dev, "xen_pcibk_slot_reset(bus:%x,devfn:%x)\n", + dev->bus->number, dev->devfn); + + down_write(&pcistub_sem); + psdev = pcistub_device_find(pci_domain_nr(dev->bus), + dev->bus->number, + PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); + + if (!psdev || !psdev->pdev) { + dev_err(&dev->dev, + DRV_NAME " device is not found/assigned\n"); + goto end; + } + + if (!psdev->pdev->sh_info) { + dev_err(&dev->dev, DRV_NAME " device is not connected or owned" + " by HVM, kill it\n"); + kill_domain_by_device(psdev); + goto release; + } + + if (!test_bit(_XEN_PCIB_AERHANDLER, + (unsigned long *)&psdev->pdev->sh_info->flags)) { + dev_err(&dev->dev, + "guest with no AER driver should have been killed\n"); + goto release; + } + result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result); + + if (result == PCI_ERS_RESULT_NONE || + result == PCI_ERS_RESULT_DISCONNECT) { + dev_dbg(&dev->dev, + "No AER slot_reset service or disconnected!\n"); + kill_domain_by_device(psdev); + } +release: + pcistub_device_put(psdev); +end: + up_write(&pcistub_sem); + return result; + +} + + +/*xen_pcibk_mmio_enabled: it will send the mmio_enabled request to pcifront +* in case of the device driver could provide this service, and then wait +* for pcifront ack +* @dev: pointer to PCI devices +* return value is used by aer_core do_recovery policy +*/ + +static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev) +{ + struct pcistub_device *psdev; + pci_ers_result_t result; + + result = PCI_ERS_RESULT_RECOVERED; + dev_dbg(&dev->dev, "xen_pcibk_mmio_enabled(bus:%x,devfn:%x)\n", + dev->bus->number, dev->devfn); + + down_write(&pcistub_sem); + psdev = pcistub_device_find(pci_domain_nr(dev->bus), + dev->bus->number, + PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); + + if (!psdev || !psdev->pdev) { + dev_err(&dev->dev, + DRV_NAME " device is not found/assigned\n"); + goto end; + } + + if (!psdev->pdev->sh_info) { + dev_err(&dev->dev, DRV_NAME " device is not connected or owned" + " by HVM, kill it\n"); + kill_domain_by_device(psdev); + goto release; + } + + if (!test_bit(_XEN_PCIB_AERHANDLER, + 
+ (unsigned long *)&psdev->pdev->sh_info->flags)) {
+ dev_err(&dev->dev,
+ "guest with no AER driver should have been killed\n");
+ goto release;
+ }
+ result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result);
+
+ if (result == PCI_ERS_RESULT_NONE ||
+ result == PCI_ERS_RESULT_DISCONNECT) {
+ dev_dbg(&dev->dev,
+ "No AER mmio_enabled service or disconnected!\n");
+ kill_domain_by_device(psdev);
+ }
+release:
+ pcistub_device_put(psdev);
+end:
+ up_write(&pcistub_sem);
+ return result;
+}
+
+/* xen_pcibk_error_detected: sends the error_detected request to pcifront,
+ * in case the device driver provides this service, and then waits for
+ * pcifront's ack.
+ * @dev: pointer to the PCI device
+ * @error: the current PCI connection state
+ * The return value is used by the AER core's do_recovery policy.
+ */
+static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev,
+ pci_channel_state_t error)
+{
+ struct pcistub_device *psdev;
+ pci_ers_result_t result;
+
+ result = PCI_ERS_RESULT_CAN_RECOVER;
+ dev_dbg(&dev->dev, "xen_pcibk_error_detected(bus:%x,devfn:%x)\n",
+ dev->bus->number, dev->devfn);
+
+ down_write(&pcistub_sem);
+ psdev = pcistub_device_find(pci_domain_nr(dev->bus),
+ dev->bus->number,
+ PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+
+ if (!psdev || !psdev->pdev) {
+ dev_err(&dev->dev,
+ DRV_NAME " device is not found/assigned\n");
+ goto end;
+ }
+
+ if (!psdev->pdev->sh_info) {
+ dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
+ " by HVM, kill it\n");
+ kill_domain_by_device(psdev);
+ goto release;
+ }
+
+ /* The guest owns the device, yet no AER handler is registered;
+ * kill the guest.
+ */
+ if (!test_bit(_XEN_PCIB_AERHANDLER,
+ (unsigned long *)&psdev->pdev->sh_info->flags)) {
+ dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
+ kill_domain_by_device(psdev);
+ goto release;
+ }
+ result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result);
+
+ if (result == PCI_ERS_RESULT_NONE ||
+ result == PCI_ERS_RESULT_DISCONNECT) {
+ dev_dbg(&dev->dev,
+ "No AER error_detected service or disconnected!\n");
+ kill_domain_by_device(psdev);
+ }
+release:
+ pcistub_device_put(psdev);
+end:
+ up_write(&pcistub_sem);
+ return result;
+}
+
+/* xen_pcibk_error_resume: sends the error_resume request to pcifront, in
+ * case the device driver provides this service, and then waits for
+ * pcifront's ack.
+* @dev: pointer to PCI devices +*/ + +static void xen_pcibk_error_resume(struct pci_dev *dev) +{ + struct pcistub_device *psdev; + + dev_dbg(&dev->dev, "xen_pcibk_error_resume(bus:%x,devfn:%x)\n", + dev->bus->number, dev->devfn); + + down_write(&pcistub_sem); + psdev = pcistub_device_find(pci_domain_nr(dev->bus), + dev->bus->number, + PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); + + if (!psdev || !psdev->pdev) { + dev_err(&dev->dev, + DRV_NAME " device is not found/assigned\n"); + goto end; + } + + if (!psdev->pdev->sh_info) { + dev_err(&dev->dev, DRV_NAME " device is not connected or owned" + " by HVM, kill it\n"); + kill_domain_by_device(psdev); + goto release; + } + + if (!test_bit(_XEN_PCIB_AERHANDLER, + (unsigned long *)&psdev->pdev->sh_info->flags)) { + dev_err(&dev->dev, + "guest with no AER driver should have been killed\n"); + kill_domain_by_device(psdev); + goto release; + } + common_process(psdev, 1, XEN_PCI_OP_aer_resume, + PCI_ERS_RESULT_RECOVERED); +release: + pcistub_device_put(psdev); +end: + up_write(&pcistub_sem); + return; +} + +/*add xen_pcibk AER handling*/ +static struct pci_error_handlers xen_pcibk_error_handler = { + .error_detected = xen_pcibk_error_detected, + .mmio_enabled = xen_pcibk_mmio_enabled, + .slot_reset = xen_pcibk_slot_reset, + .resume = xen_pcibk_error_resume, +}; + +/* + * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't + * for a normal device. I don't want it to be loaded automatically. + */ + +static struct pci_driver xen_pcibk_pci_driver = { + /* The name should be xen_pciback, but until the tools are updated + * we will keep it as pciback. */ + .name = "pciback", + .id_table = pcistub_ids, + .probe = pcistub_probe, + .remove = pcistub_remove, + .err_handler = &xen_pcibk_error_handler, +}; + +static inline int str_to_slot(const char *buf, int *domain, int *bus, + int *slot, int *func) +{ + int err; + + err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func); + if (err == 4) + return 0; + else if (err < 0) + return -EINVAL; + + /* try again without domain */ + *domain = 0; + err = sscanf(buf, " %x:%x.%x", bus, slot, func); + if (err == 3) + return 0; + + return -EINVAL; +} + +static inline int str_to_quirk(const char *buf, int *domain, int *bus, int + *slot, int *func, int *reg, int *size, int *mask) +{ + int err; + + err = + sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x", domain, bus, slot, + func, reg, size, mask); + if (err == 7) + return 0; + return -EINVAL; +} + +static int pcistub_device_id_add(int domain, int bus, int slot, int func) +{ + struct pcistub_device_id *pci_dev_id; + unsigned long flags; + + pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL); + if (!pci_dev_id) + return -ENOMEM; + + pci_dev_id->domain = domain; + pci_dev_id->bus = bus; + pci_dev_id->devfn = PCI_DEVFN(slot, func); + + pr_debug(DRV_NAME ": wants to seize %04x:%02x:%02x.%01x\n", + domain, bus, slot, func); + + spin_lock_irqsave(&device_ids_lock, flags); + list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids); + spin_unlock_irqrestore(&device_ids_lock, flags); + + return 0; +} + +static int pcistub_device_id_remove(int domain, int bus, int slot, int func) +{ + struct pcistub_device_id *pci_dev_id, *t; + int devfn = PCI_DEVFN(slot, func); + int err = -ENOENT; + unsigned long flags; + + spin_lock_irqsave(&device_ids_lock, flags); + list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, + slot_list) { + if (pci_dev_id->domain == domain + && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) { + /* Don't break; here because it's 
possible the same + * slot could be in the list more than once + */ + list_del(&pci_dev_id->slot_list); + kfree(pci_dev_id); + + err = 0; + + pr_debug(DRV_NAME ": removed %04x:%02x:%02x.%01x from " + "seize list\n", domain, bus, slot, func); + } + } + spin_unlock_irqrestore(&device_ids_lock, flags); + + return err; +} + +static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg, + int size, int mask) +{ + int err = 0; + struct pcistub_device *psdev; + struct pci_dev *dev; + struct config_field *field; + + psdev = pcistub_device_find(domain, bus, slot, func); + if (!psdev || !psdev->dev) { + err = -ENODEV; + goto out; + } + dev = psdev->dev; + + field = kzalloc(sizeof(*field), GFP_ATOMIC); + if (!field) { + err = -ENOMEM; + goto out; + } + + field->offset = reg; + field->size = size; + field->mask = mask; + field->init = NULL; + field->reset = NULL; + field->release = NULL; + field->clean = xen_pcibk_config_field_free; + + err = xen_pcibk_config_quirks_add_field(dev, field); + if (err) + kfree(field); +out: + return err; +} + +static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf, + size_t count) +{ + int domain, bus, slot, func; + int err; + + err = str_to_slot(buf, &domain, &bus, &slot, &func); + if (err) + goto out; + + err = pcistub_device_id_add(domain, bus, slot, func); + +out: + if (!err) + err = count; + return err; +} + +DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add); + +static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf, + size_t count) +{ + int domain, bus, slot, func; + int err; + + err = str_to_slot(buf, &domain, &bus, &slot, &func); + if (err) + goto out; + + err = pcistub_device_id_remove(domain, bus, slot, func); + +out: + if (!err) + err = count; + return err; +} + +DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove); + +static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf) +{ + struct pcistub_device_id *pci_dev_id; + size_t count = 0; + unsigned long flags; + + spin_lock_irqsave(&device_ids_lock, flags); + list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) { + if (count >= PAGE_SIZE) + break; + + count += scnprintf(buf + count, PAGE_SIZE - count, + "%04x:%02x:%02x.%01x\n", + pci_dev_id->domain, pci_dev_id->bus, + PCI_SLOT(pci_dev_id->devfn), + PCI_FUNC(pci_dev_id->devfn)); + } + spin_unlock_irqrestore(&device_ids_lock, flags); + + return count; +} + +DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL); + +static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf) +{ + struct pcistub_device *psdev; + struct xen_pcibk_dev_data *dev_data; + size_t count = 0; + unsigned long flags; + + spin_lock_irqsave(&pcistub_devices_lock, flags); + list_for_each_entry(psdev, &pcistub_devices, dev_list) { + if (count >= PAGE_SIZE) + break; + if (!psdev->dev) + continue; + dev_data = pci_get_drvdata(psdev->dev); + if (!dev_data) + continue; + count += + scnprintf(buf + count, PAGE_SIZE - count, + "%s:%s:%sing:%ld\n", + pci_name(psdev->dev), + dev_data->isr_on ? "on" : "off", + dev_data->ack_intr ? 
"ack" : "not ack", + dev_data->handled); + } + spin_unlock_irqrestore(&pcistub_devices_lock, flags); + return count; +} + +DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL); + +static ssize_t pcistub_irq_handler_switch(struct device_driver *drv, + const char *buf, + size_t count) +{ + struct pcistub_device *psdev; + struct xen_pcibk_dev_data *dev_data; + int domain, bus, slot, func; + int err = -ENOENT; + + err = str_to_slot(buf, &domain, &bus, &slot, &func); + if (err) + goto out; + + psdev = pcistub_device_find(domain, bus, slot, func); + + if (!psdev) + goto out; + + dev_data = pci_get_drvdata(psdev->dev); + if (!dev_data) + goto out; + + dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n", + dev_data->irq_name, dev_data->isr_on, + !dev_data->isr_on); + + dev_data->isr_on = !(dev_data->isr_on); + if (dev_data->isr_on) + dev_data->ack_intr = 1; +out: + if (!err) + err = count; + return err; +} +DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch); + +static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf, + size_t count) +{ + int domain, bus, slot, func, reg, size, mask; + int err; + + err = str_to_quirk(buf, &domain, &bus, &slot, &func, ®, &size, + &mask); + if (err) + goto out; + + err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask); + +out: + if (!err) + err = count; + return err; +} + +static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf) +{ + int count = 0; + unsigned long flags; + struct xen_pcibk_config_quirk *quirk; + struct xen_pcibk_dev_data *dev_data; + const struct config_field *field; + const struct config_field_entry *cfg_entry; + + spin_lock_irqsave(&device_ids_lock, flags); + list_for_each_entry(quirk, &xen_pcibk_quirks, quirks_list) { + if (count >= PAGE_SIZE) + goto out; + + count += scnprintf(buf + count, PAGE_SIZE - count, + "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n", + quirk->pdev->bus->number, + PCI_SLOT(quirk->pdev->devfn), + PCI_FUNC(quirk->pdev->devfn), + quirk->devid.vendor, quirk->devid.device, + quirk->devid.subvendor, + quirk->devid.subdevice); + + dev_data = pci_get_drvdata(quirk->pdev); + + list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { + field = cfg_entry->field; + if (count >= PAGE_SIZE) + goto out; + + count += scnprintf(buf + count, PAGE_SIZE - count, + "\t\t%08x:%01x:%08x\n", + cfg_entry->base_offset + + field->offset, field->size, + field->mask); + } + } + +out: + spin_unlock_irqrestore(&device_ids_lock, flags); + + return count; +} + +DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add); + +static ssize_t permissive_add(struct device_driver *drv, const char *buf, + size_t count) +{ + int domain, bus, slot, func; + int err; + struct pcistub_device *psdev; + struct xen_pcibk_dev_data *dev_data; + err = str_to_slot(buf, &domain, &bus, &slot, &func); + if (err) + goto out; + psdev = pcistub_device_find(domain, bus, slot, func); + if (!psdev) { + err = -ENODEV; + goto out; + } + if (!psdev->dev) { + err = -ENODEV; + goto release; + } + dev_data = pci_get_drvdata(psdev->dev); + /* the driver data for a device should never be null at this point */ + if (!dev_data) { + err = -ENXIO; + goto release; + } + if (!dev_data->permissive) { + dev_data->permissive = 1; + /* Let user know that what they're doing could be unsafe */ + dev_warn(&psdev->dev->dev, "enabling permissive mode " + "configuration space accesses!\n"); + dev_warn(&psdev->dev->dev, + "permissive mode is potentially unsafe!\n"); + } +release: + 
pcistub_device_put(psdev); +out: + if (!err) + err = count; + return err; +} + +static ssize_t permissive_show(struct device_driver *drv, char *buf) +{ + struct pcistub_device *psdev; + struct xen_pcibk_dev_data *dev_data; + size_t count = 0; + unsigned long flags; + spin_lock_irqsave(&pcistub_devices_lock, flags); + list_for_each_entry(psdev, &pcistub_devices, dev_list) { + if (count >= PAGE_SIZE) + break; + if (!psdev->dev) + continue; + dev_data = pci_get_drvdata(psdev->dev); + if (!dev_data || !dev_data->permissive) + continue; + count += + scnprintf(buf + count, PAGE_SIZE - count, "%s\n", + pci_name(psdev->dev)); + } + spin_unlock_irqrestore(&pcistub_devices_lock, flags); + return count; +} + +DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add); + +static void pcistub_exit(void) +{ + driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_new_slot); + driver_remove_file(&xen_pcibk_pci_driver.driver, + &driver_attr_remove_slot); + driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_slots); + driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_quirks); + driver_remove_file(&xen_pcibk_pci_driver.driver, + &driver_attr_permissive); + driver_remove_file(&xen_pcibk_pci_driver.driver, + &driver_attr_irq_handlers); + driver_remove_file(&xen_pcibk_pci_driver.driver, + &driver_attr_irq_handler_state); + pci_unregister_driver(&xen_pcibk_pci_driver); +} + +static int __init pcistub_init(void) +{ + int pos = 0; + int err = 0; + int domain, bus, slot, func; + int parsed; + + if (pci_devs_to_hide && *pci_devs_to_hide) { + do { + parsed = 0; + + err = sscanf(pci_devs_to_hide + pos, + " (%x:%x:%x.%x) %n", + &domain, &bus, &slot, &func, &parsed); + if (err != 4) { + domain = 0; + err = sscanf(pci_devs_to_hide + pos, + " (%x:%x.%x) %n", + &bus, &slot, &func, &parsed); + if (err != 3) + goto parse_error; + } + + err = pcistub_device_id_add(domain, bus, slot, func); + if (err) + goto out; + + /* if parsed<=0, we've reached the end of the string */ + pos += parsed; + } while (parsed > 0 && pci_devs_to_hide[pos]); + } + + /* If we're the first PCI Device Driver to register, we're the + * first one to get offered PCI devices as they become + * available (and thus we can be the first to grab them) + */ + err = pci_register_driver(&xen_pcibk_pci_driver); + if (err < 0) + goto out; + + err = driver_create_file(&xen_pcibk_pci_driver.driver, + &driver_attr_new_slot); + if (!err) + err = driver_create_file(&xen_pcibk_pci_driver.driver, + &driver_attr_remove_slot); + if (!err) + err = driver_create_file(&xen_pcibk_pci_driver.driver, + &driver_attr_slots); + if (!err) + err = driver_create_file(&xen_pcibk_pci_driver.driver, + &driver_attr_quirks); + if (!err) + err = driver_create_file(&xen_pcibk_pci_driver.driver, + &driver_attr_permissive); + + if (!err) + err = driver_create_file(&xen_pcibk_pci_driver.driver, + &driver_attr_irq_handlers); + if (!err) + err = driver_create_file(&xen_pcibk_pci_driver.driver, + &driver_attr_irq_handler_state); + if (err) + pcistub_exit(); + +out: + return err; + +parse_error: + printk(KERN_ERR DRV_NAME ": Error parsing pci_devs_to_hide at \"%s\"\n", + pci_devs_to_hide + pos); + return -EINVAL; +} + +#ifndef MODULE +/* + * fs_initcall happens before device_initcall + * so xen_pcibk *should* get called first (b/c we + * want to suck up any device before other drivers + * get a chance by being the first pci device + * driver to register) + */ +fs_initcall(pcistub_init); +#endif + +static int __init xen_pcibk_init(void) +{ + int err; + + 
if (!xen_initial_domain()) + return -ENODEV; + + err = xen_pcibk_config_init(); + if (err) + return err; + +#ifdef MODULE + err = pcistub_init(); + if (err < 0) + return err; +#endif + + pcistub_init_devices_late(); + err = xen_pcibk_xenbus_register(); + if (err) + pcistub_exit(); + + return err; +} + +static void __exit xen_pcibk_cleanup(void) +{ + xen_pcibk_xenbus_unregister(); + pcistub_exit(); +} + +module_init(xen_pcibk_init); +module_exit(xen_pcibk_cleanup); + +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h new file mode 100644 index 000000000000..a0e131a81503 --- /dev/null +++ b/drivers/xen/xen-pciback/pciback.h @@ -0,0 +1,183 @@ +/* + * PCI Backend Common Data Structures & Function Declarations + * + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> + */ +#ifndef __XEN_PCIBACK_H__ +#define __XEN_PCIBACK_H__ + +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <xen/xenbus.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> +#include <linux/atomic.h> +#include <xen/interface/io/pciif.h> + +struct pci_dev_entry { + struct list_head list; + struct pci_dev *dev; +}; + +#define _PDEVF_op_active (0) +#define PDEVF_op_active (1<<(_PDEVF_op_active)) +#define _PCIB_op_pending (1) +#define PCIB_op_pending (1<<(_PCIB_op_pending)) + +struct xen_pcibk_device { + void *pci_dev_data; + spinlock_t dev_lock; + struct xenbus_device *xdev; + struct xenbus_watch be_watch; + u8 be_watching; + int evtchn_irq; + struct xen_pci_sharedinfo *sh_info; + unsigned long flags; + struct work_struct op_work; +}; + +struct xen_pcibk_dev_data { + struct list_head config_fields; + unsigned int permissive:1; + unsigned int warned_on_write:1; + unsigned int enable_intx:1; + unsigned int isr_on:1; /* Whether the IRQ handler is installed. */ + unsigned int ack_intr:1; /* .. 
and ACK-ing */
+ unsigned long handled;
+ unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
+ char irq_name[0]; /* xen-pcibk[000:04:00.0] */
+};
+
+/* Used by XenBus and xen_pcibk_ops.c */
+extern wait_queue_head_t xen_pcibk_aer_wait_queue;
+extern struct workqueue_struct *xen_pcibk_wq;
+/* Used by pcistub.c and conf_space_quirks.c */
+extern struct list_head xen_pcibk_quirks;
+
+/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
+struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
+ int domain, int bus,
+ int slot, int func);
+struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
+ struct pci_dev *dev);
+void pcistub_put_pci_dev(struct pci_dev *dev);
+
+/* Ensure a device is turned off or reset */
+void xen_pcibk_reset_device(struct pci_dev *pdev);
+
+/* Access a virtual configuration space for a PCI device */
+int xen_pcibk_config_init(void);
+int xen_pcibk_config_init_dev(struct pci_dev *dev);
+void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev);
+void xen_pcibk_config_reset_dev(struct pci_dev *dev);
+void xen_pcibk_config_free_dev(struct pci_dev *dev);
+int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
+ u32 *ret_val);
+int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size,
+ u32 value);
+
+/* Handle requests for specific devices from the frontend */
+typedef int (*publish_pci_dev_cb) (struct xen_pcibk_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn, unsigned int devid);
+typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev,
+ unsigned int domain, unsigned int bus);
+
+/* Backend registration for the two types of BDF representation:
+ * vpci - BDFs start at 00
+ * passthrough - BDFs are exactly like in the host.
+ */
+struct xen_pcibk_backend {
+ char *name;
+ int (*init)(struct xen_pcibk_device *pdev);
+ void (*free)(struct xen_pcibk_device *pdev);
+ int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev,
+ unsigned int *domain, unsigned int *bus,
+ unsigned int *devfn);
+ int (*publish)(struct xen_pcibk_device *pdev, publish_pci_root_cb cb);
+ void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev);
+ int (*add)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
+ int devid, publish_pci_dev_cb publish_cb);
+ struct pci_dev *(*get)(struct xen_pcibk_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn);
+};
+
+extern struct xen_pcibk_backend xen_pcibk_vpci_backend;
+extern struct xen_pcibk_backend xen_pcibk_passthrough_backend;
+extern struct xen_pcibk_backend *xen_pcibk_backend;
+
+static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
+ struct pci_dev *dev,
+ int devid,
+ publish_pci_dev_cb publish_cb)
+{
+ if (xen_pcibk_backend && xen_pcibk_backend->add)
+ return xen_pcibk_backend->add(pdev, dev, devid, publish_cb);
+ return -1;
+}
+static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
+ struct pci_dev *dev)
+{
+ /* Check the callback we actually invoke (->release, not ->free). */
+ if (xen_pcibk_backend && xen_pcibk_backend->release)
+ return xen_pcibk_backend->release(pdev, dev);
+}
+
+static inline struct pci_dev *
+xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
+ unsigned int bus, unsigned int devfn)
+{
+ if (xen_pcibk_backend && xen_pcibk_backend->get)
+ return xen_pcibk_backend->get(pdev, domain, bus, devfn);
+ return NULL;
+}
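+/* Editor's note: a minimal sketch, not part of the original patch. The
+ * NULL-checked inline wrappers above implement a plain ops-table dispatch,
+ * so a hypothetical additional backend would plug in by filling the same
+ * table and pointing xen_pcibk_backend at it during registration; all
+ * names prefixed my_ are illustrative only:
+ *
+ *	static int my_init(struct xen_pcibk_device *pdev)
+ *	{
+ *		pdev->pci_dev_data = NULL;	// per-backend private state
+ *		return 0;
+ *	}
+ *
+ *	static struct xen_pcibk_backend my_backend = {
+ *		.name = "my-backend",
+ *		.init = my_init,
+ *	};
+ *
+ * Callbacks left NULL are tolerated: each wrapper checks the pointer first
+ * and falls back to returning -1 (or doing nothing) when it is unset.
+ */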
+/* Added for domain0 PCIe AER handling: get the guest's domain/bus/devfn in
+ * xen_pcibk before sending the AER request to pcifront, so that the guest
+ * can identify the device and cooperate with xen_pcibk to finish the AER
+ * recovery job, if the device driver has the capability.
+ */
+static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
+ struct xen_pcibk_device *pdev,
+ unsigned int *domain,
+ unsigned int *bus,
+ unsigned int *devfn)
+{
+ if (xen_pcibk_backend && xen_pcibk_backend->find)
+ return xen_pcibk_backend->find(pcidev, pdev, domain, bus,
+ devfn);
+ return -1;
+}
+static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
+{
+ if (xen_pcibk_backend && xen_pcibk_backend->init)
+ return xen_pcibk_backend->init(pdev);
+ return -1;
+}
+static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
+ publish_pci_root_cb cb)
+{
+ if (xen_pcibk_backend && xen_pcibk_backend->publish)
+ return xen_pcibk_backend->publish(pdev, cb);
+ return -1;
+}
+static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
+{
+ if (xen_pcibk_backend && xen_pcibk_backend->free)
+ return xen_pcibk_backend->free(pdev);
+}
+/* Handles events from the front-end */
+irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
+void xen_pcibk_do_op(struct work_struct *data);
+
+int xen_pcibk_xenbus_register(void);
+void xen_pcibk_xenbus_unregister(void);
+
+extern int verbose_request;
+
+void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev);
+
+/* Handles shared IRQs that can go to both the device domain and the
+ * control domain.
+ */
+void xen_pcibk_irq_handler(struct pci_dev *dev, int reset);
+#endif
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
new file mode 100644
index 000000000000..8c95c3415b75
--- /dev/null
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -0,0 +1,384 @@
+/*
+ * PCI Backend Operations - respond to PCI requests from Frontend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+#include <linux/module.h>
+#include <linux/wait.h>
+#include <linux/bitops.h>
+#include <xen/events.h>
+#include <linux/sched.h>
+#include "pciback.h"
+
+#define DRV_NAME "xen-pciback"
+int verbose_request;
+module_param(verbose_request, int, 0644);
+
+static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id);
+
+/* Ensure a device has its fake IRQ handler "turned on/off" and is
+ * ready to be exported. This MUST be run after xen_pcibk_reset_device,
+ * which does the actual PCI device enable/disable.
+ */
+static void xen_pcibk_control_isr(struct pci_dev *dev, int reset)
+{
+ struct xen_pcibk_dev_data *dev_data;
+ int rc;
+ int enable = 0;
+
+ dev_data = pci_get_drvdata(dev);
+ if (!dev_data)
+ return;
+
+ /* We don't deal with bridges */
+ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
+ return;
+
+ if (reset) {
+ dev_data->enable_intx = 0;
+ dev_data->ack_intr = 0;
+ }
+ enable = dev_data->enable_intx;
+
+ /* Asked to disable, but the ISR isn't running */
+ if (!enable && !dev_data->isr_on)
+ return;
+
+ /* Squirrel away the IRQs in the dev_data. We need this
+ * b/c when the device transitions to MSI, dev->irq is
+ * overwritten with the MSI vector.
+ */
+ if (enable)
+ dev_data->irq = dev->irq;
+
+ /*
+ * SR-IOV devices all use MSI-X and have no legacy
+ * interrupts, so inhibit creating a fake IRQ handler for them.
+ */
+ if (dev_data->irq == 0)
+ goto out;
+
+ dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
+ dev_data->irq_name,
+ dev_data->irq,
+ pci_is_enabled(dev) ? "on" : "off",
+ dev->msi_enabled ? "MSI" : "",
+ dev->msix_enabled ? "MSI/X" : "",
+ dev_data->isr_on ? "enable" : "disable",
+ enable ? "enable" : "disable");
"enable" : "disable", + enable ? "enable" : "disable"); + + if (enable) { + rc = request_irq(dev_data->irq, + xen_pcibk_guest_interrupt, IRQF_SHARED, + dev_data->irq_name, dev); + if (rc) { + dev_err(&dev->dev, "%s: failed to install fake IRQ " \ + "handler for IRQ %d! (rc:%d)\n", + dev_data->irq_name, dev_data->irq, rc); + goto out; + } + } else { + free_irq(dev_data->irq, dev); + dev_data->irq = 0; + } + dev_data->isr_on = enable; + dev_data->ack_intr = enable; +out: + dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n", + dev_data->irq_name, + dev_data->irq, + pci_is_enabled(dev) ? "on" : "off", + dev->msi_enabled ? "MSI" : "", + dev->msix_enabled ? "MSI/X" : "", + enable ? (dev_data->isr_on ? "enabled" : "failed to enable") : + (dev_data->isr_on ? "failed to disable" : "disabled")); +} + +/* Ensure a device is "turned off" and ready to be exported. + * (Also see xen_pcibk_config_reset to ensure virtual configuration space is + * ready to be re-exported) + */ +void xen_pcibk_reset_device(struct pci_dev *dev) +{ + u16 cmd; + + xen_pcibk_control_isr(dev, 1 /* reset device */); + + /* Disable devices (but not bridges) */ + if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { +#ifdef CONFIG_PCI_MSI + /* The guest could have been abruptly killed without + * disabling MSI/MSI-X interrupts.*/ + if (dev->msix_enabled) + pci_disable_msix(dev); + if (dev->msi_enabled) + pci_disable_msi(dev); +#endif + pci_disable_device(dev); + + pci_write_config_word(dev, PCI_COMMAND, 0); + + dev->is_busmaster = 0; + } else { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + if (cmd & (PCI_COMMAND_INVALIDATE)) { + cmd &= ~(PCI_COMMAND_INVALIDATE); + pci_write_config_word(dev, PCI_COMMAND, cmd); + + dev->is_busmaster = 0; + } + } +} + +#ifdef CONFIG_PCI_MSI +static +int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) +{ + struct xen_pcibk_dev_data *dev_data; + int otherend = pdev->xdev->otherend_id; + int status; + + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev)); + + status = pci_enable_msi(dev); + + if (status) { + printk(KERN_ERR "error enable msi for guest %x status %x\n", + otherend, status); + op->value = 0; + return XEN_PCI_ERR_op_failed; + } + + /* The value the guest needs is actually the IDT vector, not the + * the local domain's IRQ number. */ + + op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), + op->value); + + dev_data = pci_get_drvdata(dev); + if (dev_data) + dev_data->ack_intr = 0; + + return 0; +} + +static +int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev, + struct pci_dev *dev, struct xen_pci_op *op) +{ + struct xen_pcibk_dev_data *dev_data; + + if (unlikely(verbose_request)) + printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n", + pci_name(dev)); + pci_disable_msi(dev); + + op->value = dev->irq ? 
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
+ op->value);
+ dev_data = pci_get_drvdata(dev);
+ if (dev_data)
+ dev_data->ack_intr = 1;
+ return 0;
+}
+
+static
+int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op)
+{
+ struct xen_pcibk_dev_data *dev_data;
+ int i, result;
+ struct msix_entry *entries;
+
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n",
+ pci_name(dev));
+ if (op->value > SH_INFO_MAX_VEC)
+ return -EINVAL;
+
+ entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
+ if (entries == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < op->value; i++) {
+ entries[i].entry = op->msix_entries[i].entry;
+ entries[i].vector = op->msix_entries[i].vector;
+ }
+
+ result = pci_enable_msix(dev, entries, op->value);
+
+ if (result == 0) {
+ for (i = 0; i < op->value; i++) {
+ op->msix_entries[i].entry = entries[i].entry;
+ if (entries[i].vector)
+ op->msix_entries[i].vector =
+ xen_pirq_from_irq(entries[i].vector);
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: "
+ "MSI-X[%d]: %d\n",
+ pci_name(dev), i,
+ op->msix_entries[i].vector);
+ }
+ } else {
+ printk(KERN_WARNING DRV_NAME ": %s: failed to enable MSI-X: err %d!\n",
+ pci_name(dev), result);
+ }
+ kfree(entries);
+
+ op->value = result;
+ dev_data = pci_get_drvdata(dev);
+ if (dev_data)
+ dev_data->ack_intr = 0;
+
+ return result;
+}
+
+static
+int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op)
+{
+ struct xen_pcibk_dev_data *dev_data;
+
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n",
+ pci_name(dev));
+ pci_disable_msix(dev);
+
+ /*
+ * SR-IOV devices (which don't have any legacy IRQ) have
+ * an undefined IRQ value of zero.
+ */
+ op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev),
+ op->value);
+ dev_data = pci_get_drvdata(dev);
+ if (dev_data)
+ dev_data->ack_intr = 1;
+ return 0;
+}
+#endif
+/*
+ * Now the same evtchn is used for both pcifront conf_read_write requests
+ * and PCIe AER frontend acks. We use a separate workqueue to schedule the
+ * xen_pcibk conf_read_write service, to avoid a conflict with the AER
+ * core's do_recovery job, which also uses the system default workqueue.
+ */
+void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
+{
+ /* Check that the frontend is requesting an operation and that we are
+ * not already processing a request */
+ if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
+ && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
+ queue_work(xen_pcibk_wq, &pdev->op_work);
+ }
+ /* _XEN_PCIB_active should have been cleared by pcifront. Also make
+ * sure xen_pcibk is waiting for an ack by checking _PCIB_op_pending */
+ if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
+ && test_bit(_PCIB_op_pending, &pdev->flags)) {
+ wake_up(&xen_pcibk_aer_wait_queue);
+ }
+}
+
+/* Performing the configuration space reads/writes must not be done in atomic
+ * context because some of the pci_* functions can sleep (mostly due to ACPI
+ * use of semaphores).
This function is intended to be called from a work + * queue in process context taking a struct xen_pcibk_device as a parameter */ + +void xen_pcibk_do_op(struct work_struct *data) +{ + struct xen_pcibk_device *pdev = + container_of(data, struct xen_pcibk_device, op_work); + struct pci_dev *dev; + struct xen_pcibk_dev_data *dev_data = NULL; + struct xen_pci_op *op = &pdev->sh_info->op; + int test_intx = 0; + + dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn); + + if (dev == NULL) + op->err = XEN_PCI_ERR_dev_not_found; + else { + dev_data = pci_get_drvdata(dev); + if (dev_data) + test_intx = dev_data->enable_intx; + switch (op->cmd) { + case XEN_PCI_OP_conf_read: + op->err = xen_pcibk_config_read(dev, + op->offset, op->size, &op->value); + break; + case XEN_PCI_OP_conf_write: + op->err = xen_pcibk_config_write(dev, + op->offset, op->size, op->value); + break; +#ifdef CONFIG_PCI_MSI + case XEN_PCI_OP_enable_msi: + op->err = xen_pcibk_enable_msi(pdev, dev, op); + break; + case XEN_PCI_OP_disable_msi: + op->err = xen_pcibk_disable_msi(pdev, dev, op); + break; + case XEN_PCI_OP_enable_msix: + op->err = xen_pcibk_enable_msix(pdev, dev, op); + break; + case XEN_PCI_OP_disable_msix: + op->err = xen_pcibk_disable_msix(pdev, dev, op); + break; +#endif + default: + op->err = XEN_PCI_ERR_not_implemented; + break; + } + } + if (!op->err && dev && dev_data) { + /* Transition detected */ + if ((dev_data->enable_intx != test_intx)) + xen_pcibk_control_isr(dev, 0 /* no reset */); + } + /* Tell the driver domain that we're done. */ + wmb(); + clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); + notify_remote_via_irq(pdev->evtchn_irq); + + /* Mark that we're done. */ + smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */ + clear_bit(_PDEVF_op_active, &pdev->flags); + smp_mb__after_clear_bit(); /* /before/ final check for work */ + + /* Check to see if the driver domain tried to start another request in + * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. + */ + xen_pcibk_test_and_schedule_op(pdev); +} + +irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) +{ + struct xen_pcibk_device *pdev = dev_id; + + xen_pcibk_test_and_schedule_op(pdev); + + return IRQ_HANDLED; +} +static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id) +{ + struct pci_dev *dev = (struct pci_dev *)dev_id; + struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); + + if (dev_data->isr_on && dev_data->ack_intr) { + dev_data->handled++; + if ((dev_data->handled % 1000) == 0) { + if (xen_test_irq_shared(irq)) { + printk(KERN_INFO "%s IRQ line is not shared " + "with other domains. 
Turning ISR off\n", + dev_data->irq_name); + dev_data->ack_intr = 0; + } + } + return IRQ_HANDLED; + } + return IRQ_NONE; +} diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c new file mode 100644 index 000000000000..4a42cfb0959d --- /dev/null +++ b/drivers/xen/xen-pciback/vpci.c @@ -0,0 +1,259 @@ +/* + * PCI Backend - Provides a Virtual PCI bus (with real devices) + * to the frontend + * + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> + */ + +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include "pciback.h" + +#define PCI_SLOT_MAX 32 +#define DRV_NAME "xen-pciback" + +struct vpci_dev_data { + /* Access to dev_list must be protected by lock */ + struct list_head dev_list[PCI_SLOT_MAX]; + spinlock_t lock; +}; + +static inline struct list_head *list_first(struct list_head *head) +{ + return head->next; +} + +static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, + unsigned int domain, + unsigned int bus, + unsigned int devfn) +{ + struct pci_dev_entry *entry; + struct pci_dev *dev = NULL; + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; + unsigned long flags; + + if (domain != 0 || bus != 0) + return NULL; + + if (PCI_SLOT(devfn) < PCI_SLOT_MAX) { + spin_lock_irqsave(&vpci_dev->lock, flags); + + list_for_each_entry(entry, + &vpci_dev->dev_list[PCI_SLOT(devfn)], + list) { + if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) { + dev = entry->dev; + break; + } + } + + spin_unlock_irqrestore(&vpci_dev->lock, flags); + } + return dev; +} + +static inline int match_slot(struct pci_dev *l, struct pci_dev *r) +{ + if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus) + && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn)) + return 1; + + return 0; +} + +static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, + struct pci_dev *dev, int devid, + publish_pci_dev_cb publish_cb) +{ + int err = 0, slot, func = -1; + struct pci_dev_entry *t, *dev_entry; + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; + unsigned long flags; + + if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) { + err = -EFAULT; + xenbus_dev_fatal(pdev->xdev, err, + "Can't export bridges on the virtual PCI bus"); + goto out; + } + + dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL); + if (!dev_entry) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "Error adding entry to virtual PCI bus"); + goto out; + } + + dev_entry->dev = dev; + + spin_lock_irqsave(&vpci_dev->lock, flags); + + /* Keep multi-function devices together on the virtual PCI bus */ + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + if (!list_empty(&vpci_dev->dev_list[slot])) { + t = list_entry(list_first(&vpci_dev->dev_list[slot]), + struct pci_dev_entry, list); + + if (match_slot(dev, t->dev)) { + pr_info(DRV_NAME ": vpci: %s: " + "assign to virtual slot %d func %d\n", + pci_name(dev), slot, + PCI_FUNC(dev->devfn)); + list_add_tail(&dev_entry->list, + &vpci_dev->dev_list[slot]); + func = PCI_FUNC(dev->devfn); + goto unlock; + } + } + } + + /* Assign to a new slot on the virtual PCI bus */ + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + if (list_empty(&vpci_dev->dev_list[slot])) { + printk(KERN_INFO DRV_NAME + ": vpci: %s: assign to virtual slot %d\n", + pci_name(dev), slot); + list_add_tail(&dev_entry->list, + &vpci_dev->dev_list[slot]); + func = PCI_FUNC(dev->devfn); + goto unlock; + } + } + + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "No more space on root virtual PCI bus"); + +unlock: + 
spin_unlock_irqrestore(&vpci_dev->lock, flags); + + /* Publish this device. */ + if (!err) + err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid); + +out: + return err; +} + +static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, + struct pci_dev *dev) +{ + int slot; + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; + struct pci_dev *found_dev = NULL; + unsigned long flags; + + spin_lock_irqsave(&vpci_dev->lock, flags); + + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + struct pci_dev_entry *e, *tmp; + list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], + list) { + if (e->dev == dev) { + list_del(&e->list); + found_dev = e->dev; + kfree(e); + goto out; + } + } + } + +out: + spin_unlock_irqrestore(&vpci_dev->lock, flags); + + if (found_dev) + pcistub_put_pci_dev(found_dev); +} + +static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev) +{ + int slot; + struct vpci_dev_data *vpci_dev; + + vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL); + if (!vpci_dev) + return -ENOMEM; + + spin_lock_init(&vpci_dev->lock); + + for (slot = 0; slot < PCI_SLOT_MAX; slot++) + INIT_LIST_HEAD(&vpci_dev->dev_list[slot]); + + pdev->pci_dev_data = vpci_dev; + + return 0; +} + +static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, + publish_pci_root_cb publish_cb) +{ + /* The Virtual PCI bus has only one root */ + return publish_cb(pdev, 0, 0); +} + +static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev) +{ + int slot; + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; + + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + struct pci_dev_entry *e, *tmp; + list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], + list) { + list_del(&e->list); + pcistub_put_pci_dev(e->dev); + kfree(e); + } + } + + kfree(vpci_dev); + pdev->pci_dev_data = NULL; +} + +static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, + struct xen_pcibk_device *pdev, + unsigned int *domain, unsigned int *bus, + unsigned int *devfn) +{ + struct pci_dev_entry *entry; + struct pci_dev *dev = NULL; + struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; + unsigned long flags; + int found = 0, slot; + + spin_lock_irqsave(&vpci_dev->lock, flags); + for (slot = 0; slot < PCI_SLOT_MAX; slot++) { + list_for_each_entry(entry, + &vpci_dev->dev_list[slot], + list) { + dev = entry->dev; + if (dev && dev->bus->number == pcidev->bus->number + && pci_domain_nr(dev->bus) == + pci_domain_nr(pcidev->bus) + && dev->devfn == pcidev->devfn) { + found = 1; + *domain = 0; + *bus = 0; + *devfn = PCI_DEVFN(slot, + PCI_FUNC(pcidev->devfn)); + } + } + } + spin_unlock_irqrestore(&vpci_dev->lock, flags); + return found; +} + +struct xen_pcibk_backend xen_pcibk_vpci_backend = { + .name = "vpci", + .init = __xen_pcibk_init_devices, + .free = __xen_pcibk_release_devices, + .find = __xen_pcibk_get_pcifront_dev, + .publish = __xen_pcibk_publish_pci_roots, + .release = __xen_pcibk_release_pci_dev, + .add = __xen_pcibk_add_pci_dev, + .get = __xen_pcibk_get_pci_dev, +}; diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c new file mode 100644 index 000000000000..206c4ce030bc --- /dev/null +++ b/drivers/xen/xen-pciback/xenbus.c @@ -0,0 +1,749 @@ +/* + * PCI Backend Xenbus Setup - handles setup with frontend and xend + * + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/vmalloc.h> +#include <linux/workqueue.h> +#include <xen/xenbus.h> +#include <xen/events.h> +#include 
<asm/xen/pci.h>
+#include "pciback.h"
+
+#define DRV_NAME "xen-pciback"
+#define INVALID_EVTCHN_IRQ (-1)
+struct workqueue_struct *xen_pcibk_wq;
+
+static bool __read_mostly passthrough;
+module_param(passthrough, bool, S_IRUGO);
+MODULE_PARM_DESC(passthrough,
+ "Option to specify how to export PCI topology to guest:\n"\
+ " 0 - (default) Hide the true PCI topology and make the frontend think\n"\
+ " there is a single PCI bus with only the exported devices on it.\n"\
+ " For example, a device at 03:05.0 will be re-assigned to 00:00.0\n"\
+ " while a second device at 02:1a.1 will be re-assigned to 00:01.1.\n"\
+ " 1 - Passthrough provides a real view of the PCI topology to the\n"\
+ " frontend (for example, a device at 06:01.b will still appear at\n"\
+ " 06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\
+ " exposed PCI devices to its driver domains. This may be required\n"\
+ " for drivers which depend on finding their hardware in certain\n"\
+ " bus/slot locations.");
+
+static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
+{
+ struct xen_pcibk_device *pdev;
+
+ pdev = kzalloc(sizeof(struct xen_pcibk_device), GFP_KERNEL);
+ if (pdev == NULL)
+ goto out;
+ dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
+
+ pdev->xdev = xdev;
+ dev_set_drvdata(&xdev->dev, pdev);
+
+ spin_lock_init(&pdev->dev_lock);
+
+ pdev->sh_info = NULL;
+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
+ pdev->be_watching = 0;
+
+ INIT_WORK(&pdev->op_work, xen_pcibk_do_op);
+
+ if (xen_pcibk_init_devices(pdev)) {
+ kfree(pdev);
+ pdev = NULL;
+ }
+out:
+ return pdev;
+}
+
+static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
+{
+ spin_lock(&pdev->dev_lock);
+
+ /* Ensure the guest can't trigger our handler before removing devices */
+ if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
+ unbind_from_irqhandler(pdev->evtchn_irq, pdev);
+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
+ }
+ spin_unlock(&pdev->dev_lock);
+
+ /* If the driver domain started an op, make sure we complete it
+ * before releasing the shared memory */
+
+ /* Note, the workqueue does not use spinlocks at all. */
+ flush_workqueue(xen_pcibk_wq);
+
+ spin_lock(&pdev->dev_lock);
+ if (pdev->sh_info != NULL) {
+ xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
+ pdev->sh_info = NULL;
+ }
+ spin_unlock(&pdev->dev_lock);
+}
+
+static void free_pdev(struct xen_pcibk_device *pdev)
+{
+ if (pdev->be_watching) {
+ unregister_xenbus_watch(&pdev->be_watch);
+ pdev->be_watching = 0;
+ }
+
+ xen_pcibk_disconnect(pdev);
+
+ xen_pcibk_release_devices(pdev);
+
+ dev_set_drvdata(&pdev->xdev->dev, NULL);
+ pdev->xdev = NULL;
+
+ kfree(pdev);
+}
+
+static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
+ int remote_evtchn)
+{
+ int err = 0;
+ void *vaddr;
+
+ dev_dbg(&pdev->xdev->dev,
+ "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
+ gnt_ref, remote_evtchn);
+
+ err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
+ if (err < 0) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error mapping other domain page in ours.");
+ goto out;
+ }
+
+ spin_lock(&pdev->dev_lock);
+ pdev->sh_info = vaddr;
+ spin_unlock(&pdev->dev_lock);
+
+ err = bind_interdomain_evtchn_to_irqhandler(
+ pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
+ 0, DRV_NAME, pdev);
+ if (err < 0) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error binding event channel to IRQ");
+ goto out;
+ }
+
+ spin_lock(&pdev->dev_lock);
+ pdev->evtchn_irq = err;
+ spin_unlock(&pdev->dev_lock);
+ err = 0;
+
+ dev_dbg(&pdev->xdev->dev, "Attached!\n");
+out:
+ return err;
+}
+
+static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
+{
+ int err = 0;
+ int gnt_ref, remote_evtchn;
+ char *magic = NULL;
+
+ /* Make sure we only do this setup once */
+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+ XenbusStateInitialised)
+ goto out;
+
+ /* Wait for the frontend to state that it has published the
+ * configuration */
+ if (xenbus_read_driver_state(pdev->xdev->otherend) !=
+ XenbusStateInitialised)
+ goto out;
+
+ dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
+
+ err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
+ "pci-op-ref", "%u", &gnt_ref,
+ "event-channel", "%u", &remote_evtchn,
+ "magic", NULL, &magic, NULL);
+ if (err) {
+ /* If the configuration didn't get read correctly, wait longer */
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error reading configuration from frontend");
+ goto out;
+ }
+
+ if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
+ xenbus_dev_fatal(pdev->xdev, -EFAULT,
+ "version mismatch (%s/%s) with pcifront - "
+ "halting xen_pcibk",
+ magic, XEN_PCI_MAGIC);
+ goto out;
+ }
+
+ err = xen_pcibk_do_attach(pdev, gnt_ref, remote_evtchn);
+ if (err)
+ goto out;
+
+ dev_dbg(&pdev->xdev->dev, "Connecting...\n");
+
+ err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
+ if (err)
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error switching to connected state!");
+
+ dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
+out:
+ kfree(magic);
+
+ return err;
+}
+
+static int xen_pcibk_publish_pci_dev(struct xen_pcibk_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn, unsigned int devid)
+{
+ int err;
+ int len;
+ char str[64];
+
+ len = snprintf(str, sizeof(str), "vdev-%d", devid);
+ if (unlikely(len >= (sizeof(str) - 1))) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
+ "%04x:%02x:%02x.%02x", domain, bus,
+ PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+out:
+ return err;
+}
+
+static int xen_pcibk_export_device(struct xen_pcibk_device *pdev,
+ int domain, int bus, int slot, int func,
+ int devid)
+{
+ struct pci_dev *dev;
+ int err = 0;
+
+ dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
+ domain, bus, slot, func);
+
+ dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
+ if (!dev) {
+ err = -EINVAL;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Couldn't locate PCI device "
+ "(%04x:%02x:%02x.%01x)! "
+ "perhaps already in-use?",
+ domain, bus, slot, func);
+ goto out;
+ }
+
+ err = xen_pcibk_add_pci_dev(pdev, dev, devid,
+ xen_pcibk_publish_pci_dev);
+ if (err)
+ goto out;
+
+ dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
+ if (xen_register_device_domain_owner(dev,
+ pdev->xdev->otherend_id) != 0) {
+ dev_err(&dev->dev, "device has been assigned to another "
+ "domain! Over-writing the ownership, but beware.\n");
+ xen_unregister_device_domain_owner(dev);
+ xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
+ }
+
+ /* TODO: It'd be nice to export a bridge and have all of its children
+ * get exported with it. This may be best done in xend (which will
+ * have to calculate resource usage anyway) but we probably want to
+ * put something in here to ensure that if a bridge gets given to a
+ * driver domain, all devices under that bridge are not given
+ * to other driver domains (as whoever controls the bridge can disable
+ * it and stop the other devices from working).
+ */ +out: + return err; +} + +static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev, + int domain, int bus, int slot, int func) +{ + int err = 0; + struct pci_dev *dev; + + dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n", + domain, bus, slot, func); + + dev = xen_pcibk_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func)); + if (!dev) { + err = -EINVAL; + dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device " + "(%04x:%02x:%02x.%01x)! not owned by this domain\n", + domain, bus, slot, func); + goto out; + } + + dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id); + xen_unregister_device_domain_owner(dev); + + xen_pcibk_release_pci_dev(pdev, dev); + +out: + return err; +} + +static int xen_pcibk_publish_pci_root(struct xen_pcibk_device *pdev, + unsigned int domain, unsigned int bus) +{ + unsigned int d, b; + int i, root_num, len, err; + char str[64]; + + dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n"); + + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, + "root_num", "%d", &root_num); + if (err == 0 || err == -ENOENT) + root_num = 0; + else if (err < 0) + goto out; + + /* Verify that we haven't already published this pci root */ + for (i = 0; i < root_num; i++) { + len = snprintf(str, sizeof(str), "root-%d", i); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, + str, "%x:%x", &d, &b); + if (err < 0) + goto out; + if (err != 2) { + err = -EINVAL; + goto out; + } + + if (d == domain && b == bus) { + err = 0; + goto out; + } + } + + len = snprintf(str, sizeof(str), "root-%d", root_num); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n", + root_num, domain, bus); + + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, + "%04x:%02x", domain, bus); + if (err) + goto out; + + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, + "root_num", "%d", (root_num + 1)); + +out: + return err; +} + +static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev) +{ + int err = 0; + int num_devs; + int domain, bus, slot, func; + int substate; + int i, len; + char state_str[64]; + char dev_str[64]; + + + dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); + + /* Make sure we only reconfigure once */ + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateReconfiguring) + goto out; + + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", + &num_devs); + if (err != 1) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of devices"); + goto out; + } + + for (i = 0; i < num_devs; i++) { + len = snprintf(state_str, sizeof(state_str), "state-%d", i); + if (unlikely(len >= (sizeof(state_str) - 1))) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "String overflow while reading " + "configuration"); + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str, + "%d", &substate); + if (err != 1) + substate = XenbusStateUnknown; + + switch (substate) { + case XenbusStateInitialising: + dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i); + + len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i); + if (unlikely(len >= (sizeof(dev_str) - 1))) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "String overflow while " + "reading configuration"); + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, + dev_str, "%x:%x:%x.%x", + &domain, &bus, &slot, &func); + if (err < 0) 
{ + xenbus_dev_fatal(pdev->xdev, err, + "Error reading device " + "configuration"); + goto out; + } + if (err != 4) { + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error parsing pci device " + "configuration"); + goto out; + } + + err = xen_pcibk_export_device(pdev, domain, bus, slot, + func, i); + if (err) + goto out; + + /* Publish pci roots. */ + err = xen_pcibk_publish_pci_roots(pdev, + xen_pcibk_publish_pci_root); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error while publish PCI root" + "buses for frontend"); + goto out; + } + + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, + state_str, "%d", + XenbusStateInitialised); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error switching substate of " + "dev-%d\n", i); + goto out; + } + break; + + case XenbusStateClosing: + dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i); + + len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i); + if (unlikely(len >= (sizeof(dev_str) - 1))) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "String overflow while " + "reading configuration"); + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, + dev_str, "%x:%x:%x.%x", + &domain, &bus, &slot, &func); + if (err < 0) { + xenbus_dev_fatal(pdev->xdev, err, + "Error reading device " + "configuration"); + goto out; + } + if (err != 4) { + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error parsing pci device " + "configuration"); + goto out; + } + + err = xen_pcibk_remove_device(pdev, domain, bus, slot, + func); + if (err) + goto out; + + /* TODO: If at some point we implement support for pci + * root hot-remove on pcifront side, we'll need to + * remove unnecessary xenstore nodes of pci roots here. + */ + + break; + + default: + break; + } + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error switching to reconfigured state!"); + goto out; + } + +out: + return 0; +} + +static void xen_pcibk_frontend_changed(struct xenbus_device *xdev, + enum xenbus_state fe_state) +{ + struct xen_pcibk_device *pdev = dev_get_drvdata(&xdev->dev); + + dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state); + + switch (fe_state) { + case XenbusStateInitialised: + xen_pcibk_attach(pdev); + break; + + case XenbusStateReconfiguring: + xen_pcibk_reconfigure(pdev); + break; + + case XenbusStateConnected: + /* pcifront switched its state from reconfiguring to connected. + * Then switch to connected state. + */ + xenbus_switch_state(xdev, XenbusStateConnected); + break; + + case XenbusStateClosing: + xen_pcibk_disconnect(pdev); + xenbus_switch_state(xdev, XenbusStateClosing); + break; + + case XenbusStateClosed: + xen_pcibk_disconnect(pdev); + xenbus_switch_state(xdev, XenbusStateClosed); + if (xenbus_dev_is_online(xdev)) + break; + /* fall through if not online */ + case XenbusStateUnknown: + dev_dbg(&xdev->dev, "frontend is gone! unregister device\n"); + device_unregister(&xdev->dev); + break; + + default: + break; + } +} + +static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev) +{ + /* Get configuration from xend (if available now) */ + int domain, bus, slot, func; + int err = 0; + int i, num_devs; + char dev_str[64]; + char state_str[64]; + + /* It's possible we could get the call to setup twice, so make sure + * we're not already connected. 
+ */ + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateInitWait) + goto out; + + dev_dbg(&pdev->xdev->dev, "getting be setup\n"); + + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", + &num_devs); + if (err != 1) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of devices"); + goto out; + } + + for (i = 0; i < num_devs; i++) { + int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i); + if (unlikely(l >= (sizeof(dev_str) - 1))) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "String overflow while reading " + "configuration"); + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str, + "%x:%x:%x.%x", &domain, &bus, &slot, &func); + if (err < 0) { + xenbus_dev_fatal(pdev->xdev, err, + "Error reading device configuration"); + goto out; + } + if (err != 4) { + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error parsing pci device " + "configuration"); + goto out; + } + + err = xen_pcibk_export_device(pdev, domain, bus, slot, func, i); + if (err) + goto out; + + /* Switch substate of this device. */ + l = snprintf(state_str, sizeof(state_str), "state-%d", i); + if (unlikely(l >= (sizeof(state_str) - 1))) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "String overflow while reading " + "configuration"); + goto out; + } + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str, + "%d", XenbusStateInitialised); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, "Error switching " + "substate of dev-%d\n", i); + goto out; + } + } + + err = xen_pcibk_publish_pci_roots(pdev, xen_pcibk_publish_pci_root); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error while publish PCI root buses " + "for frontend"); + goto out; + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised); + if (err) + xenbus_dev_fatal(pdev->xdev, err, + "Error switching to initialised state!"); + +out: + if (!err) + /* see if pcifront is already configured (if not, we'll wait) */ + xen_pcibk_attach(pdev); + + return err; +} + +static void xen_pcibk_be_watch(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + struct xen_pcibk_device *pdev = + container_of(watch, struct xen_pcibk_device, be_watch); + + switch (xenbus_read_driver_state(pdev->xdev->nodename)) { + case XenbusStateInitWait: + xen_pcibk_setup_backend(pdev); + break; + + default: + break; + } +} + +static int xen_pcibk_xenbus_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err = 0; + struct xen_pcibk_device *pdev = alloc_pdev(dev); + + if (pdev == NULL) { + err = -ENOMEM; + xenbus_dev_fatal(dev, err, + "Error allocating xen_pcibk_device struct"); + goto out; + } + + /* wait for xend to configure us */ + err = xenbus_switch_state(dev, XenbusStateInitWait); + if (err) + goto out; + + /* watch the backend node for backend configuration information */ + err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch, + xen_pcibk_be_watch); + if (err) + goto out; + + pdev->be_watching = 1; + + /* We need to force a call to our callback here in case + * xend already configured us! 
+static int xen_pcibk_xenbus_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err = 0; + struct xen_pcibk_device *pdev = alloc_pdev(dev); + + if (pdev == NULL) { + err = -ENOMEM; + xenbus_dev_fatal(dev, err, + "Error allocating xen_pcibk_device struct"); + goto out; + } + + /* wait for xend to configure us */ + err = xenbus_switch_state(dev, XenbusStateInitWait); + if (err) + goto out; + + /* watch the backend node for backend configuration information */ + err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch, + xen_pcibk_be_watch); + if (err) + goto out; + + pdev->be_watching = 1; + + /* We need to force a call to our callback here in case + * xend already configured us! + */ + xen_pcibk_be_watch(&pdev->be_watch, NULL, 0); + +out: + return err; +} + +static int xen_pcibk_xenbus_remove(struct xenbus_device *dev) +{ + struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev); + + if (pdev != NULL) + free_pdev(pdev); + + return 0; +} + +static const struct xenbus_device_id xenpci_ids[] = { + {"pci"}, + {""}, +}; + +static struct xenbus_driver xenbus_xen_pcibk_driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + .ids = xenpci_ids, + .probe = xen_pcibk_xenbus_probe, + .remove = xen_pcibk_xenbus_remove, + .otherend_changed = xen_pcibk_frontend_changed, +}; + +struct xen_pcibk_backend *xen_pcibk_backend; + +int __init xen_pcibk_xenbus_register(void) +{ + xen_pcibk_wq = create_workqueue("xen_pciback_workqueue"); + if (!xen_pcibk_wq) { + printk(KERN_ERR "%s: create " + "xen_pciback_workqueue failed\n", __func__); + return -EFAULT; + } + xen_pcibk_backend = &xen_pcibk_vpci_backend; + if (passthrough) + xen_pcibk_backend = &xen_pcibk_passthrough_backend; + pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name); + return xenbus_register_backend(&xenbus_xen_pcibk_driver); +} + +void __exit xen_pcibk_xenbus_unregister(void) +{ + destroy_workqueue(xen_pcibk_wq); + xenbus_unregister_driver(&xenbus_xen_pcibk_driver); +} diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c new file mode 100644 index 000000000000..010937b5a7c9 --- /dev/null +++ b/drivers/xen/xen-selfballoon.c @@ -0,0 +1,485 @@ +/****************************************************************************** + * Xen selfballoon driver (and optional frontswap self-shrinking driver) + * + * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. + * + * This code complements the cleancache and frontswap patchsets to optimize + * support for Xen Transcendent Memory ("tmem"). The policy it implements + * is rudimentary and will likely improve over time, but it does work well + * enough today. + * + * Two functionalities are implemented here which both use "control theory" + * (feedback) to optimize memory utilization. In a virtualized environment + * such as Xen, RAM is often a scarce resource and we would like to ensure + * that each of a possibly large number of virtual machines is using RAM + * efficiently, i.e. using as little as possible when under light load + * and obtaining as much as possible when memory demands are high. + * Since RAM needs vary highly dynamically and sometimes dramatically, + * "hysteresis" is used, that is, the memory target is determined not just + * from current data but also from past data stored in the system. + * + * "Selfballooning" creates memory pressure by managing the Xen balloon + * driver to decrease and increase available kernel memory, driven + * largely by the target value of "Committed_AS" (see /proc/meminfo). + * Since Committed_AS does not account for clean mapped pages (i.e. pages + * in RAM that are identical to pages on disk), selfballooning has the + * effect of pushing less frequently used clean pagecache pages out of + * kernel RAM and, presumably using cleancache, into Xen tmem where + * Xen can more efficiently optimize RAM utilization for such pages. + * + * When kernel memory demand unexpectedly increases faster than Xen (via + * the selfballoon driver) is able to, or chooses to, provide usable RAM, + * the kernel may invoke swapping. In most cases, frontswap is able + * to absorb this swapping into Xen tmem. However, because the kernel + * swap subsystem assumes swapping occurs to a disk, swapped pages may + * sit on the disk for a very long time, even if the kernel knows the + * page will never be used again. This is because disk space costs very + * little and can be overwritten when necessary. When such stale pages + * are in frontswap, however, they are taking up valuable real estate. + * "Frontswap selfshrinking" works to resolve this: when frontswap + * activity is otherwise stable and the guest kernel is not under memory + * pressure, it provides pressure to remove some pages from frontswap + * and return them to kernel memory. + * + * For both "selfballooning" and "frontswap-selfshrinking", a worker + * thread is used and sysfs tunables are provided to adjust the frequency + * and rate of adjustments to achieve the goal, as well as to disable one + * or both functions independently. + * + * While some argue that this functionality can and should be implemented + * in userspace, it has been observed that bad things happen (e.g. OOMs). + * + * System configuration note: Selfballooning should not be enabled on + * systems without a sufficiently large swap device configured; for best + * results, it is recommended that total swap be increased by the size + * of the guest memory. Also, while technically not required to be + * configured, it is highly recommended that frontswap also be configured + * and enabled when selfballooning is running. So, selfballooning + * is disabled by default if frontswap is not configured and can only + * be enabled with the "selfballooning" kernel boot option; similarly + * selfballooning is enabled by default if frontswap is configured and + * can be disabled with the "noselfballooning" kernel boot option. Finally, + * when frontswap is configured, frontswap-selfshrinking can be disabled + * with the "noselfshrink" kernel boot option. + * + * Selfballooning is disallowed in domain0 and is force-disabled there. + * + */ +
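Before the implementation, it may help to see the hysteresis arithmetic from the comment above with concrete numbers. A standalone sketch using this driver's default tunables (the page counts themselves are invented for illustration; the formula mirrors the one in selfballoon_process() below):

#include <stdio.h>

int main(void)
{
	/* Defaults from this driver; page counts are made up. */
	unsigned int downhys = 8, uphys = 1;
	unsigned long cur_pages = 100000;	/* current guest pages */
	unsigned long goal_pages = 60000;	/* Committed_AS-driven goal */
	unsigned long tgt_pages = cur_pages;	/* default: no change */

	if (cur_pages > goal_pages)
		/* Give memory back slowly: close 1/8 of the gap. */
		tgt_pages = cur_pages -
			    (cur_pages - goal_pages) / downhys;
	else if (cur_pages < goal_pages)
		/* Regain memory quickly: close the whole gap. */
		tgt_pages = cur_pages +
			    (goal_pages - cur_pages) / uphys;

	/* Here: 100000 - 40000/8 = 95000 pages. */
	printf("new balloon target: %lu pages\n", tgt_pages);
	return 0;
}

With down-hysteresis 8 the target decays geometrically toward the goal, one eighth of the remaining gap per interval, while up-hysteresis 1 lets the guest reclaim memory in a single step.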
+#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/mman.h> + +#include <xen/balloon.h> + +#include <xen/tmem.h> + +/* Enable/disable with sysfs. */ +static int xen_selfballooning_enabled __read_mostly; + +/* + * Controls rate at which memory target (this iteration) approaches + * ultimate goal when memory need is increasing (up-hysteresis) or + * decreasing (down-hysteresis). Higher values of hysteresis cause + * slower increases/decreases. The default values for the various + * parameters were deemed reasonable by experimentation, may be + * workload-dependent, and can all be adjusted via sysfs. + */ +static unsigned int selfballoon_downhysteresis __read_mostly = 8; +static unsigned int selfballoon_uphysteresis __read_mostly = 1; + +/* In seconds (multiplied by HZ at use), controls frequency of worker + * invocation. */ +static unsigned int selfballoon_interval __read_mostly = 5; + +static void selfballoon_process(struct work_struct *work); +static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); + +#ifdef CONFIG_FRONTSWAP +#include <linux/frontswap.h> + +/* Enable/disable with sysfs. */ +static bool frontswap_selfshrinking __read_mostly; + +/* Enable/disable with kernel boot option. */ +static bool use_frontswap_selfshrink __initdata = true; + +/* + * The default values for the following parameters were deemed reasonable + * by experimentation, may be workload-dependent, and can all be + * adjusted via sysfs. + */ + +/* Control rate for frontswap shrinking. Higher hysteresis is slower.
*/ +static unsigned int frontswap_hysteresis __read_mostly = 20; + +/* + * Number of selfballoon worker invocations to wait before observing that + * frontswap selfshrinking should commence. Note that selfshrinking does + * not use a separate worker thread. + */ +static unsigned int frontswap_inertia __read_mostly = 3; + +/* Countdown to next invocation of frontswap_shrink() */ +static unsigned long frontswap_inertia_counter; + +/* + * Invoked by the selfballoon worker thread, uses current number of pages + * in frontswap (frontswap_curr_pages()), previous status, and control + * values (hysteresis and inertia) to determine if frontswap should be + * shrunk and what the new frontswap size should be. Note that + * frontswap_shrink is essentially a partial swapoff that immediately + * transfers pages from the "swap device" (frontswap) back into kernel + * RAM; despite the name, frontswap "shrinking" is very different from + * the "shrinker" interface used by the kernel MM subsystem to reclaim + * memory. + */ +static void frontswap_selfshrink(void) +{ + static unsigned long cur_frontswap_pages; + static unsigned long last_frontswap_pages; + static unsigned long tgt_frontswap_pages; + + last_frontswap_pages = cur_frontswap_pages; + cur_frontswap_pages = frontswap_curr_pages(); + if (!cur_frontswap_pages || + (cur_frontswap_pages > last_frontswap_pages)) { + frontswap_inertia_counter = frontswap_inertia; + return; + } + if (frontswap_inertia_counter && --frontswap_inertia_counter) + return; + if (cur_frontswap_pages <= frontswap_hysteresis) + tgt_frontswap_pages = 0; + else + tgt_frontswap_pages = cur_frontswap_pages - + (cur_frontswap_pages / frontswap_hysteresis); + frontswap_shrink(tgt_frontswap_pages); +} + +static int __init xen_nofrontswap_selfshrink_setup(char *s) +{ + use_frontswap_selfshrink = false; + return 1; +} + +__setup("noselfshrink", xen_nofrontswap_selfshrink_setup); + +/* Disable with kernel boot option. */ +static bool use_selfballooning __initdata = true; + +static int __init xen_noselfballooning_setup(char *s) +{ + use_selfballooning = false; + return 1; +} + +__setup("noselfballooning", xen_noselfballooning_setup); +#else /* !CONFIG_FRONTSWAP */ +/* Enable with kernel boot option. 
*/ +static bool use_selfballooning __initdata = false; + +static int __init xen_selfballooning_setup(char *s) +{ + use_selfballooning = true; + return 1; +} + +__setup("selfballooning", xen_selfballooning_setup); +#endif /* CONFIG_FRONTSWAP */ + +/* + * Use current balloon size, the goal (vm_committed_as), and hysteresis + * parameters to set a new target balloon size + */ +static void selfballoon_process(struct work_struct *work) +{ + unsigned long cur_pages, goal_pages, tgt_pages; + bool reset_timer = false; + + if (xen_selfballooning_enabled) { + cur_pages = balloon_stats.current_pages; + tgt_pages = cur_pages; /* default is no change */ + goal_pages = percpu_counter_read_positive(&vm_committed_as) + + balloon_stats.current_pages - totalram_pages; +#ifdef CONFIG_FRONTSWAP + /* allow space for frontswap pages to be repatriated */ + if (frontswap_selfshrinking && frontswap_enabled) + goal_pages += frontswap_curr_pages(); +#endif + if (cur_pages > goal_pages) + tgt_pages = cur_pages - + ((cur_pages - goal_pages) / + selfballoon_downhysteresis); + else if (cur_pages < goal_pages) + tgt_pages = cur_pages + + ((goal_pages - cur_pages) / + selfballoon_uphysteresis); + /* else if cur_pages == goal_pages, no change */ + balloon_set_new_target(tgt_pages); + reset_timer = true; + } +#ifdef CONFIG_FRONTSWAP + if (frontswap_selfshrinking && frontswap_enabled) { + frontswap_selfshrink(); + reset_timer = true; + } +#endif + if (reset_timer) + schedule_delayed_work(&selfballoon_worker, + selfballoon_interval * HZ); +} + +#ifdef CONFIG_SYSFS + +#include <linux/sysdev.h> +#include <linux/capability.h> + +#define SELFBALLOON_SHOW(name, format, args...) \ + static ssize_t show_##name(struct sys_device *dev, \ + struct sysdev_attribute *attr, \ + char *buf) \ + { \ + return sprintf(buf, format, ##args); \ + } + +SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled); + +static ssize_t store_selfballooning(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + bool was_enabled = xen_selfballooning_enabled; + unsigned long tmp; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + err = strict_strtoul(buf, 10, &tmp); + if (err || ((tmp != 0) && (tmp != 1))) + return -EINVAL; + + xen_selfballooning_enabled = !!tmp; + if (!was_enabled && xen_selfballooning_enabled) + schedule_delayed_work(&selfballoon_worker, + selfballoon_interval * HZ); + + return count; +} + +static SYSDEV_ATTR(selfballooning, S_IRUGO | S_IWUSR, + show_selfballooning, store_selfballooning); + +SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval); + +static ssize_t store_selfballoon_interval(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + selfballoon_interval = val; + return count; +} + +static SYSDEV_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR, + show_selfballoon_interval, store_selfballoon_interval); + +SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis); + +static ssize_t store_selfballoon_downhys(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + selfballoon_downhysteresis = val; + return count; +} + +static 
SYSDEV_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR, + show_selfballoon_downhys, store_selfballoon_downhys); + + +SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis); + +static ssize_t store_selfballoon_uphys(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + selfballoon_uphysteresis = val; + return count; +} + +static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR, + show_selfballoon_uphys, store_selfballoon_uphys); + +#ifdef CONFIG_FRONTSWAP +SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking); + +static ssize_t store_frontswap_selfshrinking(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + bool was_enabled = frontswap_selfshrinking; + unsigned long tmp; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &tmp); + if (err || ((tmp != 0) && (tmp != 1))) + return -EINVAL; + frontswap_selfshrinking = !!tmp; + if (!was_enabled && !xen_selfballooning_enabled && + frontswap_selfshrinking) + schedule_delayed_work(&selfballoon_worker, + selfballoon_interval * HZ); + + return count; +} + +static SYSDEV_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR, + show_frontswap_selfshrinking, store_frontswap_selfshrinking); + +SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia); + +static ssize_t store_frontswap_inertia(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + frontswap_inertia = val; + frontswap_inertia_counter = val; + return count; +} + +static SYSDEV_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR, + show_frontswap_inertia, store_frontswap_inertia); + +SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis); + +static ssize_t store_frontswap_hysteresis(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + frontswap_hysteresis = val; + return count; +} + +static SYSDEV_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR, + show_frontswap_hysteresis, store_frontswap_hysteresis); + +#endif /* CONFIG_FRONTSWAP */ + +static struct attribute *selfballoon_attrs[] = { + &attr_selfballooning.attr, + &attr_selfballoon_interval.attr, + &attr_selfballoon_downhysteresis.attr, + &attr_selfballoon_uphysteresis.attr, +#ifdef CONFIG_FRONTSWAP + &attr_frontswap_selfshrinking.attr, + &attr_frontswap_hysteresis.attr, + &attr_frontswap_inertia.attr, +#endif + NULL +}; + +static struct attribute_group selfballoon_group = { + .name = "selfballoon", + .attrs = selfballoon_attrs +}; +#endif + +int register_xen_selfballooning(struct sys_device *sysdev) +{ + int error = -1; + +#ifdef CONFIG_SYSFS + error = sysfs_create_group(&sysdev->kobj, &selfballoon_group); +#endif + return error; +} +EXPORT_SYMBOL(register_xen_selfballooning); + +static int __init xen_selfballoon_init(void) +{ + bool enable = false; + + if (!xen_domain()) + return -ENODEV; + + if (xen_initial_domain()) { + pr_info("xen/balloon: Xen selfballooning driver " + "disabled for domain0.\n"); + 
return -ENODEV; + } + + xen_selfballooning_enabled = tmem_enabled && use_selfballooning; + if (xen_selfballooning_enabled) { + pr_info("xen/balloon: Initializing Xen " + "selfballooning driver.\n"); + enable = true; + } +#ifdef CONFIG_FRONTSWAP + frontswap_selfshrinking = tmem_enabled && use_frontswap_selfshrink; + if (frontswap_selfshrinking) { + pr_info("xen/balloon: Initializing frontswap " + "selfshrinking driver.\n"); + enable = true; + } +#endif + if (!enable) + return -ENODEV; + + schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ); + + return 0; +} + +subsys_initcall(xen_selfballoon_init); + +MODULE_LICENSE("GPL"); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 739769551e33..bd2f90c9ac8b 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -378,26 +378,32 @@ static void xenbus_dev_release(struct device *dev) kfree(to_xenbus_device(dev)); } -static ssize_t xendev_show_nodename(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t nodename_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); } -static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL); -static ssize_t xendev_show_devtype(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t devtype_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); } -static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); -static ssize_t xendev_show_modalias(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t modalias_show(struct device *dev, + struct device_attribute *attr, char *buf) { - return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype); + return sprintf(buf, "%s:%s\n", dev->bus->name, + to_xenbus_device(dev)->devicetype); } -static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL); + +struct device_attribute xenbus_dev_attrs[] = { + __ATTR_RO(nodename), + __ATTR_RO(devtype), + __ATTR_RO(modalias), + __ATTR_NULL +}; +EXPORT_SYMBOL_GPL(xenbus_dev_attrs); int xenbus_probe_node(struct xen_bus_type *bus, const char *type, @@ -449,25 +455,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, if (err) goto fail; - err = device_create_file(&xendev->dev, &dev_attr_nodename); - if (err) - goto fail_unregister; - - err = device_create_file(&xendev->dev, &dev_attr_devtype); - if (err) - goto fail_remove_nodename; - - err = device_create_file(&xendev->dev, &dev_attr_modalias); - if (err) - goto fail_remove_devtype; - return 0; -fail_remove_devtype: - device_remove_file(&xendev->dev, &dev_attr_devtype); -fail_remove_nodename: - device_remove_file(&xendev->dev, &dev_attr_nodename); -fail_unregister: - device_unregister(&xendev->dev); fail: kfree(xendev); return err; diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h index 888b9900ca08..b814935378c7 100644 --- a/drivers/xen/xenbus/xenbus_probe.h +++ b/drivers/xen/xenbus/xenbus_probe.h @@ -48,6 +48,8 @@ struct xen_bus_type struct bus_type bus; }; +extern struct device_attribute xenbus_dev_attrs[]; + extern int xenbus_match(struct device *_dev, struct device_driver *_drv); extern int xenbus_dev_probe(struct device *_dev); extern int xenbus_dev_remove(struct device *_dev); diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c 
b/drivers/xen/xenbus/xenbus_probe_backend.c index 6cf467bf63ec..60adf919d78d 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -107,6 +107,9 @@ static int xenbus_uevent_backend(struct device *dev, if (xdev == NULL) return -ENODEV; + if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype)) + return -ENOMEM; + /* stuff we want to pass to /sbin/hotplug */ if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) return -ENOMEM; @@ -183,10 +186,6 @@ static void frontend_changed(struct xenbus_watch *watch, xenbus_otherend_changed(watch, vec, len, 0); } -static struct device_attribute xenbus_backend_dev_attrs[] = { - __ATTR_NULL -}; - static struct xen_bus_type xenbus_backend = { .root = "backend", .levels = 3, /* backend/type/<frontend>/<id> */ @@ -200,7 +199,7 @@ static struct xen_bus_type xenbus_backend = { .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, - .dev_attrs = xenbus_backend_dev_attrs, + .dev_attrs = xenbus_dev_attrs, }, }; diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index b6a2690c9d49..ed2ba474a560 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -81,10 +81,6 @@ static void backend_changed(struct xenbus_watch *watch, xenbus_otherend_changed(watch, vec, len, 1); } -static struct device_attribute xenbus_frontend_dev_attrs[] = { - __ATTR_NULL -}; - static const struct dev_pm_ops xenbus_pm_ops = { .suspend = xenbus_dev_suspend, .resume = xenbus_dev_resume, @@ -106,7 +102,7 @@ static struct xen_bus_type xenbus_frontend = { .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, - .dev_attrs = xenbus_frontend_dev_attrs, + .dev_attrs = xenbus_dev_attrs, .pm = &xenbus_pm_ops, }, |
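A practical consequence of the two MODALIAS hunks above: backend devices now emit MODALIAS=xen-backend:<devicetype> uevents, and frontend devices report <bus>:<devicetype> through the shared modalias attribute, so module autoloading can work on both buses. A hedged sketch of the alias a backend module would declare to opt in; the xen-pciback alias shown is an illustrative assumption, not something added by this patch:

#include <linux/module.h>

/*
 * With the uevent above emitting "MODALIAS=xen-backend:pci",
 * udev can modprobe a module that declares a matching alias.
 * Hypothetical example for a pci backend module:
 */
MODULE_ALIAS("xen-backend:pci");

/*
 * Frontend devices expose the same string via sysfs through the
 * shared modalias_show(), e.g. (device name illustrative):
 *	$ cat /sys/bus/xen/devices/vif-0/modalias
 *	xen:vif
 */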