summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-10-11 20:29:01 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-10-11 20:29:01 -0400
commit81ae31d78239318610d7c2acb3e2610d622a5aa4 (patch)
tree1e31b300f1574fceaff065a9bd92460b7c466f7c
parentef4a48c513211d842c55e84f7a1c31884b91dcf7 (diff)
parent95afae481414cbdb0567bf82d5e5077c3ac9da20 (diff)
downloadlinux-81ae31d78239318610d7c2acb3e2610d622a5aa4.tar.bz2
Merge tag 'stable/for-linus-3.18-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull Xen updates from David Vrabel: "Features and fixes: - Add pvscsi frontend and backend drivers. - Remove _PAGE_IOMAP PTE flag, freeing it for alternate uses. - Try and keep memory contiguous during PV memory setup (reduces SWIOTLB usage). - Allow front/back drivers to use threaded irqs. - Support large initrds in PV guests. - Fix PVH guests in preparation for Xen 4.5" * tag 'stable/for-linus-3.18-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (22 commits) xen: remove DEFINE_XENBUS_DRIVER() macro xen/xenbus: Remove BUG_ON() when error string trucated xen/xenbus: Correct the comments for xenbus_grant_ring() x86/xen: Set EFER.NX and EFER.SCE in PVH guests xen: eliminate scalability issues from initrd handling xen: sync some headers with xen tree xen: make pvscsi frontend dependant on xenbus frontend arm{,64}/xen: Remove "EXPERIMENTAL" in the description of the Xen options xen-scsifront: don't deadlock if the ring becomes full x86: remove the Xen-specific _PAGE_IOMAP PTE flag x86/xen: do not use _PAGE_IOMAP PTE flag for I/O mappings x86: skip check for spurious faults for non-present faults xen/efi: Directly include needed headers xen-scsiback: clean up a type issue in scsiback_make_tpg() xen-scsifront: use GFP_ATOMIC under spin_lock MAINTAINERS: Add xen pvscsi maintainer xen-scsiback: Add Xen PV SCSI backend driver xen-scsifront: Add Xen PV SCSI frontend driver xen: Add Xen pvSCSI protocol description xen/events: support threaded irqs for interdomain event channels ...
-rw-r--r--MAINTAINERS9
-rw-r--r--arch/arm/Kconfig2
-rw-r--r--arch/arm64/Kconfig2
-rw-r--r--arch/x86/include/asm/pgtable_types.h11
-rw-r--r--arch/x86/mm/fault.c22
-rw-r--r--arch/x86/mm/init_32.c2
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/pci/i386.c2
-rw-r--r--arch/x86/xen/efi.c2
-rw-r--r--arch/x86/xen/enlighten.c19
-rw-r--r--arch/x86/xen/mmu.c48
-rw-r--r--arch/x86/xen/p2m.c23
-rw-r--r--arch/x86/xen/p2m.h15
-rw-r--r--arch/x86/xen/setup.c370
-rw-r--r--arch/x86/xen/smp.c29
-rw-r--r--arch/x86/xen/smp.h8
-rw-r--r--arch/x86/xen/xen-head.S36
-rw-r--r--drivers/block/xen-blkback/xenbus.c11
-rw-r--r--drivers/block/xen-blkfront.c5
-rw-r--r--drivers/char/tpm/xen-tpmfront.c13
-rw-r--r--drivers/input/misc/xen-kbdfront.c5
-rw-r--r--drivers/net/xen-netback/xenbus.c10
-rw-r--r--drivers/net/xen-netfront.c16
-rw-r--r--drivers/pci/xen-pcifront.c6
-rw-r--r--drivers/scsi/Kconfig10
-rw-r--r--drivers/scsi/Makefile1
-rw-r--r--drivers/scsi/xen-scsifront.c1026
-rw-r--r--drivers/tty/hvc/hvc_xen.c9
-rw-r--r--drivers/video/fbdev/xen-fbfront.c5
-rw-r--r--drivers/xen/Kconfig9
-rw-r--r--drivers/xen/Makefile1
-rw-r--r--drivers/xen/efi.c2
-rw-r--r--drivers/xen/events/events_base.c5
-rw-r--r--drivers/xen/grant-table.c2
-rw-r--r--drivers/xen/xen-pciback/xenbus.c6
-rw-r--r--drivers/xen/xen-scsiback.c2126
-rw-r--r--drivers/xen/xenbus/xenbus_client.c9
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c6
-rw-r--r--drivers/xen/xenbus/xenbus_probe.h4
-rw-r--r--drivers/xen/xenbus/xenbus_probe_backend.c8
-rw-r--r--drivers/xen/xenbus/xenbus_probe_frontend.c8
-rw-r--r--include/xen/events.h2
-rw-r--r--include/xen/interface/elfnote.h48
-rw-r--r--include/xen/interface/io/vscsiif.h229
-rw-r--r--include/xen/interface/xen.h272
-rw-r--r--include/xen/xenbus.h21
46 files changed, 4214 insertions, 263 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index b28dc111d4a7..f8d882e13200 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10268,6 +10268,15 @@ S: Supported
F: drivers/block/xen-blkback/*
F: drivers/block/xen*
+XEN PVSCSI DRIVERS
+M: Juergen Gross <jgross@suse.com>
+L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
+L: linux-scsi@vger.kernel.org
+S: Supported
+F: drivers/scsi/xen-scsifront.c
+F: drivers/xen/xen-scsiback.c
+F: include/xen/interface/io/vscsiif.h
+
XEN SWIOTLB SUBSYSTEM
M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 18f392f8b744..89c4b5ccc68d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1779,7 +1779,7 @@ config XEN_DOM0
depends on XEN
config XEN
- bool "Xen guest support on ARM (EXPERIMENTAL)"
+ bool "Xen guest support on ARM"
depends on ARM && AEABI && OF
depends on CPU_V7 && !CPU_V6
depends on !GENERIC_ATOMIC64
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c49ca4c738bb..ac9afde76dea 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -349,7 +349,7 @@ config XEN_DOM0
depends on XEN
config XEN
- bool "Xen guest support on ARM64 (EXPERIMENTAL)"
+ bool "Xen guest support on ARM64"
depends on ARM64 && OF
select SWIOTLB_XEN
help
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0f9724c9c510..07789647bf33 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -23,7 +23,6 @@
#define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1
#define _PAGE_BIT_SPLITTING _PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */
-#define _PAGE_BIT_IOMAP _PAGE_BIT_SOFTW2 /* flag used to indicate IO mapping */
#define _PAGE_BIT_HIDDEN _PAGE_BIT_SOFTW3 /* hidden by kmemcheck */
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
@@ -52,7 +51,7 @@
#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
#define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
-#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
+#define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
@@ -168,10 +167,10 @@
#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
-#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP)
-#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP)
+#define __PAGE_KERNEL_IO (__PAGE_KERNEL)
+#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE)
+#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS)
+#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC)
#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a24194681513..83bb03bfa259 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -933,8 +933,17 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
* cross-processor TLB flush, even if no stale TLB entries exist
* on other processors.
*
+ * Spurious faults may only occur if the TLB contains an entry with
+ * fewer permission than the page table entry. Non-present (P = 0)
+ * and reserved bit (R = 1) faults are never spurious.
+ *
* There are no security implications to leaving a stale TLB when
* increasing the permissions on a page.
+ *
+ * Returns non-zero if a spurious fault was handled, zero otherwise.
+ *
+ * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3
+ * (Optional Invalidation).
*/
static noinline int
spurious_fault(unsigned long error_code, unsigned long address)
@@ -945,8 +954,17 @@ spurious_fault(unsigned long error_code, unsigned long address)
pte_t *pte;
int ret;
- /* Reserved-bit violation or user access to kernel space? */
- if (error_code & (PF_USER | PF_RSVD))
+ /*
+ * Only writes to RO or instruction fetches from NX may cause
+ * spurious faults.
+ *
+ * These could be from user or supervisor accesses but the TLB
+ * is only lazily flushed after a kernel mapping protection
+ * change, so user accesses are not expected to cause spurious
+ * faults.
+ */
+ if (error_code != (PF_WRITE | PF_PROT)
+ && error_code != (PF_INSTR | PF_PROT))
return 0;
pgd = init_mm.pgd + pgd_index(address);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 7d05565ba781..c8140e12816a 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -537,7 +537,7 @@ static void __init pagetable_init(void)
permanent_kmaps_init(pgd_base);
}
-pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
+pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
EXPORT_SYMBOL_GPL(__supported_pte_mask);
/* user-defined highmem size */
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5621c47d7a1a..5d984769cbd8 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -151,7 +151,7 @@ early_param("gbpages", parse_direct_gbpages_on);
* around without checking the pgd every time.
*/
-pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
+pteval_t __supported_pte_mask __read_mostly = ~0;
EXPORT_SYMBOL_GPL(__supported_pte_mask);
int force_personality32;
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 2ae525e0d8ba..37c1435889ce 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -442,8 +442,6 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
*/
prot |= _PAGE_CACHE_UC_MINUS;
- prot |= _PAGE_IOMAP; /* creating a mapping for IO */
-
vma->vm_page_prot = __pgprot(prot);
if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index a02e09e18f57..be14cc3e48d5 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -15,12 +15,14 @@
* with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/bitops.h>
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/string.h>
#include <xen/xen-ops.h>
+#include <asm/page.h>
#include <asm/setup.h>
void __init xen_efi_init(void)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c0cb11fb5008..acb0effd8077 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1463,6 +1463,7 @@ static void __ref xen_setup_gdt(int cpu)
pv_cpu_ops.load_gdt = xen_load_gdt;
}
+#ifdef CONFIG_XEN_PVH
/*
* A PV guest starts with default flags that are not set for PVH, set them
* here asap.
@@ -1508,17 +1509,21 @@ static void __init xen_pvh_early_guest_init(void)
return;
xen_have_vector_callback = 1;
+
+ xen_pvh_early_cpu_init(0, false);
xen_pvh_set_cr_flags(0);
#ifdef CONFIG_X86_32
BUG(); /* PVH: Implement proper support. */
#endif
}
+#endif /* CONFIG_XEN_PVH */
/* First C function to be called on Xen boot */
asmlinkage __visible void __init xen_start_kernel(void)
{
struct physdev_set_iopl set_iopl;
+ unsigned long initrd_start = 0;
int rc;
if (!xen_start_info)
@@ -1527,7 +1532,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_domain_type = XEN_PV_DOMAIN;
xen_setup_features();
+#ifdef CONFIG_XEN_PVH
xen_pvh_early_guest_init();
+#endif
xen_setup_machphys_mapping();
/* Install Xen paravirt ops */
@@ -1559,8 +1566,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
#endif
__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
- __supported_pte_mask |= _PAGE_IOMAP;
-
/*
* Prevent page tables from being allocated in highmem, even
* if CONFIG_HIGHPTE is enabled.
@@ -1667,10 +1672,16 @@ asmlinkage __visible void __init xen_start_kernel(void)
new_cpu_data.x86_capability[0] = cpuid_edx(1);
#endif
+ if (xen_start_info->mod_start) {
+ if (xen_start_info->flags & SIF_MOD_START_PFN)
+ initrd_start = PFN_PHYS(xen_start_info->mod_start);
+ else
+ initrd_start = __pa(xen_start_info->mod_start);
+ }
+
/* Poke various useful things into boot_params */
boot_params.hdr.type_of_loader = (9 << 4) | 0;
- boot_params.hdr.ramdisk_image = xen_start_info->mod_start
- ? __pa(xen_start_info->mod_start) : 0;
+ boot_params.hdr.ramdisk_image = initrd_start;
boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 16fb0099b7f2..f62af7647ec9 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -399,38 +399,14 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (unlikely(mfn == INVALID_P2M_ENTRY)) {
mfn = 0;
flags = 0;
- } else {
- /*
- * Paramount to do this test _after_ the
- * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
- * IDENTITY_FRAME_BIT resolves to true.
- */
- mfn &= ~FOREIGN_FRAME_BIT;
- if (mfn & IDENTITY_FRAME_BIT) {
- mfn &= ~IDENTITY_FRAME_BIT;
- flags |= _PAGE_IOMAP;
- }
- }
+ } else
+ mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
}
return val;
}
-static pteval_t iomap_pte(pteval_t val)
-{
- if (val & _PAGE_PRESENT) {
- unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
- pteval_t flags = val & PTE_FLAGS_MASK;
-
- /* We assume the pte frame number is a MFN, so
- just use it as-is. */
- val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
- }
-
- return val;
-}
-
__visible pteval_t xen_pte_val(pte_t pte)
{
pteval_t pteval = pte.pte;
@@ -441,9 +417,6 @@ __visible pteval_t xen_pte_val(pte_t pte)
pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
}
#endif
- if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
- return pteval;
-
return pte_mfn_to_pfn(pteval);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
@@ -481,7 +454,6 @@ void xen_set_pat(u64 pat)
__visible pte_t xen_make_pte(pteval_t pte)
{
- phys_addr_t addr = (pte & PTE_PFN_MASK);
#if 0
/* If Linux is trying to set a WC pte, then map to the Xen WC.
* If _PAGE_PAT is set, then it probably means it is really
@@ -496,19 +468,7 @@ __visible pte_t xen_make_pte(pteval_t pte)
pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
}
#endif
- /*
- * Unprivileged domains are allowed to do IOMAPpings for
- * PCI passthrough, but not map ISA space. The ISA
- * mappings are just dummy local mappings to keep other
- * parts of the kernel happy.
- */
- if (unlikely(pte & _PAGE_IOMAP) &&
- (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
- pte = iomap_pte(pte);
- } else {
- pte &= ~_PAGE_IOMAP;
- pte = pte_pfn_to_mfn(pte);
- }
+ pte = pte_pfn_to_mfn(pte);
return native_make_pte(pte);
}
@@ -2091,7 +2051,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
default:
/* By default, set_fixmap is used for hardware mappings */
- pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
+ pte = mfn_pte(phys, prot);
break;
}
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 3172692381ae..9f5983b01ed9 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -173,6 +173,7 @@
#include <xen/balloon.h>
#include <xen/grant_table.h>
+#include "p2m.h"
#include "multicalls.h"
#include "xen-ops.h"
@@ -180,12 +181,6 @@ static void __init m2p_override_init(void);
unsigned long xen_max_p2m_pfn __read_mostly;
-#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
-#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *))
-#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **))
-
-#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
-
/* Placeholders for holes in the address space */
static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
@@ -202,16 +197,12 @@ static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE);
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
-/* We might hit two boundary violations at the start and end, at max each
- * boundary violation will require three middle nodes. */
-RESERVE_BRK(p2m_mid_extra, PAGE_SIZE * 2 * 3);
-
-/* When we populate back during bootup, the amount of pages can vary. The
- * max we have is seen is 395979, but that does not mean it can't be more.
- * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle
- * it can re-use Xen provided mfn_list array, so we only need to allocate at
- * most three P2M top nodes. */
-RESERVE_BRK(p2m_populated, PAGE_SIZE * 3);
+/* For each I/O range remapped we may lose up to two leaf pages for the boundary
+ * violations and three mid pages to cover up to 3GB. With
+ * early_can_reuse_p2m_middle() most of the leaf pages will be reused by the
+ * remapped region.
+ */
+RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES);
static inline unsigned p2m_top_index(unsigned long pfn)
{
diff --git a/arch/x86/xen/p2m.h b/arch/x86/xen/p2m.h
new file mode 100644
index 000000000000..ad8aee24ab72
--- /dev/null
+++ b/arch/x86/xen/p2m.h
@@ -0,0 +1,15 @@
+#ifndef _XEN_P2M_H
+#define _XEN_P2M_H
+
+#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
+#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *))
+#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **))
+
+#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
+
+#define MAX_REMAP_RANGES 10
+
+extern unsigned long __init set_phys_range_identity(unsigned long pfn_s,
+ unsigned long pfn_e);
+
+#endif /* _XEN_P2M_H */
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 2e555163c2fe..af7216128d93 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -29,6 +29,7 @@
#include <xen/features.h>
#include "xen-ops.h"
#include "vdso.h"
+#include "p2m.h"
/* These are code, but not functions. Defined in entry.S */
extern const char xen_hypervisor_callback[];
@@ -46,6 +47,9 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
/* Number of pages released from the initial allocation. */
unsigned long xen_released_pages;
+/* Buffer used to remap identity mapped pages */
+unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata;
+
/*
* The maximum amount of extra memory compared to the base size. The
* main scaling factor is the size of struct page. At extreme ratios
@@ -151,107 +155,325 @@ static unsigned long __init xen_do_chunk(unsigned long start,
return len;
}
-static unsigned long __init xen_release_chunk(unsigned long start,
- unsigned long end)
-{
- return xen_do_chunk(start, end, true);
-}
-
-static unsigned long __init xen_populate_chunk(
+/*
+ * Finds the next RAM pfn available in the E820 map after min_pfn.
+ * This function updates min_pfn with the pfn found and returns
+ * the size of that range or zero if not found.
+ */
+static unsigned long __init xen_find_pfn_range(
const struct e820entry *list, size_t map_size,
- unsigned long max_pfn, unsigned long *last_pfn,
- unsigned long credits_left)
+ unsigned long *min_pfn)
{
const struct e820entry *entry;
unsigned int i;
unsigned long done = 0;
- unsigned long dest_pfn;
for (i = 0, entry = list; i < map_size; i++, entry++) {
unsigned long s_pfn;
unsigned long e_pfn;
- unsigned long pfns;
- long capacity;
-
- if (credits_left <= 0)
- break;
if (entry->type != E820_RAM)
continue;
e_pfn = PFN_DOWN(entry->addr + entry->size);
- /* We only care about E820 after the xen_start_info->nr_pages */
- if (e_pfn <= max_pfn)
+ /* We only care about E820 after this */
+ if (e_pfn < *min_pfn)
continue;
s_pfn = PFN_UP(entry->addr);
- /* If the E820 falls within the nr_pages, we want to start
- * at the nr_pages PFN.
- * If that would mean going past the E820 entry, skip it
+
+ /* If min_pfn falls within the E820 entry, we want to start
+ * at the min_pfn PFN.
*/
- if (s_pfn <= max_pfn) {
- capacity = e_pfn - max_pfn;
- dest_pfn = max_pfn;
+ if (s_pfn <= *min_pfn) {
+ done = e_pfn - *min_pfn;
} else {
- capacity = e_pfn - s_pfn;
- dest_pfn = s_pfn;
+ done = e_pfn - s_pfn;
+ *min_pfn = s_pfn;
}
+ break;
+ }
- if (credits_left < capacity)
- capacity = credits_left;
+ return done;
+}
- pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
- done += pfns;
- *last_pfn = (dest_pfn + pfns);
- if (pfns < capacity)
- break;
- credits_left -= pfns;
+/*
+ * This releases a chunk of memory and then does the identity map. It's used as
+ * as a fallback if the remapping fails.
+ */
+static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
+ unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
+ unsigned long *released)
+{
+ WARN_ON(start_pfn > end_pfn);
+
+ /* Need to release pages first */
+ *released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true);
+ *identity += set_phys_range_identity(start_pfn, end_pfn);
+}
+
+/*
+ * Helper function to update both the p2m and m2p tables.
+ */
+static unsigned long __init xen_update_mem_tables(unsigned long pfn,
+ unsigned long mfn)
+{
+ struct mmu_update update = {
+ .ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
+ .val = pfn
+ };
+
+ /* Update p2m */
+ if (!early_set_phys_to_machine(pfn, mfn)) {
+ WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
+ pfn, mfn);
+ return false;
}
- return done;
+
+ /* Update m2p */
+ if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
+ WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
+ mfn, pfn);
+ return false;
+ }
+
+ return true;
}
-static void __init xen_set_identity_and_release_chunk(
- unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
- unsigned long *released, unsigned long *identity)
+/*
+ * This function updates the p2m and m2p tables with an identity map from
+ * start_pfn to start_pfn+size and remaps the underlying RAM of the original
+ * allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks
+ * to not exhaust the reserved brk space. Doing it in properly aligned blocks
+ * ensures we only allocate the minimum required leaf pages in the p2m table. It
+ * copies the existing mfns from the p2m table under the 1:1 map, overwrites
+ * them with the identity map and then updates the p2m and m2p tables with the
+ * remapped memory.
+ */
+static unsigned long __init xen_do_set_identity_and_remap_chunk(
+ unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{
- unsigned long pfn;
+ unsigned long ident_pfn_iter, remap_pfn_iter;
+ unsigned long ident_start_pfn_align, remap_start_pfn_align;
+ unsigned long ident_end_pfn_align, remap_end_pfn_align;
+ unsigned long ident_boundary_pfn, remap_boundary_pfn;
+ unsigned long ident_cnt = 0;
+ unsigned long remap_cnt = 0;
+ unsigned long left = size;
+ unsigned long mod;
+ int i;
+
+ WARN_ON(size == 0);
+
+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
/*
- * If the PFNs are currently mapped, clear the mappings
- * (except for the ISA region which must be 1:1 mapped) to
- * release the refcounts (in Xen) on the original frames.
+ * Determine the proper alignment to remap memory in P2M_PER_PAGE sized
+ * blocks. We need to keep track of both the existing pfn mapping and
+ * the new pfn remapping.
*/
- for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
- pte_t pte = __pte_ma(0);
+ mod = start_pfn % P2M_PER_PAGE;
+ ident_start_pfn_align =
+ mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn;
+ mod = remap_pfn % P2M_PER_PAGE;
+ remap_start_pfn_align =
+ mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn;
+ mod = (start_pfn + size) % P2M_PER_PAGE;
+ ident_end_pfn_align = start_pfn + size - mod;
+ mod = (remap_pfn + size) % P2M_PER_PAGE;
+ remap_end_pfn_align = remap_pfn + size - mod;
+
+ /* Iterate over each p2m leaf node in each range */
+ for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align;
+ ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align;
+ ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) {
+ /* Check we aren't past the end */
+ BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size);
+ BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size);
+
+ /* Save p2m mappings */
+ for (i = 0; i < P2M_PER_PAGE; i++)
+ xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i);
+
+ /* Set identity map which will free a p2m leaf */
+ ident_cnt += set_phys_range_identity(ident_pfn_iter,
+ ident_pfn_iter + P2M_PER_PAGE);
+
+#ifdef DEBUG
+ /* Helps verify a p2m leaf has been freed */
+ for (i = 0; i < P2M_PER_PAGE; i++) {
+ unsigned int pfn = ident_pfn_iter + i;
+ BUG_ON(pfn_to_mfn(pfn) != pfn);
+ }
+#endif
+ /* Now remap memory */
+ for (i = 0; i < P2M_PER_PAGE; i++) {
+ unsigned long mfn = xen_remap_buf[i];
+
+ /* This will use the p2m leaf freed above */
+ if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) {
+ WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
+ remap_pfn_iter + i, mfn);
+ return 0;
+ }
+
+ remap_cnt++;
+ }
- if (pfn < PFN_UP(ISA_END_ADDRESS))
- pte = mfn_pte(pfn, PAGE_KERNEL_IO);
+ left -= P2M_PER_PAGE;
+ }
- (void)HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT), pte, 0);
+ /* Max boundary space possible */
+ BUG_ON(left > (P2M_PER_PAGE - 1) * 2);
+
+ /* Now handle the boundary conditions */
+ ident_boundary_pfn = start_pfn;
+ remap_boundary_pfn = remap_pfn;
+ for (i = 0; i < left; i++) {
+ unsigned long mfn;
+
+ /* These two checks move from the start to end boundaries */
+ if (ident_boundary_pfn == ident_start_pfn_align)
+ ident_boundary_pfn = ident_pfn_iter;
+ if (remap_boundary_pfn == remap_start_pfn_align)
+ remap_boundary_pfn = remap_pfn_iter;
+
+ /* Check we aren't past the end */
+ BUG_ON(ident_boundary_pfn >= start_pfn + size);
+ BUG_ON(remap_boundary_pfn >= remap_pfn + size);
+
+ mfn = pfn_to_mfn(ident_boundary_pfn);
+
+ if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) {
+ WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
+ remap_pfn_iter + i, mfn);
+ return 0;
+ }
+ remap_cnt++;
+
+ ident_boundary_pfn++;
+ remap_boundary_pfn++;
}
- if (start_pfn < nr_pages)
- *released += xen_release_chunk(
- start_pfn, min(end_pfn, nr_pages));
+ /* Finish up the identity map */
+ if (ident_start_pfn_align >= ident_end_pfn_align) {
+ /*
+ * In this case we have an identity range which does not span an
+ * aligned block so everything needs to be identity mapped here.
+ * If we didn't check this we might remap too many pages since
+ * the align boundaries are not meaningful in this case.
+ */
+ ident_cnt += set_phys_range_identity(start_pfn,
+ start_pfn + size);
+ } else {
+ /* Remapped above so check each end of the chunk */
+ if (start_pfn < ident_start_pfn_align)
+ ident_cnt += set_phys_range_identity(start_pfn,
+ ident_start_pfn_align);
+ if (start_pfn + size > ident_pfn_iter)
+ ident_cnt += set_phys_range_identity(ident_pfn_iter,
+ start_pfn + size);
+ }
- *identity += set_phys_range_identity(start_pfn, end_pfn);
+ BUG_ON(ident_cnt != size);
+ BUG_ON(remap_cnt != size);
+
+ return size;
}
-static unsigned long __init xen_set_identity_and_release(
- const struct e820entry *list, size_t map_size, unsigned long nr_pages)
+/*
+ * This function takes a contiguous pfn range that needs to be identity mapped
+ * and:
+ *
+ * 1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
+ * 2) Calls the do_ function to actually do the mapping/remapping work.
+ *
+ * The goal is to not allocate additional memory but to remap the existing
+ * pages. In the case of an error the underlying memory is simply released back
+ * to Xen and not remapped.
+ */
+static unsigned long __init xen_set_identity_and_remap_chunk(
+ const struct e820entry *list, size_t map_size, unsigned long start_pfn,
+ unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
+ unsigned long *identity, unsigned long *remapped,
+ unsigned long *released)
+{
+ unsigned long pfn;
+ unsigned long i = 0;
+ unsigned long n = end_pfn - start_pfn;
+
+ while (i < n) {
+ unsigned long cur_pfn = start_pfn + i;
+ unsigned long left = n - i;
+ unsigned long size = left;
+ unsigned long remap_range_size;
+
+ /* Do not remap pages beyond the current allocation */
+ if (cur_pfn >= nr_pages) {
+ /* Identity map remaining pages */
+ *identity += set_phys_range_identity(cur_pfn,
+ cur_pfn + size);
+ break;
+ }
+ if (cur_pfn + size > nr_pages)
+ size = nr_pages - cur_pfn;
+
+ remap_range_size = xen_find_pfn_range(list, map_size,
+ &remap_pfn);
+ if (!remap_range_size) {
+ pr_warning("Unable to find available pfn range, not remapping identity pages\n");
+ xen_set_identity_and_release_chunk(cur_pfn,
+ cur_pfn + left, nr_pages, identity, released);
+ break;
+ }
+ /* Adjust size to fit in current e820 RAM region */
+ if (size > remap_range_size)
+ size = remap_range_size;
+
+ if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) {
+ WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n",
+ cur_pfn, size, remap_pfn);
+ xen_set_identity_and_release_chunk(cur_pfn,
+ cur_pfn + left, nr_pages, identity, released);
+ break;
+ }
+
+ /* Update variables to reflect new mappings. */
+ i += size;
+ remap_pfn += size;
+ *identity += size;
+ *remapped += size;
+ }
+
+ /*
+ * If the PFNs are currently mapped, the VA mapping also needs
+ * to be updated to be 1:1.
+ */
+ for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
+ (void)HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ mfn_pte(pfn, PAGE_KERNEL_IO), 0);
+
+ return remap_pfn;
+}
+
+static unsigned long __init xen_set_identity_and_remap(
+ const struct e820entry *list, size_t map_size, unsigned long nr_pages,
+ unsigned long *released)
{
phys_addr_t start = 0;
- unsigned long released = 0;
unsigned long identity = 0;
+ unsigned long remapped = 0;
+ unsigned long last_pfn = nr_pages;
const struct e820entry *entry;
+ unsigned long num_released = 0;
int i;
/*
* Combine non-RAM regions and gaps until a RAM region (or the
* end of the map) is reached, then set the 1:1 map and
- * release the pages (if available) in those non-RAM regions.
+ * remap the memory in those non-RAM regions.
*
* The combined non-RAM regions are rounded to a whole number
* of pages so any partial pages are accessible via the 1:1
@@ -269,22 +491,24 @@ static unsigned long __init xen_set_identity_and_release(
end_pfn = PFN_UP(entry->addr);
if (start_pfn < end_pfn)
- xen_set_identity_and_release_chunk(
- start_pfn, end_pfn, nr_pages,
- &released, &identity);
-
+ last_pfn = xen_set_identity_and_remap_chunk(
+ list, map_size, start_pfn,
+ end_pfn, nr_pages, last_pfn,
+ &identity, &remapped,
+ &num_released);
start = end;
}
}
- if (released)
- printk(KERN_INFO "Released %lu pages of unused memory\n", released);
- if (identity)
- printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
+ *released = num_released;
- return released;
-}
+ pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
+ pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped,
+ last_pfn);
+ pr_info("Released %ld page(s)\n", num_released);
+ return last_pfn;
+}
static unsigned long __init xen_get_max_pages(void)
{
unsigned long max_pages = MAX_DOMAIN_PAGES;
@@ -347,7 +571,6 @@ char * __init xen_memory_setup(void)
unsigned long max_pages;
unsigned long last_pfn = 0;
unsigned long extra_pages = 0;
- unsigned long populated;
int i;
int op;
@@ -392,20 +615,11 @@ char * __init xen_memory_setup(void)
extra_pages += max_pages - max_pfn;
/*
- * Set P2M for all non-RAM pages and E820 gaps to be identity
- * type PFNs. Any RAM pages that would be made inaccesible by
- * this are first released.
+ * Set identity map on non-RAM pages and remap the underlying RAM.
*/
- xen_released_pages = xen_set_identity_and_release(
- map, memmap.nr_entries, max_pfn);
-
- /*
- * Populate back the non-RAM pages and E820 gaps that had been
- * released. */
- populated = xen_populate_chunk(map, memmap.nr_entries,
- max_pfn, &last_pfn, xen_released_pages);
+ last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
+ &xen_released_pages);
- xen_released_pages -= populated;
extra_pages += xen_released_pages;
if (last_pfn > max_pfn) {
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 7005974c3ff3..c670d7518cf4 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -37,6 +37,7 @@
#include <xen/hvc-console.h>
#include "xen-ops.h"
#include "mmu.h"
+#include "smp.h"
cpumask_var_t xen_cpu_initialized_map;
@@ -99,10 +100,14 @@ static void cpu_bringup(void)
wmb(); /* make sure everything is out */
}
-/* Note: cpu parameter is only relevant for PVH */
-static void cpu_bringup_and_idle(int cpu)
+/*
+ * Note: cpu parameter is only relevant for PVH. The reason for passing it
+ * is we can't do smp_processor_id until the percpu segments are loaded, for
+ * which we need the cpu number! So we pass it in rdi as first parameter.
+ */
+asmlinkage __visible void cpu_bringup_and_idle(int cpu)
{
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_XEN_PVH
if (xen_feature(XENFEAT_auto_translated_physmap) &&
xen_feature(XENFEAT_supervisor_mode_kernel))
xen_pvh_secondary_vcpu_init(cpu);
@@ -374,11 +379,10 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->user_regs.fs = __KERNEL_PERCPU;
ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#endif
- ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
-
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
ctxt->flags = VGCF_IN_KERNEL;
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
ctxt->user_regs.ds = __USER_DS;
@@ -413,15 +417,18 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
(unsigned long)xen_failsafe_callback;
ctxt->user_regs.cs = __KERNEL_CS;
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
-#ifdef CONFIG_X86_32
}
-#else
- } else
- /* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with
- * %rdi having the cpu number - which means are passing in
- * as the first parameter the cpu. Subtle!
+#ifdef CONFIG_XEN_PVH
+ else {
+ /*
+ * The vcpu comes on kernel page tables which have the NX pte
+ * bit set. This means before DS/SS is touched, NX in
+ * EFER must be set. Hence the following assembly glue code.
*/
+ ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init;
ctxt->user_regs.rdi = cpu;
+ ctxt->user_regs.rsi = true; /* entry == true */
+ }
#endif
ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index c7c2d89efd76..963d62a35c82 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -8,4 +8,12 @@ extern void xen_send_IPI_allbutself(int vector);
extern void xen_send_IPI_all(int vector);
extern void xen_send_IPI_self(int vector);
+#ifdef CONFIG_XEN_PVH
+extern void xen_pvh_early_cpu_init(int cpu, bool entry);
+#else
+static inline void xen_pvh_early_cpu_init(int cpu, bool entry)
+{
+}
+#endif
+
#endif
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 485b69585540..674b222544b7 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -47,6 +47,41 @@ ENTRY(startup_xen)
__FINIT
+#ifdef CONFIG_XEN_PVH
+/*
+ * xen_pvh_early_cpu_init() - early PVH VCPU initialization
+ * @cpu: this cpu number (%rdi)
+ * @entry: true if this is a secondary vcpu coming up on this entry
+ * point, false if this is the boot CPU being initialized for
+ * the first time (%rsi)
+ *
+ * Note: This is called as a function on the boot CPU, and is the entry point
+ * on the secondary CPU.
+ */
+ENTRY(xen_pvh_early_cpu_init)
+ mov %rsi, %r11
+
+ /* Gather features to see if NX implemented. */
+ mov $0x80000001, %eax
+ cpuid
+ mov %edx, %esi
+
+ mov $MSR_EFER, %ecx
+ rdmsr
+ bts $_EFER_SCE, %eax
+
+ bt $20, %esi
+ jnc 1f /* No NX, skip setting it */
+ bts $_EFER_NX, %eax
+1: wrmsr
+#ifdef CONFIG_SMP
+ cmp $0, %r11b
+ jne cpu_bringup_and_idle
+#endif
+ ret
+
+#endif /* CONFIG_XEN_PVH */
+
.pushsection .text
.balign PAGE_SIZE
ENTRY(hypercall_page)
@@ -124,6 +159,7 @@ NEXT_HYPERCALL(arch_6)
ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
.quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
+ ELFNOTE(Xen, XEN_ELFNOTE_MOD_START_PFN, .long 1)
ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, _ASM_PTR __HYPERVISOR_VIRT_START)
ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, _ASM_PTR 0)
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 3a8b810b4980..0b13b1c9a01e 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -907,22 +907,17 @@ static int connect_ring(struct backend_info *be)
return 0;
}
-
-/* ** Driver Registration ** */
-
-
static const struct xenbus_device_id xen_blkbk_ids[] = {
{ "vbd" },
{ "" }
};
-
-static DEFINE_XENBUS_DRIVER(xen_blkbk, ,
+static struct xenbus_driver xen_blkbk_driver = {
+ .ids = xen_blkbk_ids,
.probe = xen_blkbk_probe,
.remove = xen_blkbk_remove,
.otherend_changed = frontend_changed
-);
-
+};
int xen_blkif_xenbus_init(void)
{
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 5deb235bd18f..37af03e9d859 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2055,13 +2055,14 @@ static const struct xenbus_device_id blkfront_ids[] = {
{ "" }
};
-static DEFINE_XENBUS_DRIVER(blkfront, ,
+static struct xenbus_driver blkfront_driver = {
+ .ids = blkfront_ids,
.probe = blkfront_probe,
.remove = blkfront_remove,
.resume = blkfront_resume,
.otherend_changed = blkback_changed,
.is_ready = blkfront_is_ready,
-);
+};
static int __init xlblk_init(void)
{
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index 2064b4527040..441b44e54226 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -367,12 +367,13 @@ static const struct xenbus_device_id tpmfront_ids[] = {
};
MODULE_ALIAS("xen:vtpm");
-static DEFINE_XENBUS_DRIVER(tpmfront, ,
- .probe = tpmfront_probe,
- .remove = tpmfront_remove,
- .resume = tpmfront_resume,
- .otherend_changed = backend_changed,
- );
+static struct xenbus_driver tpmfront_driver = {
+ .ids = tpmfront_ids,
+ .probe = tpmfront_probe,
+ .remove = tpmfront_remove,
+ .resume = tpmfront_resume,
+ .otherend_changed = backend_changed,
+};
static int __init xen_tpmfront_init(void)
{
diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index fbfdc10573be..1af28b06c713 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -365,12 +365,13 @@ static const struct xenbus_device_id xenkbd_ids[] = {
{ "" }
};
-static DEFINE_XENBUS_DRIVER(xenkbd, ,
+static struct xenbus_driver xenkbd_driver = {
+ .ids = xenkbd_ids,
.probe = xenkbd_probe,
.remove = xenkbd_remove,
.resume = xenkbd_resume,
.otherend_changed = xenkbd_backend_changed,
-);
+};
static int __init xenkbd_init(void)
{
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 9c47b897b6d2..8079c31ac5e6 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -937,22 +937,18 @@ static int read_xenbus_vif_flags(struct backend_info *be)
return 0;
}
-
-/* ** Driver Registration ** */
-
-
static const struct xenbus_device_id netback_ids[] = {
{ "vif" },
{ "" }
};
-
-static DEFINE_XENBUS_DRIVER(netback, ,
+static struct xenbus_driver netback_driver = {
+ .ids = netback_ids,
.probe = netback_probe,
.remove = netback_remove,
.uevent = netback_uevent,
.otherend_changed = frontend_changed,
-);
+};
int xenvif_xenbus_init(void)
{
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index ca82f545ec2c..fa671442f420 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -2300,12 +2300,6 @@ static void xennet_sysfs_delif(struct net_device *netdev)
#endif /* CONFIG_SYSFS */
-static const struct xenbus_device_id netfront_ids[] = {
- { "vif" },
- { "" }
-};
-
-
static int xennet_remove(struct xenbus_device *dev)
{
struct netfront_info *info = dev_get_drvdata(&dev->dev);
@@ -2338,12 +2332,18 @@ static int xennet_remove(struct xenbus_device *dev)
return 0;
}
-static DEFINE_XENBUS_DRIVER(netfront, ,
+static const struct xenbus_device_id netfront_ids[] = {
+ { "vif" },
+ { "" }
+};
+
+static struct xenbus_driver netfront_driver = {
+ .ids = netfront_ids,
.probe = netfront_probe,
.remove = xennet_remove,
.resume = netfront_resume,
.otherend_changed = netback_changed,
-);
+};
static int __init netif_init(void)
{
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 53df39a22c8a..116ca3746adb 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -1136,11 +1136,13 @@ static const struct xenbus_device_id xenpci_ids[] = {
{""},
};
-static DEFINE_XENBUS_DRIVER(xenpci, "pcifront",
+static struct xenbus_driver xenpci_driver = {
+ .name = "pcifront",
+ .ids = xenpci_ids,
.probe = pcifront_xenbus_probe,
.remove = pcifront_xenbus_remove,
.otherend_changed = pcifront_backend_changed,
-);
+};
static int __init pcifront_init(void)
{
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index e85e64a07d02..296619b7426c 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -587,6 +587,16 @@ config VMWARE_PVSCSI
To compile this driver as a module, choose M here: the
module will be called vmw_pvscsi.
+config XEN_SCSI_FRONTEND
+ tristate "XEN SCSI frontend driver"
+ depends on SCSI && XEN
+ select XEN_XENBUS_FRONTEND
+ help
+ The XEN SCSI frontend driver allows the kernel to access SCSI Devices
+ within another guest OS (usually Dom0).
+ Only needed if the kernel is running in a XEN guest and generic
+ SCSI access to a device is needed.
+
config HYPERV_STORAGE
tristate "Microsoft Hyper-V virtual storage driver"
depends on SCSI && HYPERV
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 5f0d299b0093..59f1ce6df2d6 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -141,6 +141,7 @@ obj-$(CONFIG_SCSI_ESAS2R) += esas2r/
obj-$(CONFIG_SCSI_PMCRAID) += pmcraid.o
obj-$(CONFIG_SCSI_VIRTIO) += virtio_scsi.o
obj-$(CONFIG_VMWARE_PVSCSI) += vmw_pvscsi.o
+obj-$(CONFIG_XEN_SCSI_FRONTEND) += xen-scsifront.o
obj-$(CONFIG_HYPERV_STORAGE) += hv_storvsc.o
obj-$(CONFIG_ARM) += arm/
diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c
new file mode 100644
index 000000000000..34199d206ba6
--- /dev/null
+++ b/drivers/scsi/xen-scsifront.c
@@ -0,0 +1,1026 @@
+/*
+ * Xen SCSI frontend driver
+ *
+ * Copyright (c) 2008, FUJITSU Limited
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/blkdev.h>
+#include <linux/pfn.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+
+#include <xen/xen.h>
+#include <xen/xenbus.h>
+#include <xen/grant_table.h>
+#include <xen/events.h>
+#include <xen/page.h>
+
+#include <xen/interface/grant_table.h>
+#include <xen/interface/io/vscsiif.h>
+#include <xen/interface/io/protocols.h>
+
+#include <asm/xen/hypervisor.h>
+
+
+#define GRANT_INVALID_REF 0
+
+#define VSCSIFRONT_OP_ADD_LUN 1
+#define VSCSIFRONT_OP_DEL_LUN 2
+
+/* Tuning point. */
+#define VSCSIIF_DEFAULT_CMD_PER_LUN 10
+#define VSCSIIF_MAX_TARGET 64
+#define VSCSIIF_MAX_LUN 255
+
+#define VSCSIIF_RING_SIZE __CONST_RING_SIZE(vscsiif, PAGE_SIZE)
+#define VSCSIIF_MAX_REQS VSCSIIF_RING_SIZE
+
+#define vscsiif_grants_sg(_sg) (PFN_UP((_sg) * \
+ sizeof(struct scsiif_request_segment)))
+
+struct vscsifrnt_shadow {
+ /* command between backend and frontend */
+ unsigned char act;
+ uint16_t rqid;
+
+ unsigned int nr_grants; /* number of grants in gref[] */
+ struct scsiif_request_segment *sg; /* scatter/gather elements */
+
+ /* Do reset or abort function. */
+ wait_queue_head_t wq_reset; /* reset work queue */
+ int wait_reset; /* reset work queue condition */
+ int32_t rslt_reset; /* reset response status: */
+ /* SUCCESS or FAILED or: */
+#define RSLT_RESET_WAITING 0
+#define RSLT_RESET_ERR -1
+
+ /* Requested struct scsi_cmnd is stored from kernel. */
+ struct scsi_cmnd *sc;
+ int gref[vscsiif_grants_sg(SG_ALL) + SG_ALL];
+};
+
+struct vscsifrnt_info {
+ struct xenbus_device *dev;
+
+ struct Scsi_Host *host;
+ int host_active;
+
+ unsigned int evtchn;
+ unsigned int irq;
+
+ grant_ref_t ring_ref;
+ struct vscsiif_front_ring ring;
+ struct vscsiif_response ring_rsp;
+
+ spinlock_t shadow_lock;
+ DECLARE_BITMAP(shadow_free_bitmap, VSCSIIF_MAX_REQS);
+ struct vscsifrnt_shadow *shadow[VSCSIIF_MAX_REQS];
+
+ wait_queue_head_t wq_sync;
+ unsigned int wait_ring_available:1;
+
+ char dev_state_path[64];
+ struct task_struct *curr;
+};
+
+static DEFINE_MUTEX(scsifront_mutex);
+
+static void scsifront_wake_up(struct vscsifrnt_info *info)
+{
+ info->wait_ring_available = 0;
+ wake_up(&info->wq_sync);
+}
+
+static int scsifront_get_rqid(struct vscsifrnt_info *info)
+{
+ unsigned long flags;
+ int free;
+
+ spin_lock_irqsave(&info->shadow_lock, flags);
+
+ free = find_first_bit(info->shadow_free_bitmap, VSCSIIF_MAX_REQS);
+ __clear_bit(free, info->shadow_free_bitmap);
+
+ spin_unlock_irqrestore(&info->shadow_lock, flags);
+
+ return free;
+}
+
+static int _scsifront_put_rqid(struct vscsifrnt_info *info, uint32_t id)
+{
+ int empty = bitmap_empty(info->shadow_free_bitmap, VSCSIIF_MAX_REQS);
+
+ __set_bit(id, info->shadow_free_bitmap);
+ info->shadow[id] = NULL;
+
+ return empty || info->wait_ring_available;
+}
+
+static void scsifront_put_rqid(struct vscsifrnt_info *info, uint32_t id)
+{
+ unsigned long flags;
+ int kick;
+
+ spin_lock_irqsave(&info->shadow_lock, flags);
+ kick = _scsifront_put_rqid(info, id);
+ spin_unlock_irqrestore(&info->shadow_lock, flags);
+
+ if (kick)
+ scsifront_wake_up(info);
+}
+
+static struct vscsiif_request *scsifront_pre_req(struct vscsifrnt_info *info)
+{
+ struct vscsiif_front_ring *ring = &(info->ring);
+ struct vscsiif_request *ring_req;
+ uint32_t id;
+
+ id = scsifront_get_rqid(info); /* use id in response */
+ if (id >= VSCSIIF_MAX_REQS)
+ return NULL;
+
+ ring_req = RING_GET_REQUEST(&(info->ring), ring->req_prod_pvt);
+
+ ring->req_prod_pvt++;
+
+ ring_req->rqid = (uint16_t)id;
+
+ return ring_req;
+}
+
+static void scsifront_do_request(struct vscsifrnt_info *info)
+{
+ struct vscsiif_front_ring *ring = &(info->ring);
+ int notify;
+
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(ring, notify);
+ if (notify)
+ notify_remote_via_irq(info->irq);
+}
+
+static void scsifront_gnttab_done(struct vscsifrnt_info *info, uint32_t id)
+{
+ struct vscsifrnt_shadow *s = info->shadow[id];
+ int i;
+
+ if (s->sc->sc_data_direction == DMA_NONE)
+ return;
+
+ for (i = 0; i < s->nr_grants; i++) {
+ if (unlikely(gnttab_query_foreign_access(s->gref[i]) != 0)) {
+ shost_printk(KERN_ALERT, info->host, KBUILD_MODNAME
+ "grant still in use by backend\n");
+ BUG();
+ }
+ gnttab_end_foreign_access(s->gref[i], 0, 0UL);
+ }
+
+ kfree(s->sg);
+}
+
+static void scsifront_cdb_cmd_done(struct vscsifrnt_info *info,
+ struct vscsiif_response *ring_rsp)
+{
+ struct scsi_cmnd *sc;
+ uint32_t id;
+ uint8_t sense_len;
+
+ id = ring_rsp->rqid;
+ sc = info->shadow[id]->sc;
+
+ BUG_ON(sc == NULL);
+
+ scsifront_gnttab_done(info, id);
+ scsifront_put_rqid(info, id);
+
+ sc->result = ring_rsp->rslt;
+ scsi_set_resid(sc, ring_rsp->residual_len);
+
+ sense_len = min_t(uint8_t, VSCSIIF_SENSE_BUFFERSIZE,
+ ring_rsp->sense_len);
+
+ if (sense_len)
+ memcpy(sc->sense_buffer, ring_rsp->sense_buffer, sense_len);
+
+ sc->scsi_done(sc);
+}
+
+static void scsifront_sync_cmd_done(struct vscsifrnt_info *info,
+ struct vscsiif_response *ring_rsp)
+{
+ uint16_t id = ring_rsp->rqid;
+ unsigned long flags;
+ struct vscsifrnt_shadow *shadow = info->shadow[id];
+ int kick;
+
+ spin_lock_irqsave(&info->shadow_lock, flags);
+ shadow->wait_reset = 1;
+ switch (shadow->rslt_reset) {
+ case RSLT_RESET_WAITING:
+ shadow->rslt_reset = ring_rsp->rslt;
+ break;
+ case RSLT_RESET_ERR:
+ kick = _scsifront_put_rqid(info, id);
+ spin_unlock_irqrestore(&info->shadow_lock, flags);
+ kfree(shadow);
+ if (kick)
+ scsifront_wake_up(info);
+ return;
+ default:
+ shost_printk(KERN_ERR, info->host, KBUILD_MODNAME
+ "bad reset state %d, possibly leaking %u\n",
+ shadow->rslt_reset, id);
+ break;
+ }
+ spin_unlock_irqrestore(&info->shadow_lock, flags);
+
+ wake_up(&shadow->wq_reset);
+}
+
+static int scsifront_cmd_done(struct vscsifrnt_info *info)
+{
+ struct vscsiif_response *ring_rsp;
+ RING_IDX i, rp;
+ int more_to_do = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(info->host->host_lock, flags);
+
+ rp = info->ring.sring->rsp_prod;
+ rmb(); /* ordering required respective to dom0 */
+ for (i = info->ring.rsp_cons; i != rp; i++) {
+
+ ring_rsp = RING_GET_RESPONSE(&info->ring, i);
+
+ if (WARN(ring_rsp->rqid >= VSCSIIF_MAX_REQS ||
+ test_bit(ring_rsp->rqid, info->shadow_free_bitmap),
+ "illegal rqid %u returned by backend!\n",
+ ring_rsp->rqid))
+ continue;
+
+ if (info->shadow[ring_rsp->rqid]->act == VSCSIIF_ACT_SCSI_CDB)
+ scsifront_cdb_cmd_done(info, ring_rsp);
+ else
+ scsifront_sync_cmd_done(info, ring_rsp);
+ }
+
+ info->ring.rsp_cons = i;
+
+ if (i != info->ring.req_prod_pvt)
+ RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
+ else
+ info->ring.sring->rsp_event = i + 1;
+
+ info->wait_ring_available = 0;
+
+ spin_unlock_irqrestore(info->host->host_lock, flags);
+
+ wake_up(&info->wq_sync);
+
+ return more_to_do;
+}
+
+static irqreturn_t scsifront_irq_fn(int irq, void *dev_id)
+{
+ struct vscsifrnt_info *info = dev_id;
+
+ while (scsifront_cmd_done(info))
+ /* Yield point for this unbounded loop. */
+ cond_resched();
+
+ return IRQ_HANDLED;
+}
+
+static int map_data_for_request(struct vscsifrnt_info *info,
+ struct scsi_cmnd *sc,
+ struct vscsiif_request *ring_req,
+ struct vscsifrnt_shadow *shadow)
+{
+ grant_ref_t gref_head;
+ struct page *page;
+ int err, ref, ref_cnt = 0;
+ int grant_ro = (sc->sc_data_direction == DMA_TO_DEVICE);
+ unsigned int i, off, len, bytes;
+ unsigned int data_len = scsi_bufflen(sc);
+ unsigned int data_grants = 0, seg_grants = 0;
+ struct scatterlist *sg;
+ unsigned long mfn;
+ struct scsiif_request_segment *seg;
+
+ ring_req->nr_segments = 0;
+ if (sc->sc_data_direction == DMA_NONE || !data_len)
+ return 0;
+
+ scsi_for_each_sg(sc, sg, scsi_sg_count(sc), i)
+ data_grants += PFN_UP(sg->offset + sg->length);
+
+ if (data_grants > VSCSIIF_SG_TABLESIZE) {
+ if (data_grants > info->host->sg_tablesize) {
+ shost_printk(KERN_ERR, info->host, KBUILD_MODNAME
+ "Unable to map request_buffer for command!\n");
+ return -E2BIG;
+ }
+ seg_grants = vscsiif_grants_sg(data_grants);
+ shadow->sg = kcalloc(data_grants,
+ sizeof(struct scsiif_request_segment), GFP_ATOMIC);
+ if (!shadow->sg)
+ return -ENOMEM;
+ }
+ seg = shadow->sg ? : ring_req->seg;
+
+ err = gnttab_alloc_grant_references(seg_grants + data_grants,
+ &gref_head);
+ if (err) {
+ kfree(shadow->sg);
+ shost_printk(KERN_ERR, info->host, KBUILD_MODNAME
+ "gnttab_alloc_grant_references() error\n");
+ return -ENOMEM;
+ }
+
+ if (seg_grants) {
+ page = virt_to_page(seg);
+ off = (unsigned long)seg & ~PAGE_MASK;
+ len = sizeof(struct scsiif_request_segment) * data_grants;
+ while (len > 0) {
+ bytes = min_t(unsigned int, len, PAGE_SIZE - off);
+
+ ref = gnttab_claim_grant_reference(&gref_head);
+ BUG_ON(ref == -ENOSPC);
+
+ mfn = pfn_to_mfn(page_to_pfn(page));
+ gnttab_grant_foreign_access_ref(ref,
+ info->dev->otherend_id, mfn, 1);
+ shadow->gref[ref_cnt] = ref;
+ ring_req->seg[ref_cnt].gref = ref;
+ ring_req->seg[ref_cnt].offset = (uint16_t)off;
+ ring_req->seg[ref_cnt].length = (uint16_t)bytes;
+
+ page++;
+ len -= bytes;
+ off = 0;
+ ref_cnt++;
+ }
+ BUG_ON(seg_grants < ref_cnt);
+ seg_grants = ref_cnt;
+ }
+
+ scsi_for_each_sg(sc, sg, scsi_sg_count(sc), i) {
+ page = sg_page(sg);
+ off = sg->offset;
+ len = sg->length;
+
+ while (len > 0 && data_len > 0) {
+ /*
+ * sg sends a scatterlist that is larger than
+ * the data_len it wants transferred for certain
+ * IO sizes.
+ */
+ bytes = min_t(unsigned int, len, PAGE_SIZE - off);
+ bytes = min(bytes, data_len);
+
+ ref = gnttab_claim_grant_reference(&gref_head);
+ BUG_ON(ref == -ENOSPC);
+
+ mfn = pfn_to_mfn(page_to_pfn(page));
+ gnttab_grant_foreign_access_ref(ref,
+ info->dev->otherend_id, mfn, grant_ro);
+
+ shadow->gref[ref_cnt] = ref;
+ seg->gref = ref;
+ seg->offset = (uint16_t)off;
+ seg->length = (uint16_t)bytes;
+
+ page++;
+ seg++;
+ len -= bytes;
+ data_len -= bytes;
+ off = 0;
+ ref_cnt++;
+ }
+ }
+
+ if (seg_grants)
+ ring_req->nr_segments = VSCSIIF_SG_GRANT | seg_grants;
+ else
+ ring_req->nr_segments = (uint8_t)ref_cnt;
+ shadow->nr_grants = ref_cnt;
+
+ return 0;
+}
+
+static struct vscsiif_request *scsifront_command2ring(
+ struct vscsifrnt_info *info, struct scsi_cmnd *sc,
+ struct vscsifrnt_shadow *shadow)
+{
+ struct vscsiif_request *ring_req;
+
+ memset(shadow, 0, sizeof(*shadow));
+
+ ring_req = scsifront_pre_req(info);
+ if (!ring_req)
+ return NULL;
+
+ info->shadow[ring_req->rqid] = shadow;
+ shadow->rqid = ring_req->rqid;
+
+ ring_req->id = sc->device->id;
+ ring_req->lun = sc->device->lun;
+ ring_req->channel = sc->device->channel;
+ ring_req->cmd_len = sc->cmd_len;
+
+ BUG_ON(sc->cmd_len > VSCSIIF_MAX_COMMAND_SIZE);
+
+ memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
+
+ ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction;
+ ring_req->timeout_per_command = sc->request->timeout / HZ;
+
+ return ring_req;
+}
+
+static int scsifront_queuecommand(struct Scsi_Host *shost,
+ struct scsi_cmnd *sc)
+{
+ struct vscsifrnt_info *info = shost_priv(shost);
+ struct vscsiif_request *ring_req;
+ struct vscsifrnt_shadow *shadow = scsi_cmd_priv(sc);
+ unsigned long flags;
+ int err;
+ uint16_t rqid;
+
+ spin_lock_irqsave(shost->host_lock, flags);
+ if (RING_FULL(&info->ring))
+ goto busy;
+
+ ring_req = scsifront_command2ring(info, sc, shadow);
+ if (!ring_req)
+ goto busy;
+
+ sc->result = 0;
+
+ rqid = ring_req->rqid;
+ ring_req->act = VSCSIIF_ACT_SCSI_CDB;
+
+ shadow->sc = sc;
+ shadow->act = VSCSIIF_ACT_SCSI_CDB;
+
+ err = map_data_for_request(info, sc, ring_req, shadow);
+ if (err < 0) {
+ pr_debug("%s: err %d\n", __func__, err);
+ scsifront_put_rqid(info, rqid);
+ spin_unlock_irqrestore(shost->host_lock, flags);
+ if (err == -ENOMEM)
+ return SCSI_MLQUEUE_HOST_BUSY;
+ sc->result = DID_ERROR << 16;
+ sc->scsi_done(sc);
+ return 0;
+ }
+
+ scsifront_do_request(info);
+ spin_unlock_irqrestore(shost->host_lock, flags);
+
+ return 0;
+
+busy:
+ spin_unlock_irqrestore(shost->host_lock, flags);
+ pr_debug("%s: busy\n", __func__);
+ return SCSI_MLQUEUE_HOST_BUSY;
+}
+
+/*
+ * Any exception handling (reset or abort) must be forwarded to the backend.
+ * We have to wait until an answer is returned. This answer contains the
+ * result to be returned to the requestor.
+ */
+static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act)
+{
+ struct Scsi_Host *host = sc->device->host;
+ struct vscsifrnt_info *info = shost_priv(host);
+ struct vscsifrnt_shadow *shadow, *s = scsi_cmd_priv(sc);
+ struct vscsiif_request *ring_req;
+ int err = 0;
+
+ shadow = kmalloc(sizeof(*shadow), GFP_NOIO);
+ if (!shadow)
+ return FAILED;
+
+ spin_lock_irq(host->host_lock);
+
+ for (;;) {
+ if (!RING_FULL(&info->ring)) {
+ ring_req = scsifront_command2ring(info, sc, shadow);
+ if (ring_req)
+ break;
+ }
+ if (err) {
+ spin_unlock_irq(host->host_lock);
+ kfree(shadow);
+ return FAILED;
+ }
+ info->wait_ring_available = 1;
+ spin_unlock_irq(host->host_lock);
+ err = wait_event_interruptible(info->wq_sync,
+ !info->wait_ring_available);
+ spin_lock_irq(host->host_lock);
+ }
+
+ ring_req->act = act;
+ ring_req->ref_rqid = s->rqid;
+
+ shadow->act = act;
+ shadow->rslt_reset = RSLT_RESET_WAITING;
+ init_waitqueue_head(&shadow->wq_reset);
+
+ ring_req->nr_segments = 0;
+
+ scsifront_do_request(info);
+
+ spin_unlock_irq(host->host_lock);
+ err = wait_event_interruptible(shadow->wq_reset, shadow->wait_reset);
+ spin_lock_irq(host->host_lock);
+
+ if (!err) {
+ err = shadow->rslt_reset;
+ scsifront_put_rqid(info, shadow->rqid);
+ kfree(shadow);
+ } else {
+ spin_lock(&info->shadow_lock);
+ shadow->rslt_reset = RSLT_RESET_ERR;
+ spin_unlock(&info->shadow_lock);
+ err = FAILED;
+ }
+
+ spin_unlock_irq(host->host_lock);
+ return err;
+}
+
+static int scsifront_eh_abort_handler(struct scsi_cmnd *sc)
+{
+ pr_debug("%s\n", __func__);
+ return scsifront_action_handler(sc, VSCSIIF_ACT_SCSI_ABORT);
+}
+
+static int scsifront_dev_reset_handler(struct scsi_cmnd *sc)
+{
+ pr_debug("%s\n", __func__);
+ return scsifront_action_handler(sc, VSCSIIF_ACT_SCSI_RESET);
+}
+
+static int scsifront_sdev_configure(struct scsi_device *sdev)
+{
+ struct vscsifrnt_info *info = shost_priv(sdev->host);
+
+ if (info && current == info->curr)
+ xenbus_printf(XBT_NIL, info->dev->nodename,
+ info->dev_state_path, "%d", XenbusStateConnected);
+
+ return 0;
+}
+
+static void scsifront_sdev_destroy(struct scsi_device *sdev)
+{
+ struct vscsifrnt_info *info = shost_priv(sdev->host);
+
+ if (info && current == info->curr)
+ xenbus_printf(XBT_NIL, info->dev->nodename,
+ info->dev_state_path, "%d", XenbusStateClosed);
+}
+
+static struct scsi_host_template scsifront_sht = {
+ .module = THIS_MODULE,
+ .name = "Xen SCSI frontend driver",
+ .queuecommand = scsifront_queuecommand,
+ .eh_abort_handler = scsifront_eh_abort_handler,
+ .eh_device_reset_handler = scsifront_dev_reset_handler,
+ .slave_configure = scsifront_sdev_configure,
+ .slave_destroy = scsifront_sdev_destroy,
+ .cmd_per_lun = VSCSIIF_DEFAULT_CMD_PER_LUN,
+ .can_queue = VSCSIIF_MAX_REQS,
+ .this_id = -1,
+ .cmd_size = sizeof(struct vscsifrnt_shadow),
+ .sg_tablesize = VSCSIIF_SG_TABLESIZE,
+ .use_clustering = DISABLE_CLUSTERING,
+ .proc_name = "scsifront",
+};
+
+static int scsifront_alloc_ring(struct vscsifrnt_info *info)
+{
+ struct xenbus_device *dev = info->dev;
+ struct vscsiif_sring *sring;
+ int err = -ENOMEM;
+
+ /***** Frontend to Backend ring start *****/
+ sring = (struct vscsiif_sring *)__get_free_page(GFP_KERNEL);
+ if (!sring) {
+ xenbus_dev_fatal(dev, err,
+ "fail to allocate shared ring (Front to Back)");
+ return err;
+ }
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+
+ err = xenbus_grant_ring(dev, virt_to_mfn(sring));
+ if (err < 0) {
+ free_page((unsigned long)sring);
+ xenbus_dev_fatal(dev, err,
+ "fail to grant shared ring (Front to Back)");
+ return err;
+ }
+ info->ring_ref = err;
+
+ err = xenbus_alloc_evtchn(dev, &info->evtchn);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "xenbus_alloc_evtchn");
+ goto free_gnttab;
+ }
+
+ err = bind_evtchn_to_irq(info->evtchn);
+ if (err <= 0) {
+ xenbus_dev_fatal(dev, err, "bind_evtchn_to_irq");
+ goto free_gnttab;
+ }
+
+ info->irq = err;
+
+ err = request_threaded_irq(info->irq, NULL, scsifront_irq_fn,
+ IRQF_ONESHOT, "scsifront", info);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "request_threaded_irq");
+ goto free_irq;
+ }
+
+ return 0;
+
+/* free resource */
+free_irq:
+ unbind_from_irqhandler(info->irq, info);
+free_gnttab:
+ gnttab_end_foreign_access(info->ring_ref, 0,
+ (unsigned long)info->ring.sring);
+
+ return err;
+}
+
+static int scsifront_init_ring(struct vscsifrnt_info *info)
+{
+ struct xenbus_device *dev = info->dev;
+ struct xenbus_transaction xbt;
+ int err;
+
+ pr_debug("%s\n", __func__);
+
+ err = scsifront_alloc_ring(info);
+ if (err)
+ return err;
+ pr_debug("%s: %u %u\n", __func__, info->ring_ref, info->evtchn);
+
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err)
+ xenbus_dev_fatal(dev, err, "starting transaction");
+
+ err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%u",
+ info->ring_ref);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "%s", "writing ring-ref");
+ goto fail;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
+ info->evtchn);
+
+ if (err) {
+ xenbus_dev_fatal(dev, err, "%s", "writing event-channel");
+ goto fail;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err) {
+ if (err == -EAGAIN)
+ goto again;
+ xenbus_dev_fatal(dev, err, "completing transaction");
+ goto free_sring;
+ }
+
+ return 0;
+
+fail:
+ xenbus_transaction_end(xbt, 1);
+free_sring:
+ unbind_from_irqhandler(info->irq, info);
+ gnttab_end_foreign_access(info->ring_ref, 0,
+ (unsigned long)info->ring.sring);
+
+ return err;
+}
+
+
+static int scsifront_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ struct vscsifrnt_info *info;
+ struct Scsi_Host *host;
+ int err = -ENOMEM;
+ char name[TASK_COMM_LEN];
+
+ host = scsi_host_alloc(&scsifront_sht, sizeof(*info));
+ if (!host) {
+ xenbus_dev_fatal(dev, err, "fail to allocate scsi host");
+ return err;
+ }
+ info = (struct vscsifrnt_info *)host->hostdata;
+
+ dev_set_drvdata(&dev->dev, info);
+ info->dev = dev;
+
+ bitmap_fill(info->shadow_free_bitmap, VSCSIIF_MAX_REQS);
+
+ err = scsifront_init_ring(info);
+ if (err) {
+ scsi_host_put(host);
+ return err;
+ }
+
+ init_waitqueue_head(&info->wq_sync);
+ spin_lock_init(&info->shadow_lock);
+
+ snprintf(name, TASK_COMM_LEN, "vscsiif.%d", host->host_no);
+
+ host->max_id = VSCSIIF_MAX_TARGET;
+ host->max_channel = 0;
+ host->max_lun = VSCSIIF_MAX_LUN;
+ host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
+ host->max_cmd_len = VSCSIIF_MAX_COMMAND_SIZE;
+
+ err = scsi_add_host(host, &dev->dev);
+ if (err) {
+ dev_err(&dev->dev, "fail to add scsi host %d\n", err);
+ goto free_sring;
+ }
+ info->host = host;
+ info->host_active = 1;
+
+ xenbus_switch_state(dev, XenbusStateInitialised);
+
+ return 0;
+
+free_sring:
+ unbind_from_irqhandler(info->irq, info);
+ gnttab_end_foreign_access(info->ring_ref, 0,
+ (unsigned long)info->ring.sring);
+ scsi_host_put(host);
+ return err;
+}
+
+static int scsifront_remove(struct xenbus_device *dev)
+{
+ struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
+
+ pr_debug("%s: %s removed\n", __func__, dev->nodename);
+
+ mutex_lock(&scsifront_mutex);
+ if (info->host_active) {
+ /* Scsi_host not yet removed */
+ scsi_remove_host(info->host);
+ info->host_active = 0;
+ }
+ mutex_unlock(&scsifront_mutex);
+
+ gnttab_end_foreign_access(info->ring_ref, 0,
+ (unsigned long)info->ring.sring);
+ unbind_from_irqhandler(info->irq, info);
+
+ scsi_host_put(info->host);
+
+ return 0;
+}
+
+static void scsifront_disconnect(struct vscsifrnt_info *info)
+{
+ struct xenbus_device *dev = info->dev;
+ struct Scsi_Host *host = info->host;
+
+ pr_debug("%s: %s disconnect\n", __func__, dev->nodename);
+
+ /*
+ * When this function is executed, all devices of
+ * Frontend have been deleted.
+ * Therefore, it need not block I/O before remove_host.
+ */
+
+ mutex_lock(&scsifront_mutex);
+ if (info->host_active) {
+ scsi_remove_host(host);
+ info->host_active = 0;
+ }
+ mutex_unlock(&scsifront_mutex);
+
+ xenbus_frontend_closed(dev);
+}
+
+static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
+{
+ struct xenbus_device *dev = info->dev;
+ int i, err = 0;
+ char str[64];
+ char **dir;
+ unsigned int dir_n = 0;
+ unsigned int device_state;
+ unsigned int hst, chn, tgt, lun;
+ struct scsi_device *sdev;
+
+ dir = xenbus_directory(XBT_NIL, dev->otherend, "vscsi-devs", &dir_n);
+ if (IS_ERR(dir))
+ return;
+
+ /* mark current task as the one allowed to modify device states */
+ BUG_ON(info->curr);
+ info->curr = current;
+
+ for (i = 0; i < dir_n; i++) {
+ /* read status */
+ snprintf(str, sizeof(str), "vscsi-devs/%s/state", dir[i]);
+ err = xenbus_scanf(XBT_NIL, dev->otherend, str, "%u",
+ &device_state);
+ if (XENBUS_EXIST_ERR(err))
+ continue;
+
+ /* virtual SCSI device */
+ snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", dir[i]);
+ err = xenbus_scanf(XBT_NIL, dev->otherend, str,
+ "%u:%u:%u:%u", &hst, &chn, &tgt, &lun);
+ if (XENBUS_EXIST_ERR(err))
+ continue;
+
+ /*
+ * Front device state path, used in slave_configure called
+ * on successfull scsi_add_device, and in slave_destroy called
+ * on remove of a device.
+ */
+ snprintf(info->dev_state_path, sizeof(info->dev_state_path),
+ "vscsi-devs/%s/state", dir[i]);
+
+ switch (op) {
+ case VSCSIFRONT_OP_ADD_LUN:
+ if (device_state != XenbusStateInitialised)
+ break;
+
+ if (scsi_add_device(info->host, chn, tgt, lun)) {
+ dev_err(&dev->dev, "scsi_add_device\n");
+ xenbus_printf(XBT_NIL, dev->nodename,
+ info->dev_state_path,
+ "%d", XenbusStateClosed);
+ }
+ break;
+ case VSCSIFRONT_OP_DEL_LUN:
+ if (device_state != XenbusStateClosing)
+ break;
+
+ sdev = scsi_device_lookup(info->host, chn, tgt, lun);
+ if (sdev) {
+ scsi_remove_device(sdev);
+ scsi_device_put(sdev);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ info->curr = NULL;
+
+ kfree(dir);
+}
+
+static void scsifront_read_backend_params(struct xenbus_device *dev,
+ struct vscsifrnt_info *info)
+{
+ unsigned int sg_grant;
+ int ret;
+ struct Scsi_Host *host = info->host;
+
+ ret = xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg-grant", "%u",
+ &sg_grant);
+ if (ret == 1 && sg_grant) {
+ sg_grant = min_t(unsigned int, sg_grant, SG_ALL);
+ sg_grant = max_t(unsigned int, sg_grant, VSCSIIF_SG_TABLESIZE);
+ host->sg_tablesize = min_t(unsigned int, sg_grant,
+ VSCSIIF_SG_TABLESIZE * PAGE_SIZE /
+ sizeof(struct scsiif_request_segment));
+ host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
+ }
+ dev_info(&dev->dev, "using up to %d SG entries\n", host->sg_tablesize);
+}
+
+static void scsifront_backend_changed(struct xenbus_device *dev,
+ enum xenbus_state backend_state)
+{
+ struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
+
+ pr_debug("%s: %p %u %u\n", __func__, dev, dev->state, backend_state);
+
+ switch (backend_state) {
+ case XenbusStateUnknown:
+ case XenbusStateInitialising:
+ case XenbusStateInitWait:
+ case XenbusStateInitialised:
+ break;
+
+ case XenbusStateConnected:
+ scsifront_read_backend_params(dev, info);
+ if (xenbus_read_driver_state(dev->nodename) ==
+ XenbusStateInitialised)
+ scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
+
+ if (dev->state != XenbusStateConnected)
+ xenbus_switch_state(dev, XenbusStateConnected);
+ break;
+
+ case XenbusStateClosed:
+ if (dev->state == XenbusStateClosed)
+ break;
+ /* Missed the backend's Closing state -- fallthrough */
+ case XenbusStateClosing:
+ scsifront_disconnect(info);
+ break;
+
+ case XenbusStateReconfiguring:
+ scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_DEL_LUN);
+ xenbus_switch_state(dev, XenbusStateReconfiguring);
+ break;
+
+ case XenbusStateReconfigured:
+ scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
+ xenbus_switch_state(dev, XenbusStateConnected);
+ break;
+ }
+}
+
+static const struct xenbus_device_id scsifront_ids[] = {
+ { "vscsi" },
+ { "" }
+};
+
+static struct xenbus_driver scsifront_driver = {
+ .ids = scsifront_ids,
+ .probe = scsifront_probe,
+ .remove = scsifront_remove,
+ .otherend_changed = scsifront_backend_changed,
+};
+
+static int __init scsifront_init(void)
+{
+ if (!xen_domain())
+ return -ENODEV;
+
+ return xenbus_register_frontend(&scsifront_driver);
+}
+module_init(scsifront_init);
+
+static void __exit scsifront_exit(void)
+{
+ xenbus_unregister_driver(&scsifront_driver);
+}
+module_exit(scsifront_exit);
+
+MODULE_DESCRIPTION("Xen SCSI frontend driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("xen:vscsi");
+MODULE_AUTHOR("Juergen Gross <jgross@suse.com>");
diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index 2967f0388d2c..f1e57425e39f 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -347,8 +347,6 @@ static int xen_console_remove(struct xencons_info *info)
}
#ifdef CONFIG_HVC_XEN_FRONTEND
-static struct xenbus_driver xencons_driver;
-
static int xencons_remove(struct xenbus_device *dev)
{
return xen_console_remove(dev_get_drvdata(&dev->dev));
@@ -499,13 +497,14 @@ static const struct xenbus_device_id xencons_ids[] = {
{ "" }
};
-
-static DEFINE_XENBUS_DRIVER(xencons, "xenconsole",
+static struct xenbus_driver xencons_driver = {
+ .name = "xenconsole",
+ .ids = xencons_ids,
.probe = xencons_probe,
.remove = xencons_remove,
.resume = xencons_resume,
.otherend_changed = xencons_backend_changed,
-);
+};
#endif /* CONFIG_HVC_XEN_FRONTEND */
static int __init xen_hvc_init(void)
diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c
index 901014bbc821..09dc44736c1a 100644
--- a/drivers/video/fbdev/xen-fbfront.c
+++ b/drivers/video/fbdev/xen-fbfront.c
@@ -684,12 +684,13 @@ static const struct xenbus_device_id xenfb_ids[] = {
{ "" }
};
-static DEFINE_XENBUS_DRIVER(xenfb, ,
+static struct xenbus_driver xenfb_driver = {
+ .ids = xenfb_ids,
.probe = xenfb_probe,
.remove = xenfb_remove,
.resume = xenfb_resume,
.otherend_changed = xenfb_backend_changed,
-);
+};
static int __init xenfb_init(void)
{
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 8bc01838daf9..b812462083fc 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -172,6 +172,15 @@ config XEN_PCIDEV_BACKEND
If in doubt, say m.
+config XEN_SCSI_BACKEND
+ tristate "XEN SCSI backend driver"
+ depends on XEN && XEN_BACKEND && TARGET_CORE
+ help
+ The SCSI backend driver allows the kernel to export its SCSI Devices
+ to other guests via a high-performance shared-memory interface.
+ Only needed for systems running as XEN driver domains (e.g. Dom0) and
+ if guests need generic access to SCSI devices.
+
config XEN_PRIVCMD
tristate
depends on XEN
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 84044b554e33..2140398a2a8c 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_XEN_ACPI_HOTPLUG_MEMORY) += xen-acpi-memhotplug.o
obj-$(CONFIG_XEN_ACPI_HOTPLUG_CPU) += xen-acpi-cpuhotplug.o
obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o
obj-$(CONFIG_XEN_EFI) += efi.o
+obj-$(CONFIG_XEN_SCSI_BACKEND) += xen-scsiback.o
xen-evtchn-y := evtchn.o
xen-gntdev-y := gntdev.o
xen-gntalloc-y := gntalloc.o
diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c
index 31f618a49661..1f850c97482f 100644
--- a/drivers/xen/efi.c
+++ b/drivers/xen/efi.c
@@ -27,6 +27,8 @@
#include <xen/interface/platform.h>
#include <xen/xen.h>
+#include <asm/page.h>
+
#include <asm/xen/hypercall.h>
#define INIT_EFI_OP(name) \
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 5b5c5ff273fd..b4bca2d4a7e5 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -900,8 +900,8 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
return irq;
}
-static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
- unsigned int remote_port)
+int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
+ unsigned int remote_port)
{
struct evtchn_bind_interdomain bind_interdomain;
int err;
@@ -914,6 +914,7 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
}
+EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq);
static int find_virq(unsigned int virq, unsigned int cpu)
{
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index c254ae036f18..7786291ba229 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -592,7 +592,7 @@ static int grow_gnttab_list(unsigned int more_frames)
return 0;
grow_nomem:
- for ( ; i >= nr_glist_frames; i--)
+ while (i-- > nr_glist_frames)
free_page((unsigned long) gnttab_list[i]);
return -ENOMEM;
}
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index c214daab4829..ad8d30c088fe 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -719,11 +719,13 @@ static const struct xenbus_device_id xen_pcibk_ids[] = {
{""},
};
-static DEFINE_XENBUS_DRIVER(xen_pcibk, DRV_NAME,
+static struct xenbus_driver xen_pcibk_driver = {
+ .name = DRV_NAME,
+ .ids = xen_pcibk_ids,
.probe = xen_pcibk_xenbus_probe,
.remove = xen_pcibk_xenbus_remove,
.otherend_changed = xen_pcibk_frontend_changed,
-);
+};
const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend;
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
new file mode 100644
index 000000000000..3e32146472a5
--- /dev/null
+++ b/drivers/xen/xen-scsiback.c
@@ -0,0 +1,2126 @@
+/*
+ * Xen SCSI backend driver
+ *
+ * Copyright (c) 2008, FUJITSU Limited
+ *
+ * Based on the blkback driver code.
+ * Adaption to kernel taget core infrastructure taken from vhost/scsi.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdarg.h>
+
+#include <linux/module.h>
+#include <linux/utsname.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/gfp.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/configfs.h>
+
+#include <generated/utsrelease.h>
+
+#include <scsi/scsi_dbg.h>
+#include <scsi/scsi_eh.h>
+#include <scsi/scsi_tcq.h>
+
+#include <target/target_core_base.h>
+#include <target/target_core_fabric.h>
+#include <target/target_core_configfs.h>
+#include <target/target_core_fabric_configfs.h>
+
+#include <asm/hypervisor.h>
+
+#include <xen/xen.h>
+#include <xen/balloon.h>
+#include <xen/events.h>
+#include <xen/xenbus.h>
+#include <xen/grant_table.h>
+#include <xen/page.h>
+
+#include <xen/interface/grant_table.h>
+#include <xen/interface/io/vscsiif.h>
+
+#define DPRINTK(_f, _a...) \
+ pr_debug("(file=%s, line=%d) " _f, __FILE__ , __LINE__ , ## _a)
+
+#define VSCSI_VERSION "v0.1"
+#define VSCSI_NAMELEN 32
+
+struct ids_tuple {
+ unsigned int hst; /* host */
+ unsigned int chn; /* channel */
+ unsigned int tgt; /* target */
+ unsigned int lun; /* LUN */
+};
+
+struct v2p_entry {
+ struct ids_tuple v; /* translate from */
+ struct scsiback_tpg *tpg; /* translate to */
+ unsigned int lun;
+ struct kref kref;
+ struct list_head l;
+};
+
+struct vscsibk_info {
+ struct xenbus_device *dev;
+
+ domid_t domid;
+ unsigned int irq;
+
+ struct vscsiif_back_ring ring;
+ int ring_error;
+
+ spinlock_t ring_lock;
+ atomic_t nr_unreplied_reqs;
+
+ spinlock_t v2p_lock;
+ struct list_head v2p_entry_lists;
+
+ wait_queue_head_t waiting_to_free;
+};
+
+/* theoretical maximum of grants for one request */
+#define VSCSI_MAX_GRANTS (SG_ALL + VSCSIIF_SG_TABLESIZE)
+
+/*
+ * VSCSI_GRANT_BATCH is the maximum number of grants to be processed in one
+ * call to map/unmap grants. Don't choose it too large, as there are arrays
+ * with VSCSI_GRANT_BATCH elements allocated on the stack.
+ */
+#define VSCSI_GRANT_BATCH 16
+
+struct vscsibk_pend {
+ uint16_t rqid;
+
+ uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE];
+ uint8_t cmd_len;
+
+ uint8_t sc_data_direction;
+ uint16_t n_sg; /* real length of SG list */
+ uint16_t n_grants; /* SG pages and potentially SG list */
+ uint32_t data_len;
+ uint32_t result;
+
+ struct vscsibk_info *info;
+ struct v2p_entry *v2p;
+ struct scatterlist *sgl;
+
+ uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
+
+ grant_handle_t grant_handles[VSCSI_MAX_GRANTS];
+ struct page *pages[VSCSI_MAX_GRANTS];
+
+ struct se_cmd se_cmd;
+};
+
+struct scsiback_tmr {
+ atomic_t tmr_complete;
+ wait_queue_head_t tmr_wait;
+};
+
+struct scsiback_nexus {
+ /* Pointer to TCM session for I_T Nexus */
+ struct se_session *tvn_se_sess;
+};
+
+struct scsiback_tport {
+ /* SCSI protocol the tport is providing */
+ u8 tport_proto_id;
+ /* Binary World Wide unique Port Name for pvscsi Target port */
+ u64 tport_wwpn;
+ /* ASCII formatted WWPN for pvscsi Target port */
+ char tport_name[VSCSI_NAMELEN];
+ /* Returned by scsiback_make_tport() */
+ struct se_wwn tport_wwn;
+};
+
+struct scsiback_tpg {
+ /* scsiback port target portal group tag for TCM */
+ u16 tport_tpgt;
+ /* track number of TPG Port/Lun Links wrt explicit I_T Nexus shutdown */
+ int tv_tpg_port_count;
+ /* xen-pvscsi references to tpg_nexus, protected by tv_tpg_mutex */
+ int tv_tpg_fe_count;
+ /* list for scsiback_list */
+ struct list_head tv_tpg_list;
+ /* Used to protect access for tpg_nexus */
+ struct mutex tv_tpg_mutex;
+ /* Pointer to the TCM pvscsi I_T Nexus for this TPG endpoint */
+ struct scsiback_nexus *tpg_nexus;
+ /* Pointer back to scsiback_tport */
+ struct scsiback_tport *tport;
+ /* Returned by scsiback_make_tpg() */
+ struct se_portal_group se_tpg;
+ /* alias used in xenstore */
+ char param_alias[VSCSI_NAMELEN];
+ /* list of info structures related to this target portal group */
+ struct list_head info_list;
+};
+
+#define SCSIBACK_INVALID_HANDLE (~0)
+
+static bool log_print_stat;
+module_param(log_print_stat, bool, 0644);
+
+static int scsiback_max_buffer_pages = 1024;
+module_param_named(max_buffer_pages, scsiback_max_buffer_pages, int, 0644);
+MODULE_PARM_DESC(max_buffer_pages,
+"Maximum number of free pages to keep in backend buffer");
+
+static struct kmem_cache *scsiback_cachep;
+static DEFINE_SPINLOCK(free_pages_lock);
+static int free_pages_num;
+static LIST_HEAD(scsiback_free_pages);
+
+/* Global spinlock to protect scsiback TPG list */
+static DEFINE_MUTEX(scsiback_mutex);
+static LIST_HEAD(scsiback_list);
+
+/* Local pointer to allocated TCM configfs fabric module */
+static struct target_fabric_configfs *scsiback_fabric_configfs;
+
+static void scsiback_get(struct vscsibk_info *info)
+{
+ atomic_inc(&info->nr_unreplied_reqs);
+}
+
+static void scsiback_put(struct vscsibk_info *info)
+{
+ if (atomic_dec_and_test(&info->nr_unreplied_reqs))
+ wake_up(&info->waiting_to_free);
+}
+
+static void put_free_pages(struct page **page, int num)
+{
+ unsigned long flags;
+ int i = free_pages_num + num, n = num;
+
+ if (num == 0)
+ return;
+ if (i > scsiback_max_buffer_pages) {
+ n = min(num, i - scsiback_max_buffer_pages);
+ free_xenballooned_pages(n, page + num - n);
+ n = num - n;
+ }
+ spin_lock_irqsave(&free_pages_lock, flags);
+ for (i = 0; i < n; i++)
+ list_add(&page[i]->lru, &scsiback_free_pages);
+ free_pages_num += n;
+ spin_unlock_irqrestore(&free_pages_lock, flags);
+}
+
+static int get_free_page(struct page **page)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&free_pages_lock, flags);
+ if (list_empty(&scsiback_free_pages)) {
+ spin_unlock_irqrestore(&free_pages_lock, flags);
+ return alloc_xenballooned_pages(1, page, false);
+ }
+ page[0] = list_first_entry(&scsiback_free_pages, struct page, lru);
+ list_del(&page[0]->lru);
+ free_pages_num--;
+ spin_unlock_irqrestore(&free_pages_lock, flags);
+ return 0;
+}
+
+static unsigned long vaddr_page(struct page *page)
+{
+ unsigned long pfn = page_to_pfn(page);
+
+ return (unsigned long)pfn_to_kaddr(pfn);
+}
+
+static unsigned long vaddr(struct vscsibk_pend *req, int seg)
+{
+ return vaddr_page(req->pages[seg]);
+}
+
+static void scsiback_print_status(char *sense_buffer, int errors,
+ struct vscsibk_pend *pending_req)
+{
+ struct scsiback_tpg *tpg = pending_req->v2p->tpg;
+
+ pr_err("xen-pvscsi[%s:%d] cmnd[0]=%02x -> st=%02x msg=%02x host=%02x drv=%02x\n",
+ tpg->tport->tport_name, pending_req->v2p->lun,
+ pending_req->cmnd[0], status_byte(errors), msg_byte(errors),
+ host_byte(errors), driver_byte(errors));
+
+ if (CHECK_CONDITION & status_byte(errors))
+ __scsi_print_sense("xen-pvscsi", sense_buffer,
+ SCSI_SENSE_BUFFERSIZE);
+}
+
+static void scsiback_fast_flush_area(struct vscsibk_pend *req)
+{
+ struct gnttab_unmap_grant_ref unmap[VSCSI_GRANT_BATCH];
+ struct page *pages[VSCSI_GRANT_BATCH];
+ unsigned int i, invcount = 0;
+ grant_handle_t handle;
+ int err;
+
+ kfree(req->sgl);
+ req->sgl = NULL;
+ req->n_sg = 0;
+
+ if (!req->n_grants)
+ return;
+
+ for (i = 0; i < req->n_grants; i++) {
+ handle = req->grant_handles[i];
+ if (handle == SCSIBACK_INVALID_HANDLE)
+ continue;
+ gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
+ GNTMAP_host_map, handle);
+ req->grant_handles[i] = SCSIBACK_INVALID_HANDLE;
+ pages[invcount] = req->pages[i];
+ put_page(pages[invcount]);
+ invcount++;
+ if (invcount < VSCSI_GRANT_BATCH)
+ continue;
+ err = gnttab_unmap_refs(unmap, NULL, pages, invcount);
+ BUG_ON(err);
+ invcount = 0;
+ }
+
+ if (invcount) {
+ err = gnttab_unmap_refs(unmap, NULL, pages, invcount);
+ BUG_ON(err);
+ }
+
+ put_free_pages(req->pages, req->n_grants);
+ req->n_grants = 0;
+}
+
+static void scsiback_free_translation_entry(struct kref *kref)
+{
+ struct v2p_entry *entry = container_of(kref, struct v2p_entry, kref);
+ struct scsiback_tpg *tpg = entry->tpg;
+
+ mutex_lock(&tpg->tv_tpg_mutex);
+ tpg->tv_tpg_fe_count--;
+ mutex_unlock(&tpg->tv_tpg_mutex);
+
+ kfree(entry);
+}
+
+static void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
+ uint32_t resid, struct vscsibk_pend *pending_req)
+{
+ struct vscsiif_response *ring_res;
+ struct vscsibk_info *info = pending_req->info;
+ int notify;
+ struct scsi_sense_hdr sshdr;
+ unsigned long flags;
+ unsigned len;
+
+ spin_lock_irqsave(&info->ring_lock, flags);
+
+ ring_res = RING_GET_RESPONSE(&info->ring, info->ring.rsp_prod_pvt);
+ info->ring.rsp_prod_pvt++;
+
+ ring_res->rslt = result;
+ ring_res->rqid = pending_req->rqid;
+
+ if (sense_buffer != NULL &&
+ scsi_normalize_sense(sense_buffer, VSCSIIF_SENSE_BUFFERSIZE,
+ &sshdr)) {
+ len = min_t(unsigned, 8 + sense_buffer[7],
+ VSCSIIF_SENSE_BUFFERSIZE);
+ memcpy(ring_res->sense_buffer, sense_buffer, len);
+ ring_res->sense_len = len;
+ } else {
+ ring_res->sense_len = 0;
+ }
+
+ ring_res->residual_len = resid;
+
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&info->ring, notify);
+ spin_unlock_irqrestore(&info->ring_lock, flags);
+
+ if (notify)
+ notify_remote_via_irq(info->irq);
+
+ if (pending_req->v2p)
+ kref_put(&pending_req->v2p->kref,
+ scsiback_free_translation_entry);
+}
+
+static void scsiback_cmd_done(struct vscsibk_pend *pending_req)
+{
+ struct vscsibk_info *info = pending_req->info;
+ unsigned char *sense_buffer;
+ unsigned int resid;
+ int errors;
+
+ sense_buffer = pending_req->sense_buffer;
+ resid = pending_req->se_cmd.residual_count;
+ errors = pending_req->result;
+
+ if (errors && log_print_stat)
+ scsiback_print_status(sense_buffer, errors, pending_req);
+
+ scsiback_fast_flush_area(pending_req);
+ scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req);
+ scsiback_put(info);
+}
+
+static void scsiback_cmd_exec(struct vscsibk_pend *pending_req)
+{
+ struct se_cmd *se_cmd = &pending_req->se_cmd;
+ struct se_session *sess = pending_req->v2p->tpg->tpg_nexus->tvn_se_sess;
+ int rc;
+
+ memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE);
+
+ memset(se_cmd, 0, sizeof(*se_cmd));
+
+ scsiback_get(pending_req->info);
+ rc = target_submit_cmd_map_sgls(se_cmd, sess, pending_req->cmnd,
+ pending_req->sense_buffer, pending_req->v2p->lun,
+ pending_req->data_len, 0,
+ pending_req->sc_data_direction, 0,
+ pending_req->sgl, pending_req->n_sg,
+ NULL, 0, NULL, 0);
+ if (rc < 0) {
+ transport_send_check_condition_and_sense(se_cmd,
+ TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE, 0);
+ transport_generic_free_cmd(se_cmd, 0);
+ }
+}
+
+static int scsiback_gnttab_data_map_batch(struct gnttab_map_grant_ref *map,
+ struct page **pg, grant_handle_t *grant, int cnt)
+{
+ int err, i;
+
+ if (!cnt)
+ return 0;
+
+ err = gnttab_map_refs(map, NULL, pg, cnt);
+ BUG_ON(err);
+ for (i = 0; i < cnt; i++) {
+ if (unlikely(map[i].status != GNTST_okay)) {
+ pr_err("xen-pvscsi: invalid buffer -- could not remap it\n");
+ map[i].handle = SCSIBACK_INVALID_HANDLE;
+ err = -ENOMEM;
+ } else {
+ get_page(pg[i]);
+ }
+ grant[i] = map[i].handle;
+ }
+ return err;
+}
+
+static int scsiback_gnttab_data_map_list(struct vscsibk_pend *pending_req,
+ struct scsiif_request_segment *seg, struct page **pg,
+ grant_handle_t *grant, int cnt, u32 flags)
+{
+ int mapcount = 0, i, err = 0;
+ struct gnttab_map_grant_ref map[VSCSI_GRANT_BATCH];
+ struct vscsibk_info *info = pending_req->info;
+
+ for (i = 0; i < cnt; i++) {
+ if (get_free_page(pg + mapcount)) {
+ put_free_pages(pg, mapcount);
+ pr_err("xen-pvscsi: no grant page\n");
+ return -ENOMEM;
+ }
+ gnttab_set_map_op(&map[mapcount], vaddr_page(pg[mapcount]),
+ flags, seg[i].gref, info->domid);
+ mapcount++;
+ if (mapcount < VSCSI_GRANT_BATCH)
+ continue;
+ err = scsiback_gnttab_data_map_batch(map, pg, grant, mapcount);
+ pg += mapcount;
+ grant += mapcount;
+ pending_req->n_grants += mapcount;
+ if (err)
+ return err;
+ mapcount = 0;
+ }
+ err = scsiback_gnttab_data_map_batch(map, pg, grant, mapcount);
+ pending_req->n_grants += mapcount;
+ return err;
+}
+
+static int scsiback_gnttab_data_map(struct vscsiif_request *ring_req,
+ struct vscsibk_pend *pending_req)
+{
+ u32 flags;
+ int i, err, n_segs, i_seg = 0;
+ struct page **pg;
+ struct scsiif_request_segment *seg;
+ unsigned long end_seg = 0;
+ unsigned int nr_segments = (unsigned int)ring_req->nr_segments;
+ unsigned int nr_sgl = 0;
+ struct scatterlist *sg;
+ grant_handle_t *grant;
+
+ pending_req->n_sg = 0;
+ pending_req->n_grants = 0;
+ pending_req->data_len = 0;
+
+ nr_segments &= ~VSCSIIF_SG_GRANT;
+ if (!nr_segments)
+ return 0;
+
+ if (nr_segments > VSCSIIF_SG_TABLESIZE) {
+ DPRINTK("xen-pvscsi: invalid parameter nr_seg = %d\n",
+ ring_req->nr_segments);
+ return -EINVAL;
+ }
+
+ if (ring_req->nr_segments & VSCSIIF_SG_GRANT) {
+ err = scsiback_gnttab_data_map_list(pending_req, ring_req->seg,
+ pending_req->pages, pending_req->grant_handles,
+ nr_segments, GNTMAP_host_map | GNTMAP_readonly);
+ if (err)
+ return err;
+ nr_sgl = nr_segments;
+ nr_segments = 0;
+ for (i = 0; i < nr_sgl; i++) {
+ n_segs = ring_req->seg[i].length /
+ sizeof(struct scsiif_request_segment);
+ if ((unsigned)ring_req->seg[i].offset +
+ (unsigned)ring_req->seg[i].length > PAGE_SIZE ||
+ n_segs * sizeof(struct scsiif_request_segment) !=
+ ring_req->seg[i].length)
+ return -EINVAL;
+ nr_segments += n_segs;
+ }
+ if (nr_segments > SG_ALL) {
+ DPRINTK("xen-pvscsi: invalid nr_seg = %d\n",
+ nr_segments);
+ return -EINVAL;
+ }
+ }
+
+ /* free of (sgl) in fast_flush_area()*/
+ pending_req->sgl = kmalloc_array(nr_segments,
+ sizeof(struct scatterlist), GFP_KERNEL);
+ if (!pending_req->sgl)
+ return -ENOMEM;
+
+ sg_init_table(pending_req->sgl, nr_segments);
+ pending_req->n_sg = nr_segments;
+
+ flags = GNTMAP_host_map;
+ if (pending_req->sc_data_direction == DMA_TO_DEVICE)
+ flags |= GNTMAP_readonly;
+
+ pg = pending_req->pages + nr_sgl;
+ grant = pending_req->grant_handles + nr_sgl;
+ if (!nr_sgl) {
+ seg = ring_req->seg;
+ err = scsiback_gnttab_data_map_list(pending_req, seg,
+ pg, grant, nr_segments, flags);
+ if (err)
+ return err;
+ } else {
+ for (i = 0; i < nr_sgl; i++) {
+ seg = (struct scsiif_request_segment *)(
+ vaddr(pending_req, i) + ring_req->seg[i].offset);
+ n_segs = ring_req->seg[i].length /
+ sizeof(struct scsiif_request_segment);
+ err = scsiback_gnttab_data_map_list(pending_req, seg,
+ pg, grant, n_segs, flags);
+ if (err)
+ return err;
+ pg += n_segs;
+ grant += n_segs;
+ }
+ end_seg = vaddr(pending_req, 0) + ring_req->seg[0].offset;
+ seg = (struct scsiif_request_segment *)end_seg;
+ end_seg += ring_req->seg[0].length;
+ pg = pending_req->pages + nr_sgl;
+ }
+
+ for_each_sg(pending_req->sgl, sg, nr_segments, i) {
+ sg_set_page(sg, pg[i], seg->length, seg->offset);
+ pending_req->data_len += seg->length;
+ seg++;
+ if (nr_sgl && (unsigned long)seg >= end_seg) {
+ i_seg++;
+ end_seg = vaddr(pending_req, i_seg) +
+ ring_req->seg[i_seg].offset;
+ seg = (struct scsiif_request_segment *)end_seg;
+ end_seg += ring_req->seg[i_seg].length;
+ }
+ if (sg->offset >= PAGE_SIZE ||
+ sg->length > PAGE_SIZE ||
+ sg->offset + sg->length > PAGE_SIZE)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void scsiback_disconnect(struct vscsibk_info *info)
+{
+ wait_event(info->waiting_to_free,
+ atomic_read(&info->nr_unreplied_reqs) == 0);
+
+ unbind_from_irqhandler(info->irq, info);
+ info->irq = 0;
+ xenbus_unmap_ring_vfree(info->dev, info->ring.sring);
+}
+
+static void scsiback_device_action(struct vscsibk_pend *pending_req,
+ enum tcm_tmreq_table act, int tag)
+{
+ int rc, err = FAILED;
+ struct scsiback_tpg *tpg = pending_req->v2p->tpg;
+ struct se_cmd *se_cmd = &pending_req->se_cmd;
+ struct scsiback_tmr *tmr;
+
+ tmr = kzalloc(sizeof(struct scsiback_tmr), GFP_KERNEL);
+ if (!tmr)
+ goto out;
+
+ init_waitqueue_head(&tmr->tmr_wait);
+
+ transport_init_se_cmd(se_cmd, tpg->se_tpg.se_tpg_tfo,
+ tpg->tpg_nexus->tvn_se_sess, 0, DMA_NONE, MSG_SIMPLE_TAG,
+ &pending_req->sense_buffer[0]);
+
+ rc = core_tmr_alloc_req(se_cmd, tmr, act, GFP_KERNEL);
+ if (rc < 0)
+ goto out;
+
+ se_cmd->se_tmr_req->ref_task_tag = tag;
+
+ if (transport_lookup_tmr_lun(se_cmd, pending_req->v2p->lun) < 0)
+ goto out;
+
+ transport_generic_handle_tmr(se_cmd);
+ wait_event(tmr->tmr_wait, atomic_read(&tmr->tmr_complete));
+
+ err = (se_cmd->se_tmr_req->response == TMR_FUNCTION_COMPLETE) ?
+ SUCCESS : FAILED;
+
+out:
+ if (tmr) {
+ transport_generic_free_cmd(&pending_req->se_cmd, 1);
+ kfree(tmr);
+ }
+
+ scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
+
+ kmem_cache_free(scsiback_cachep, pending_req);
+}
+
+/*
+ Perform virtual to physical translation
+*/
+static struct v2p_entry *scsiback_do_translation(struct vscsibk_info *info,
+ struct ids_tuple *v)
+{
+ struct v2p_entry *entry;
+ struct list_head *head = &(info->v2p_entry_lists);
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->v2p_lock, flags);
+ list_for_each_entry(entry, head, l) {
+ if ((entry->v.chn == v->chn) &&
+ (entry->v.tgt == v->tgt) &&
+ (entry->v.lun == v->lun)) {
+ kref_get(&entry->kref);
+ goto out;
+ }
+ }
+ entry = NULL;
+
+out:
+ spin_unlock_irqrestore(&info->v2p_lock, flags);
+ return entry;
+}
+
+static int prepare_pending_reqs(struct vscsibk_info *info,
+ struct vscsiif_request *ring_req,
+ struct vscsibk_pend *pending_req)
+{
+ struct v2p_entry *v2p;
+ struct ids_tuple vir;
+
+ pending_req->rqid = ring_req->rqid;
+ pending_req->info = info;
+
+ vir.chn = ring_req->channel;
+ vir.tgt = ring_req->id;
+ vir.lun = ring_req->lun;
+
+ v2p = scsiback_do_translation(info, &vir);
+ if (!v2p) {
+ pending_req->v2p = NULL;
+ DPRINTK("xen-pvscsi: doesn't exist.\n");
+ return -ENODEV;
+ }
+ pending_req->v2p = v2p;
+
+ /* request range check from frontend */
+ pending_req->sc_data_direction = ring_req->sc_data_direction;
+ if ((pending_req->sc_data_direction != DMA_BIDIRECTIONAL) &&
+ (pending_req->sc_data_direction != DMA_TO_DEVICE) &&
+ (pending_req->sc_data_direction != DMA_FROM_DEVICE) &&
+ (pending_req->sc_data_direction != DMA_NONE)) {
+ DPRINTK("xen-pvscsi: invalid parameter data_dir = %d\n",
+ pending_req->sc_data_direction);
+ return -EINVAL;
+ }
+
+ pending_req->cmd_len = ring_req->cmd_len;
+ if (pending_req->cmd_len > VSCSIIF_MAX_COMMAND_SIZE) {
+ DPRINTK("xen-pvscsi: invalid parameter cmd_len = %d\n",
+ pending_req->cmd_len);
+ return -EINVAL;
+ }
+ memcpy(pending_req->cmnd, ring_req->cmnd, pending_req->cmd_len);
+
+ return 0;
+}
+
+static int scsiback_do_cmd_fn(struct vscsibk_info *info)
+{
+ struct vscsiif_back_ring *ring = &info->ring;
+ struct vscsiif_request *ring_req;
+ struct vscsibk_pend *pending_req;
+ RING_IDX rc, rp;
+ int err, more_to_do;
+ uint32_t result;
+ uint8_t act;
+
+ rc = ring->req_cons;
+ rp = ring->sring->req_prod;
+ rmb(); /* guest system is accessing ring, too */
+
+ if (RING_REQUEST_PROD_OVERFLOW(ring, rp)) {
+ rc = ring->rsp_prod_pvt;
+ pr_warn("xen-pvscsi: Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n",
+ info->domid, rp, rc, rp - rc);
+ info->ring_error = 1;
+ return 0;
+ }
+
+ while ((rc != rp)) {
+ if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
+ break;
+ pending_req = kmem_cache_alloc(scsiback_cachep, GFP_KERNEL);
+ if (!pending_req)
+ return 1;
+
+ ring_req = RING_GET_REQUEST(ring, rc);
+ ring->req_cons = ++rc;
+
+ act = ring_req->act;
+ err = prepare_pending_reqs(info, ring_req, pending_req);
+ if (err) {
+ switch (err) {
+ case -ENODEV:
+ result = DID_NO_CONNECT;
+ break;
+ default:
+ result = DRIVER_ERROR;
+ break;
+ }
+ scsiback_do_resp_with_sense(NULL, result << 24, 0,
+ pending_req);
+ kmem_cache_free(scsiback_cachep, pending_req);
+ return 1;
+ }
+
+ switch (act) {
+ case VSCSIIF_ACT_SCSI_CDB:
+ if (scsiback_gnttab_data_map(ring_req, pending_req)) {
+ scsiback_fast_flush_area(pending_req);
+ scsiback_do_resp_with_sense(NULL,
+ DRIVER_ERROR << 24, 0, pending_req);
+ kmem_cache_free(scsiback_cachep, pending_req);
+ } else {
+ scsiback_cmd_exec(pending_req);
+ }
+ break;
+ case VSCSIIF_ACT_SCSI_ABORT:
+ scsiback_device_action(pending_req, TMR_ABORT_TASK,
+ ring_req->ref_rqid);
+ break;
+ case VSCSIIF_ACT_SCSI_RESET:
+ scsiback_device_action(pending_req, TMR_LUN_RESET, 0);
+ break;
+ default:
+ pr_err_ratelimited("xen-pvscsi: invalid request\n");
+ scsiback_do_resp_with_sense(NULL, DRIVER_ERROR << 24,
+ 0, pending_req);
+ kmem_cache_free(scsiback_cachep, pending_req);
+ break;
+ }
+
+ /* Yield point for this unbounded loop. */
+ cond_resched();
+ }
+
+ RING_FINAL_CHECK_FOR_REQUESTS(&info->ring, more_to_do);
+ return more_to_do;
+}
+
+static irqreturn_t scsiback_irq_fn(int irq, void *dev_id)
+{
+ struct vscsibk_info *info = dev_id;
+
+ if (info->ring_error)
+ return IRQ_HANDLED;
+
+ while (scsiback_do_cmd_fn(info))
+ cond_resched();
+
+ return IRQ_HANDLED;
+}
+
+static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref,
+ evtchn_port_t evtchn)
+{
+ void *area;
+ struct vscsiif_sring *sring;
+ int err;
+
+ if (info->irq)
+ return -1;
+
+ err = xenbus_map_ring_valloc(info->dev, ring_ref, &area);
+ if (err)
+ return err;
+
+ sring = (struct vscsiif_sring *)area;
+ BACK_RING_INIT(&info->ring, sring, PAGE_SIZE);
+
+ err = bind_interdomain_evtchn_to_irq(info->domid, evtchn);
+ if (err < 0)
+ goto unmap_page;
+
+ info->irq = err;
+
+ err = request_threaded_irq(info->irq, NULL, scsiback_irq_fn,
+ IRQF_ONESHOT, "vscsiif-backend", info);
+ if (err)
+ goto free_irq;
+
+ return 0;
+
+free_irq:
+ unbind_from_irqhandler(info->irq, info);
+ info->irq = 0;
+unmap_page:
+ xenbus_unmap_ring_vfree(info->dev, area);
+
+ return err;
+}
+
+static int scsiback_map(struct vscsibk_info *info)
+{
+ struct xenbus_device *dev = info->dev;
+ unsigned int ring_ref, evtchn;
+ int err;
+
+ err = xenbus_gather(XBT_NIL, dev->otherend,
+ "ring-ref", "%u", &ring_ref,
+ "event-channel", "%u", &evtchn, NULL);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "reading %s ring", dev->otherend);
+ return err;
+ }
+
+ return scsiback_init_sring(info, ring_ref, evtchn);
+}
+
+/*
+ Add a new translation entry
+*/
+static int scsiback_add_translation_entry(struct vscsibk_info *info,
+ char *phy, struct ids_tuple *v)
+{
+ int err = 0;
+ struct v2p_entry *entry;
+ struct v2p_entry *new;
+ struct list_head *head = &(info->v2p_entry_lists);
+ unsigned long flags;
+ char *lunp;
+ unsigned int lun;
+ struct scsiback_tpg *tpg_entry, *tpg = NULL;
+ char *error = "doesn't exist";
+
+ lunp = strrchr(phy, ':');
+ if (!lunp) {
+ pr_err("xen-pvscsi: illegal format of physical device %s\n",
+ phy);
+ return -EINVAL;
+ }
+ *lunp = 0;
+ lunp++;
+ if (kstrtouint(lunp, 10, &lun) || lun >= TRANSPORT_MAX_LUNS_PER_TPG) {
+ pr_err("xen-pvscsi: lun number not valid: %s\n", lunp);
+ return -EINVAL;
+ }
+
+ mutex_lock(&scsiback_mutex);
+ list_for_each_entry(tpg_entry, &scsiback_list, tv_tpg_list) {
+ if (!strcmp(phy, tpg_entry->tport->tport_name) ||
+ !strcmp(phy, tpg_entry->param_alias)) {
+ spin_lock(&tpg_entry->se_tpg.tpg_lun_lock);
+ if (tpg_entry->se_tpg.tpg_lun_list[lun]->lun_status ==
+ TRANSPORT_LUN_STATUS_ACTIVE) {
+ if (!tpg_entry->tpg_nexus)
+ error = "nexus undefined";
+ else
+ tpg = tpg_entry;
+ }
+ spin_unlock(&tpg_entry->se_tpg.tpg_lun_lock);
+ break;
+ }
+ }
+ if (tpg) {
+ mutex_lock(&tpg->tv_tpg_mutex);
+ tpg->tv_tpg_fe_count++;
+ mutex_unlock(&tpg->tv_tpg_mutex);
+ }
+ mutex_unlock(&scsiback_mutex);
+
+ if (!tpg) {
+ pr_err("xen-pvscsi: %s:%d %s\n", phy, lun, error);
+ return -ENODEV;
+ }
+
+ new = kmalloc(sizeof(struct v2p_entry), GFP_KERNEL);
+ if (new == NULL) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ spin_lock_irqsave(&info->v2p_lock, flags);
+
+ /* Check double assignment to identical virtual ID */
+ list_for_each_entry(entry, head, l) {
+ if ((entry->v.chn == v->chn) &&
+ (entry->v.tgt == v->tgt) &&
+ (entry->v.lun == v->lun)) {
+ pr_warn("xen-pvscsi: Virtual ID is already used. Assignment was not performed.\n");
+ err = -EEXIST;
+ goto out;
+ }
+
+ }
+
+ /* Create a new translation entry and add to the list */
+ kref_init(&new->kref);
+ new->v = *v;
+ new->tpg = tpg;
+ new->lun = lun;
+ list_add_tail(&new->l, head);
+
+out:
+ spin_unlock_irqrestore(&info->v2p_lock, flags);
+
+out_free:
+ mutex_lock(&tpg->tv_tpg_mutex);
+ tpg->tv_tpg_fe_count--;
+ mutex_unlock(&tpg->tv_tpg_mutex);
+
+ if (err)
+ kfree(new);
+
+ return err;
+}
+
+static void __scsiback_del_translation_entry(struct v2p_entry *entry)
+{
+ list_del(&entry->l);
+ kref_put(&entry->kref, scsiback_free_translation_entry);
+}
+
+/*
+ Delete the translation entry specfied
+*/
+static int scsiback_del_translation_entry(struct vscsibk_info *info,
+ struct ids_tuple *v)
+{
+ struct v2p_entry *entry;
+ struct list_head *head = &(info->v2p_entry_lists);
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->v2p_lock, flags);
+ /* Find out the translation entry specified */
+ list_for_each_entry(entry, head, l) {
+ if ((entry->v.chn == v->chn) &&
+ (entry->v.tgt == v->tgt) &&
+ (entry->v.lun == v->lun)) {
+ goto found;
+ }
+ }
+
+ spin_unlock_irqrestore(&info->v2p_lock, flags);
+ return 1;
+
+found:
+ /* Delete the translation entry specfied */
+ __scsiback_del_translation_entry(entry);
+
+ spin_unlock_irqrestore(&info->v2p_lock, flags);
+ return 0;
+}
+
+static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state,
+ char *phy, struct ids_tuple *vir)
+{
+ if (!scsiback_add_translation_entry(info, phy, vir)) {
+ if (xenbus_printf(XBT_NIL, info->dev->nodename, state,
+ "%d", XenbusStateInitialised)) {
+ pr_err("xen-pvscsi: xenbus_printf error %s\n", state);
+ scsiback_del_translation_entry(info, vir);
+ }
+ } else {
+ xenbus_printf(XBT_NIL, info->dev->nodename, state,
+ "%d", XenbusStateClosed);
+ }
+}
+
+static void scsiback_do_del_lun(struct vscsibk_info *info, const char *state,
+ struct ids_tuple *vir)
+{
+ if (!scsiback_del_translation_entry(info, vir)) {
+ if (xenbus_printf(XBT_NIL, info->dev->nodename, state,
+ "%d", XenbusStateClosed))
+ pr_err("xen-pvscsi: xenbus_printf error %s\n", state);
+ }
+}
+
+#define VSCSIBACK_OP_ADD_OR_DEL_LUN 1
+#define VSCSIBACK_OP_UPDATEDEV_STATE 2
+
+static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op,
+ char *ent)
+{
+ int err;
+ struct ids_tuple vir;
+ char *val;
+ int device_state;
+ char phy[VSCSI_NAMELEN];
+ char str[64];
+ char state[64];
+ struct xenbus_device *dev = info->dev;
+
+ /* read status */
+ snprintf(state, sizeof(state), "vscsi-devs/%s/state", ent);
+ err = xenbus_scanf(XBT_NIL, dev->nodename, state, "%u", &device_state);
+ if (XENBUS_EXIST_ERR(err))
+ return;
+
+ /* physical SCSI device */
+ snprintf(str, sizeof(str), "vscsi-devs/%s/p-dev", ent);
+ val = xenbus_read(XBT_NIL, dev->nodename, str, NULL);
+ if (IS_ERR(val)) {
+ xenbus_printf(XBT_NIL, dev->nodename, state,
+ "%d", XenbusStateClosed);
+ return;
+ }
+ strlcpy(phy, val, VSCSI_NAMELEN);
+ kfree(val);
+
+ /* virtual SCSI device */
+ snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", ent);
+ err = xenbus_scanf(XBT_NIL, dev->nodename, str, "%u:%u:%u:%u",
+ &vir.hst, &vir.chn, &vir.tgt, &vir.lun);
+ if (XENBUS_EXIST_ERR(err)) {
+ xenbus_printf(XBT_NIL, dev->nodename, state,
+ "%d", XenbusStateClosed);
+ return;
+ }
+
+ switch (op) {
+ case VSCSIBACK_OP_ADD_OR_DEL_LUN:
+ if (device_state == XenbusStateInitialising)
+ scsiback_do_add_lun(info, state, phy, &vir);
+ if (device_state == XenbusStateClosing)
+ scsiback_do_del_lun(info, state, &vir);
+ break;
+
+ case VSCSIBACK_OP_UPDATEDEV_STATE:
+ if (device_state == XenbusStateInitialised) {
+ /* modify vscsi-devs/dev-x/state */
+ if (xenbus_printf(XBT_NIL, dev->nodename, state,
+ "%d", XenbusStateConnected)) {
+ pr_err("xen-pvscsi: xenbus_printf error %s\n",
+ str);
+ scsiback_del_translation_entry(info, &vir);
+ xenbus_printf(XBT_NIL, dev->nodename, state,
+ "%d", XenbusStateClosed);
+ }
+ }
+ break;
+ /*When it is necessary, processing is added here.*/
+ default:
+ break;
+ }
+}
+
+static void scsiback_do_lun_hotplug(struct vscsibk_info *info, int op)
+{
+ int i;
+ char **dir;
+ unsigned int ndir = 0;
+
+ dir = xenbus_directory(XBT_NIL, info->dev->nodename, "vscsi-devs",
+ &ndir);
+ if (IS_ERR(dir))
+ return;
+
+ for (i = 0; i < ndir; i++)
+ scsiback_do_1lun_hotplug(info, op, dir[i]);
+
+ kfree(dir);
+}
+
+static void scsiback_frontend_changed(struct xenbus_device *dev,
+ enum xenbus_state frontend_state)
+{
+ struct vscsibk_info *info = dev_get_drvdata(&dev->dev);
+
+ switch (frontend_state) {
+ case XenbusStateInitialising:
+ break;
+
+ case XenbusStateInitialised:
+ if (scsiback_map(info))
+ break;
+
+ scsiback_do_lun_hotplug(info, VSCSIBACK_OP_ADD_OR_DEL_LUN);
+ xenbus_switch_state(dev, XenbusStateConnected);
+ break;
+
+ case XenbusStateConnected:
+ scsiback_do_lun_hotplug(info, VSCSIBACK_OP_UPDATEDEV_STATE);
+
+ if (dev->state == XenbusStateConnected)
+ break;
+
+ xenbus_switch_state(dev, XenbusStateConnected);
+ break;
+
+ case XenbusStateClosing:
+ if (info->irq)
+ scsiback_disconnect(info);
+
+ xenbus_switch_state(dev, XenbusStateClosing);
+ break;
+
+ case XenbusStateClosed:
+ xenbus_switch_state(dev, XenbusStateClosed);
+ if (xenbus_dev_is_online(dev))
+ break;
+ /* fall through if not online */
+ case XenbusStateUnknown:
+ device_unregister(&dev->dev);
+ break;
+
+ case XenbusStateReconfiguring:
+ scsiback_do_lun_hotplug(info, VSCSIBACK_OP_ADD_OR_DEL_LUN);
+ xenbus_switch_state(dev, XenbusStateReconfigured);
+
+ break;
+
+ default:
+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
+ frontend_state);
+ break;
+ }
+}
+
+/*
+ Release the translation entry specfied
+*/
+static void scsiback_release_translation_entry(struct vscsibk_info *info)
+{
+ struct v2p_entry *entry, *tmp;
+ struct list_head *head = &(info->v2p_entry_lists);
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->v2p_lock, flags);
+
+ list_for_each_entry_safe(entry, tmp, head, l)
+ __scsiback_del_translation_entry(entry);
+
+ spin_unlock_irqrestore(&info->v2p_lock, flags);
+}
+
+static int scsiback_remove(struct xenbus_device *dev)
+{
+ struct vscsibk_info *info = dev_get_drvdata(&dev->dev);
+
+ if (info->irq)
+ scsiback_disconnect(info);
+
+ scsiback_release_translation_entry(info);
+
+ dev_set_drvdata(&dev->dev, NULL);
+
+ return 0;
+}
+
+static int scsiback_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ int err;
+
+ struct vscsibk_info *info = kzalloc(sizeof(struct vscsibk_info),
+ GFP_KERNEL);
+
+ DPRINTK("%p %d\n", dev, dev->otherend_id);
+
+ if (!info) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating backend structure");
+ return -ENOMEM;
+ }
+ info->dev = dev;
+ dev_set_drvdata(&dev->dev, info);
+
+ info->domid = dev->otherend_id;
+ spin_lock_init(&info->ring_lock);
+ info->ring_error = 0;
+ atomic_set(&info->nr_unreplied_reqs, 0);
+ init_waitqueue_head(&info->waiting_to_free);
+ info->dev = dev;
+ info->irq = 0;
+ INIT_LIST_HEAD(&info->v2p_entry_lists);
+ spin_lock_init(&info->v2p_lock);
+
+ err = xenbus_printf(XBT_NIL, dev->nodename, "feature-sg-grant", "%u",
+ SG_ALL);
+ if (err)
+ xenbus_dev_error(dev, err, "writing feature-sg-grant");
+
+ err = xenbus_switch_state(dev, XenbusStateInitWait);
+ if (err)
+ goto fail;
+
+ return 0;
+
+fail:
+ pr_warn("xen-pvscsi: %s failed\n", __func__);
+ scsiback_remove(dev);
+
+ return err;
+}
+
+static char *scsiback_dump_proto_id(struct scsiback_tport *tport)
+{
+ switch (tport->tport_proto_id) {
+ case SCSI_PROTOCOL_SAS:
+ return "SAS";
+ case SCSI_PROTOCOL_FCP:
+ return "FCP";
+ case SCSI_PROTOCOL_ISCSI:
+ return "iSCSI";
+ default:
+ break;
+ }
+
+ return "Unknown";
+}
+
+static u8 scsiback_get_fabric_proto_ident(struct se_portal_group *se_tpg)
+{
+ struct scsiback_tpg *tpg = container_of(se_tpg,
+ struct scsiback_tpg, se_tpg);
+ struct scsiback_tport *tport = tpg->tport;
+
+ switch (tport->tport_proto_id) {
+ case SCSI_PROTOCOL_SAS:
+ return sas_get_fabric_proto_ident(se_tpg);
+ case SCSI_PROTOCOL_FCP:
+ return fc_get_fabric_proto_ident(se_tpg);
+ case SCSI_PROTOCOL_ISCSI:
+ return iscsi_get_fabric_proto_ident(se_tpg);
+ default:
+ pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n",
+ tport->tport_proto_id);
+ break;
+ }
+
+ return sas_get_fabric_proto_ident(se_tpg);
+}
+
+static char *scsiback_get_fabric_wwn(struct se_portal_group *se_tpg)
+{
+ struct scsiback_tpg *tpg = container_of(se_tpg,
+ struct scsiback_tpg, se_tpg);
+ struct scsiback_tport *tport = tpg->tport;
+
+ return &tport->tport_name[0];
+}
+
+static u16 scsiback_get_tag(struct se_portal_group *se_tpg)
+{
+ struct scsiback_tpg *tpg = container_of(se_tpg,
+ struct scsiback_tpg, se_tpg);
+ return tpg->tport_tpgt;
+}
+
+static u32 scsiback_get_default_depth(struct se_portal_group *se_tpg)
+{
+ return 1;
+}
+
+static u32
+scsiback_get_pr_transport_id(struct se_portal_group *se_tpg,
+ struct se_node_acl *se_nacl,
+ struct t10_pr_registration *pr_reg,
+ int *format_code,
+ unsigned char *buf)
+{
+ struct scsiback_tpg *tpg = container_of(se_tpg,
+ struct scsiback_tpg, se_tpg);
+ struct scsiback_tport *tport = tpg->tport;
+
+ switch (tport->tport_proto_id) {
+ case SCSI_PROTOCOL_SAS:
+ return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
+ format_code, buf);
+ case SCSI_PROTOCOL_FCP:
+ return fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
+ format_code, buf);
+ case SCSI_PROTOCOL_ISCSI:
+ return iscsi_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
+ format_code, buf);
+ default:
+ pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n",
+ tport->tport_proto_id);
+ break;
+ }
+
+ return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
+ format_code, buf);
+}
+
+static u32
+scsiback_get_pr_transport_id_len(struct se_portal_group *se_tpg,
+ struct se_node_acl *se_nacl,
+ struct t10_pr_registration *pr_reg,
+ int *format_code)
+{
+ struct scsiback_tpg *tpg = container_of(se_tpg,
+ struct scsiback_tpg, se_tpg);
+ struct scsiback_tport *tport = tpg->tport;
+
+ switch (tport->tport_proto_id) {
+ case SCSI_PROTOCOL_SAS:
+ return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
+ format_code);
+ case SCSI_PROTOCOL_FCP:
+ return fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
+ format_code);
+ case SCSI_PROTOCOL_ISCSI:
+ return iscsi_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
+ format_code);
+ default:
+ pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n",
+ tport->tport_proto_id);
+ break;
+ }
+
+ return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
+ format_code);
+}
+
+static char *
+scsiback_parse_pr_out_transport_id(struct se_portal_group *se_tpg,
+ const char *buf,
+ u32 *out_tid_len,
+ char **port_nexus_ptr)
+{
+ struct scsiback_tpg *tpg = container_of(se_tpg,
+ struct scsiback_tpg, se_tpg);
+ struct scsiback_tport *tport = tpg->tport;
+
+ switch (tport->tport_proto_id) {
+ case SCSI_PROTOCOL_SAS:
+ return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
+ port_nexus_ptr);
+ case SCSI_PROTOCOL_FCP:
+ return fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
+ port_nexus_ptr);
+ case SCSI_PROTOCOL_ISCSI:
+ return iscsi_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
+ port_nexus_ptr);
+ default:
+ pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n",
+ tport->tport_proto_id);
+ break;
+ }
+
+ return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
+ port_nexus_ptr);
+}
+
+static struct se_wwn *
+scsiback_make_tport(struct target_fabric_configfs *tf,
+ struct config_group *group,
+ const char *name)
+{
+ struct scsiback_tport *tport;
+ char *ptr;
+ u64 wwpn = 0;
+ int off = 0;
+
+ tport = kzalloc(sizeof(struct scsiback_tport), GFP_KERNEL);
+ if (!tport)
+ return ERR_PTR(-ENOMEM);
+
+ tport->tport_wwpn = wwpn;
+ /*
+ * Determine the emulated Protocol Identifier and Target Port Name
+ * based on the incoming configfs directory name.
+ */
+ ptr = strstr(name, "naa.");
+ if (ptr) {
+ tport->tport_proto_id = SCSI_PROTOCOL_SAS;
+ goto check_len;
+ }
+ ptr = strstr(name, "fc.");
+ if (ptr) {
+ tport->tport_proto_id = SCSI_PROTOCOL_FCP;
+ off = 3; /* Skip over "fc." */
+ goto check_len;
+ }
+ ptr = strstr(name, "iqn.");
+ if (ptr) {
+ tport->tport_proto_id = SCSI_PROTOCOL_ISCSI;
+ goto check_len;
+ }
+
+ pr_err("Unable to locate prefix for emulated Target Port: %s\n", name);
+ kfree(tport);
+ return ERR_PTR(-EINVAL);
+
+check_len:
+ if (strlen(name) >= VSCSI_NAMELEN) {
+ pr_err("Emulated %s Address: %s, exceeds max: %d\n", name,
+ scsiback_dump_proto_id(tport), VSCSI_NAMELEN);
+ kfree(tport);
+ return ERR_PTR(-EINVAL);
+ }
+ snprintf(&tport->tport_name[0], VSCSI_NAMELEN, "%s", &name[off]);
+
+ pr_debug("xen-pvscsi: Allocated emulated Target %s Address: %s\n",
+ scsiback_dump_proto_id(tport), name);
+
+ return &tport->tport_wwn;
+}
+
+static void scsiback_drop_tport(struct se_wwn *wwn)
+{
+ struct scsiback_tport *tport = container_of(wwn,
+ struct scsiback_tport, tport_wwn);
+
+ pr_debug("xen-pvscsi: Deallocating emulated Target %s Address: %s\n",
+ scsiback_dump_proto_id(tport), tport->tport_name);
+
+ kfree(tport);
+}
+
+static struct se_node_acl *
+scsiback_alloc_fabric_acl(struct se_portal_group *se_tpg)
+{
+ return kzalloc(sizeof(struct se_node_acl), GFP_KERNEL);
+}
+
+static void
+scsiback_release_fabric_acl(struct se_portal_group *se_tpg,
+ struct se_node_acl *se_nacl)
+{
+ kfree(se_nacl);
+}
+
+static u32 scsiback_tpg_get_inst_index(struct se_portal_group *se_tpg)
+{
+ return 1;
+}
+
+static int scsiback_check_stop_free(struct se_cmd *se_cmd)
+{
+ /*
+ * Do not release struct se_cmd's containing a valid TMR
+ * pointer. These will be released directly in scsiback_device_action()
+ * with transport_generic_free_cmd().
+ */
+ if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
+ return 0;
+
+ transport_generic_free_cmd(se_cmd, 0);
+ return 1;
+}
+
+static void scsiback_release_cmd(struct se_cmd *se_cmd)
+{
+ struct vscsibk_pend *pending_req = container_of(se_cmd,
+ struct vscsibk_pend, se_cmd);
+
+ kmem_cache_free(scsiback_cachep, pending_req);
+}
+
+static int scsiback_shutdown_session(struct se_session *se_sess)
+{
+ return 0;
+}
+
+static void scsiback_close_session(struct se_session *se_sess)
+{
+}
+
+static u32 scsiback_sess_get_index(struct se_session *se_sess)
+{
+ return 0;
+}
+
+static int scsiback_write_pending(struct se_cmd *se_cmd)
+{
+ /* Go ahead and process the write immediately */
+ target_execute_cmd(se_cmd);
+
+ return 0;
+}
+
+static int scsiback_write_pending_status(struct se_cmd *se_cmd)
+{
+ return 0;
+}
+
+static void scsiback_set_default_node_attrs(struct se_node_acl *nacl)
+{
+}
+
+static u32 scsiback_get_task_tag(struct se_cmd *se_cmd)
+{
+ struct vscsibk_pend *pending_req = container_of(se_cmd,
+ struct vscsibk_pend, se_cmd);
+
+ return pending_req->rqid;
+}
+
+static int scsiback_get_cmd_state(struct se_cmd *se_cmd)
+{
+ return 0;
+}
+
+static int scsiback_queue_data_in(struct se_cmd *se_cmd)
+{
+ struct vscsibk_pend *pending_req = container_of(se_cmd,
+ struct vscsibk_pend, se_cmd);
+
+ pending_req->result = SAM_STAT_GOOD;
+ scsiback_cmd_done(pending_req);
+ return 0;
+}
+
+static int scsiback_queue_status(struct se_cmd *se_cmd)
+{
+ struct vscsibk_pend *pending_req = container_of(se_cmd,
+ struct vscsibk_pend, se_cmd);
+
+ if (se_cmd->sense_buffer &&
+ ((se_cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ||
+ (se_cmd->se_cmd_flags & SCF_EMULATED_TASK_SENSE)))
+ pending_req->result = (DRIVER_SENSE << 24) |
+ SAM_STAT_CHECK_CONDITION;
+ else
+ pending_req->result = se_cmd->scsi_status;
+
+ scsiback_cmd_done(pending_req);
+ return 0;
+}
+
+static void scsiback_queue_tm_rsp(struct se_cmd *se_cmd)
+{
+ struct se_tmr_req *se_tmr = se_cmd->se_tmr_req;
+ struct scsiback_tmr *tmr = se_tmr->fabric_tmr_ptr;
+
+ atomic_set(&tmr->tmr_complete, 1);
+ wake_up(&tmr->tmr_wait);
+}
+
+static void scsiback_aborted_task(struct se_cmd *se_cmd)
+{
+}
+
+static ssize_t scsiback_tpg_param_show_alias(struct se_portal_group *se_tpg,
+ char *page)
+{
+ struct scsiback_tpg *tpg = container_of(se_tpg, struct scsiback_tpg,
+ se_tpg);
+ ssize_t rb;
+
+ mutex_lock(&tpg->tv_tpg_mutex);
+ rb = snprintf(page, PAGE_SIZE, "%s\n", tpg->param_alias);
+ mutex_unlock(&tpg->tv_tpg_mutex);
+
+ return rb;
+}
+
+static ssize_t scsiback_tpg_param_store_alias(struct se_portal_group *se_tpg,
+ const char *page, size_t count)
+{
+ struct scsiback_tpg *tpg = container_of(se_tpg, struct scsiback_tpg,
+ se_tpg);
+ int len;
+
+ if (strlen(page) >= VSCSI_NAMELEN) {
+ pr_err("param alias: %s, exceeds max: %d\n", page,
+ VSCSI_NAMELEN);
+ return -EINVAL;
+ }
+
+ mutex_lock(&tpg->tv_tpg_mutex);
+ len = snprintf(tpg->param_alias, VSCSI_NAMELEN, "%s", page);
+ if (tpg->param_alias[len - 1] == '\n')
+ tpg->param_alias[len - 1] = '\0';
+ mutex_unlock(&tpg->tv_tpg_mutex);
+
+ return count;
+}
+
+TF_TPG_PARAM_ATTR(scsiback, alias, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *scsiback_param_attrs[] = {
+ &scsiback_tpg_param_alias.attr,
+ NULL,
+};
+
+static int scsiback_make_nexus(struct scsiback_tpg *tpg,
+ const char *name)
+{
+ struct se_portal_group *se_tpg;
+ struct se_session *se_sess;
+ struct scsiback_nexus *tv_nexus;
+
+ mutex_lock(&tpg->tv_tpg_mutex);
+ if (tpg->tpg_nexus) {
+ mutex_unlock(&tpg->tv_tpg_mutex);
+ pr_debug("tpg->tpg_nexus already exists\n");
+ return -EEXIST;
+ }
+ se_tpg = &tpg->se_tpg;
+
+ tv_nexus = kzalloc(sizeof(struct scsiback_nexus), GFP_KERNEL);
+ if (!tv_nexus) {
+ mutex_unlock(&tpg->tv_tpg_mutex);
+ return -ENOMEM;
+ }
+ /*
+ * Initialize the struct se_session pointer
+ */
+ tv_nexus->tvn_se_sess = transport_init_session(TARGET_PROT_NORMAL);
+ if (IS_ERR(tv_nexus->tvn_se_sess)) {
+ mutex_unlock(&tpg->tv_tpg_mutex);
+ kfree(tv_nexus);
+ return -ENOMEM;
+ }
+ se_sess = tv_nexus->tvn_se_sess;
+ /*
+ * Since we are running in 'demo mode' this call with generate a
+ * struct se_node_acl for the scsiback struct se_portal_group with
+ * the SCSI Initiator port name of the passed configfs group 'name'.
+ */
+ tv_nexus->tvn_se_sess->se_node_acl = core_tpg_check_initiator_node_acl(
+ se_tpg, (unsigned char *)name);
+ if (!tv_nexus->tvn_se_sess->se_node_acl) {
+ mutex_unlock(&tpg->tv_tpg_mutex);
+ pr_debug("core_tpg_check_initiator_node_acl() failed for %s\n",
+ name);
+ goto out;
+ }
+ /*
+ * Now register the TCM pvscsi virtual I_T Nexus as active with the
+ * call to __transport_register_session()
+ */
+ __transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl,
+ tv_nexus->tvn_se_sess, tv_nexus);
+ tpg->tpg_nexus = tv_nexus;
+
+ mutex_unlock(&tpg->tv_tpg_mutex);
+ return 0;
+
+out:
+ transport_free_session(se_sess);
+ kfree(tv_nexus);
+ return -ENOMEM;
+}
+
+static int scsiback_drop_nexus(struct scsiback_tpg *tpg)
+{
+ struct se_session *se_sess;
+ struct scsiback_nexus *tv_nexus;
+
+ mutex_lock(&tpg->tv_tpg_mutex);
+ tv_nexus = tpg->tpg_nexus;
+ if (!tv_nexus) {
+ mutex_unlock(&tpg->tv_tpg_mutex);
+ return -ENODEV;
+ }
+
+ se_sess = tv_nexus->tvn_se_sess;
+ if (!se_sess) {
+ mutex_unlock(&tpg->tv_tpg_mutex);
+ return -ENODEV;
+ }
+
+ if (tpg->tv_tpg_port_count != 0) {
+ mutex_unlock(&tpg->tv_tpg_mutex);
+ pr_err("Unable to remove xen-pvscsi I_T Nexus with active TPG port count: %d\n",
+ tpg->tv_tpg_port_count);
+ return -EBUSY;
+ }
+
+ if (tpg->tv_tpg_fe_count != 0) {
+ mutex_unlock(&tpg->tv_tpg_mutex);
+ pr_err("Unable to remove xen-pvscsi I_T Nexus with active TPG frontend count: %d\n",
+ tpg->tv_tpg_fe_count);
+ return -EBUSY;
+ }
+
+ pr_debug("xen-pvscsi: Removing I_T Nexus to emulated %s Initiator Port: %s\n",
+ scsiback_dump_proto_id(tpg->tport),
+ tv_nexus->tvn_se_sess->se_node_acl->initiatorname);
+
+ /*
+ * Release the SCSI I_T Nexus to the emulated xen-pvscsi Target Port
+ */
+ transport_deregister_session(tv_nexus->tvn_se_sess);
+ tpg->tpg_nexus = NULL;
+ mutex_unlock(&tpg->tv_tpg_mutex);
+
+ kfree(tv_nexus);
+ return 0;
+}
+
+static ssize_t scsiback_tpg_show_nexus(struct se_portal_group *se_tpg,
+ char *page)
+{
+ struct scsiback_tpg *tpg = container_of(se_tpg,
+ struct scsiback_tpg, se_tpg);
+ struct scsiback_nexus *tv_nexus;
+ ssize_t ret;
+
+ mutex_lock(&tpg->tv_tpg_mutex);
+ tv_nexus = tpg->tpg_nexus;
+ if (!tv_nexus) {
+ mutex_unlock(&tpg->tv_tpg_mutex);
+ return -ENODEV;
+ }
+ ret = snprintf(page, PAGE_SIZE, "%s\n",
+ tv_nexus->tvn_se_sess->se_node_acl->initiatorname);
+ mutex_unlock(&tpg->tv_tpg_mutex);
+
+ return ret;
+}
+
+static ssize_t scsiback_tpg_store_nexus(struct se_portal_group *se_tpg,
+ const char *page,
+ size_t count)
+{
+ struct scsiback_tpg *tpg = container_of(se_tpg,
+ struct scsiback_tpg, se_tpg);
+ struct scsiback_tport *tport_wwn = tpg->tport;
+ unsigned char i_port[VSCSI_NAMELEN], *ptr, *port_ptr;
+ int ret;
+ /*
+ * Shutdown the active I_T nexus if 'NULL' is passed..
+ */
+ if (!strncmp(page, "NULL", 4)) {
+ ret = scsiback_drop_nexus(tpg);
+ return (!ret) ? count : ret;
+ }
+ /*
+ * Otherwise make sure the passed virtual Initiator port WWN matches
+ * the fabric protocol_id set in scsiback_make_tport(), and call
+ * scsiback_make_nexus().
+ */
+ if (strlen(page) >= VSCSI_NAMELEN) {
+ pr_err("Emulated NAA Sas Address: %s, exceeds max: %d\n",
+ page, VSCSI_NAMELEN);
+ return -EINVAL;
+ }
+ snprintf(&i_port[0], VSCSI_NAMELEN, "%s", page);
+
+ ptr = strstr(i_port, "naa.");
+ if (ptr) {
+ if (tport_wwn->tport_proto_id != SCSI_PROTOCOL_SAS) {
+ pr_err("Passed SAS Initiator Port %s does not match target port protoid: %s\n",
+ i_port, scsiback_dump_proto_id(tport_wwn));
+ return -EINVAL;
+ }
+ port_ptr = &i_port[0];
+ goto check_newline;
+ }
+ ptr = strstr(i_port, "fc.");
+ if (ptr) {
+ if (tport_wwn->tport_proto_id != SCSI_PROTOCOL_FCP) {
+ pr_err("Passed FCP Initiator Port %s does not match target port protoid: %s\n",
+ i_port, scsiback_dump_proto_id(tport_wwn));
+ return -EINVAL;
+ }
+ port_ptr = &i_port[3]; /* Skip over "fc." */
+ goto check_newline;
+ }
+ ptr = strstr(i_port, "iqn.");
+ if (ptr) {
+ if (tport_wwn->tport_proto_id != SCSI_PROTOCOL_ISCSI) {
+ pr_err("Passed iSCSI Initiator Port %s does not match target port protoid: %s\n",
+ i_port, scsiback_dump_proto_id(tport_wwn));
+ return -EINVAL;
+ }
+ port_ptr = &i_port[0];
+ goto check_newline;
+ }
+ pr_err("Unable to locate prefix for emulated Initiator Port: %s\n",
+ i_port);
+ return -EINVAL;
+ /*
+ * Clear any trailing newline for the NAA WWN
+ */
+check_newline:
+ if (i_port[strlen(i_port) - 1] == '\n')
+ i_port[strlen(i_port) - 1] = '\0';
+
+ ret = scsiback_make_nexus(tpg, port_ptr);
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+TF_TPG_BASE_ATTR(scsiback, nexus, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *scsiback_tpg_attrs[] = {
+ &scsiback_tpg_nexus.attr,
+ NULL,
+};
+
+static ssize_t
+scsiback_wwn_show_attr_version(struct target_fabric_configfs *tf,
+ char *page)
+{
+ return sprintf(page, "xen-pvscsi fabric module %s on %s/%s on "
+ UTS_RELEASE"\n",
+ VSCSI_VERSION, utsname()->sysname, utsname()->machine);
+}
+
+TF_WWN_ATTR_RO(scsiback, version);
+
+static struct configfs_attribute *scsiback_wwn_attrs[] = {
+ &scsiback_wwn_version.attr,
+ NULL,
+};
+
+static char *scsiback_get_fabric_name(void)
+{
+ return "xen-pvscsi";
+}
+
+static int scsiback_port_link(struct se_portal_group *se_tpg,
+ struct se_lun *lun)
+{
+ struct scsiback_tpg *tpg = container_of(se_tpg,
+ struct scsiback_tpg, se_tpg);
+
+ mutex_lock(&tpg->tv_tpg_mutex);
+ tpg->tv_tpg_port_count++;
+ mutex_unlock(&tpg->tv_tpg_mutex);
+
+ return 0;
+}
+
+static void scsiback_port_unlink(struct se_portal_group *se_tpg,
+ struct se_lun *lun)
+{
+ struct scsiback_tpg *tpg = container_of(se_tpg,
+ struct scsiback_tpg, se_tpg);
+
+ mutex_lock(&tpg->tv_tpg_mutex);
+ tpg->tv_tpg_port_count--;
+ mutex_unlock(&tpg->tv_tpg_mutex);
+}
+
+static struct se_portal_group *
+scsiback_make_tpg(struct se_wwn *wwn,
+ struct config_group *group,
+ const char *name)
+{
+ struct scsiback_tport *tport = container_of(wwn,
+ struct scsiback_tport, tport_wwn);
+
+ struct scsiback_tpg *tpg;
+ u16 tpgt;
+ int ret;
+
+ if (strstr(name, "tpgt_") != name)
+ return ERR_PTR(-EINVAL);
+ ret = kstrtou16(name + 5, 10, &tpgt);
+ if (ret)
+ return ERR_PTR(ret);
+
+ tpg = kzalloc(sizeof(struct scsiback_tpg), GFP_KERNEL);
+ if (!tpg)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&tpg->tv_tpg_mutex);
+ INIT_LIST_HEAD(&tpg->tv_tpg_list);
+ INIT_LIST_HEAD(&tpg->info_list);
+ tpg->tport = tport;
+ tpg->tport_tpgt = tpgt;
+
+ ret = core_tpg_register(&scsiback_fabric_configfs->tf_ops, wwn,
+ &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL);
+ if (ret < 0) {
+ kfree(tpg);
+ return NULL;
+ }
+ mutex_lock(&scsiback_mutex);
+ list_add_tail(&tpg->tv_tpg_list, &scsiback_list);
+ mutex_unlock(&scsiback_mutex);
+
+ return &tpg->se_tpg;
+}
+
+static void scsiback_drop_tpg(struct se_portal_group *se_tpg)
+{
+ struct scsiback_tpg *tpg = container_of(se_tpg,
+ struct scsiback_tpg, se_tpg);
+
+ mutex_lock(&scsiback_mutex);
+ list_del(&tpg->tv_tpg_list);
+ mutex_unlock(&scsiback_mutex);
+ /*
+ * Release the virtual I_T Nexus for this xen-pvscsi TPG
+ */
+ scsiback_drop_nexus(tpg);
+ /*
+ * Deregister the se_tpg from TCM..
+ */
+ core_tpg_deregister(se_tpg);
+ kfree(tpg);
+}
+
+static int scsiback_check_true(struct se_portal_group *se_tpg)
+{
+ return 1;
+}
+
+static int scsiback_check_false(struct se_portal_group *se_tpg)
+{
+ return 0;
+}
+
+static struct target_core_fabric_ops scsiback_ops = {
+ .get_fabric_name = scsiback_get_fabric_name,
+ .get_fabric_proto_ident = scsiback_get_fabric_proto_ident,
+ .tpg_get_wwn = scsiback_get_fabric_wwn,
+ .tpg_get_tag = scsiback_get_tag,
+ .tpg_get_default_depth = scsiback_get_default_depth,
+ .tpg_get_pr_transport_id = scsiback_get_pr_transport_id,
+ .tpg_get_pr_transport_id_len = scsiback_get_pr_transport_id_len,
+ .tpg_parse_pr_out_transport_id = scsiback_parse_pr_out_transport_id,
+ .tpg_check_demo_mode = scsiback_check_true,
+ .tpg_check_demo_mode_cache = scsiback_check_true,
+ .tpg_check_demo_mode_write_protect = scsiback_check_false,
+ .tpg_check_prod_mode_write_protect = scsiback_check_false,
+ .tpg_alloc_fabric_acl = scsiback_alloc_fabric_acl,
+ .tpg_release_fabric_acl = scsiback_release_fabric_acl,
+ .tpg_get_inst_index = scsiback_tpg_get_inst_index,
+ .check_stop_free = scsiback_check_stop_free,
+ .release_cmd = scsiback_release_cmd,
+ .put_session = NULL,
+ .shutdown_session = scsiback_shutdown_session,
+ .close_session = scsiback_close_session,
+ .sess_get_index = scsiback_sess_get_index,
+ .sess_get_initiator_sid = NULL,
+ .write_pending = scsiback_write_pending,
+ .write_pending_status = scsiback_write_pending_status,
+ .set_default_node_attributes = scsiback_set_default_node_attrs,
+ .get_task_tag = scsiback_get_task_tag,
+ .get_cmd_state = scsiback_get_cmd_state,
+ .queue_data_in = scsiback_queue_data_in,
+ .queue_status = scsiback_queue_status,
+ .queue_tm_rsp = scsiback_queue_tm_rsp,
+ .aborted_task = scsiback_aborted_task,
+ /*
+ * Setup callers for generic logic in target_core_fabric_configfs.c
+ */
+ .fabric_make_wwn = scsiback_make_tport,
+ .fabric_drop_wwn = scsiback_drop_tport,
+ .fabric_make_tpg = scsiback_make_tpg,
+ .fabric_drop_tpg = scsiback_drop_tpg,
+ .fabric_post_link = scsiback_port_link,
+ .fabric_pre_unlink = scsiback_port_unlink,
+ .fabric_make_np = NULL,
+ .fabric_drop_np = NULL,
+#if 0
+ .fabric_make_nodeacl = scsiback_make_nodeacl,
+ .fabric_drop_nodeacl = scsiback_drop_nodeacl,
+#endif
+};
+
+static int scsiback_register_configfs(void)
+{
+ struct target_fabric_configfs *fabric;
+ int ret;
+
+ pr_debug("xen-pvscsi: fabric module %s on %s/%s on "UTS_RELEASE"\n",
+ VSCSI_VERSION, utsname()->sysname, utsname()->machine);
+ /*
+ * Register the top level struct config_item_type with TCM core
+ */
+ fabric = target_fabric_configfs_init(THIS_MODULE, "xen-pvscsi");
+ if (IS_ERR(fabric))
+ return PTR_ERR(fabric);
+
+ /*
+ * Setup fabric->tf_ops from our local scsiback_ops
+ */
+ fabric->tf_ops = scsiback_ops;
+ /*
+ * Setup default attribute lists for various fabric->tf_cit_tmpl
+ */
+ fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = scsiback_wwn_attrs;
+ fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = scsiback_tpg_attrs;
+ fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;
+ fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = scsiback_param_attrs;
+ fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
+ fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
+ fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
+ fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
+ fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
+ /*
+ * Register the fabric for use within TCM
+ */
+ ret = target_fabric_configfs_register(fabric);
+ if (ret < 0) {
+ target_fabric_configfs_free(fabric);
+ return ret;
+ }
+ /*
+ * Setup our local pointer to *fabric
+ */
+ scsiback_fabric_configfs = fabric;
+ pr_debug("xen-pvscsi: Set fabric -> scsiback_fabric_configfs\n");
+ return 0;
+};
+
+static void scsiback_deregister_configfs(void)
+{
+ if (!scsiback_fabric_configfs)
+ return;
+
+ target_fabric_configfs_deregister(scsiback_fabric_configfs);
+ scsiback_fabric_configfs = NULL;
+ pr_debug("xen-pvscsi: Cleared scsiback_fabric_configfs\n");
+};
+
+static const struct xenbus_device_id scsiback_ids[] = {
+ { "vscsi" },
+ { "" }
+};
+
+static struct xenbus_driver scsiback_driver = {
+ .ids = scsiback_ids,
+ .probe = scsiback_probe,
+ .remove = scsiback_remove,
+ .otherend_changed = scsiback_frontend_changed
+};
+
+static void scsiback_init_pend(void *p)
+{
+ struct vscsibk_pend *pend = p;
+ int i;
+
+ memset(pend, 0, sizeof(*pend));
+ for (i = 0; i < VSCSI_MAX_GRANTS; i++)
+ pend->grant_handles[i] = SCSIBACK_INVALID_HANDLE;
+}
+
+static int __init scsiback_init(void)
+{
+ int ret;
+
+ if (!xen_domain())
+ return -ENODEV;
+
+ scsiback_cachep = kmem_cache_create("vscsiif_cache",
+ sizeof(struct vscsibk_pend), 0, 0, scsiback_init_pend);
+ if (!scsiback_cachep)
+ return -ENOMEM;
+
+ ret = xenbus_register_backend(&scsiback_driver);
+ if (ret)
+ goto out_cache_destroy;
+
+ ret = scsiback_register_configfs();
+ if (ret)
+ goto out_unregister_xenbus;
+
+ return 0;
+
+out_unregister_xenbus:
+ xenbus_unregister_driver(&scsiback_driver);
+out_cache_destroy:
+ kmem_cache_destroy(scsiback_cachep);
+ pr_err("xen-pvscsi: %s: error %d\n", __func__, ret);
+ return ret;
+}
+
+static void __exit scsiback_exit(void)
+{
+ struct page *page;
+
+ while (free_pages_num) {
+ if (get_free_page(&page))
+ BUG();
+ free_xenballooned_pages(1, &page);
+ }
+ scsiback_deregister_configfs();
+ xenbus_unregister_driver(&scsiback_driver);
+ kmem_cache_destroy(scsiback_cachep);
+}
+
+module_init(scsiback_init);
+module_exit(scsiback_exit);
+
+MODULE_DESCRIPTION("Xen SCSI backend driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("xen-backend:vscsi");
+MODULE_AUTHOR("Juergen Gross <jgross@suse.com>");
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 439c9dca9eee..ca744102b666 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -259,7 +259,6 @@ static char *error_path(struct xenbus_device *dev)
static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
const char *fmt, va_list ap)
{
- int ret;
unsigned int len;
char *printf_buffer = NULL;
char *path_buffer = NULL;
@@ -270,9 +269,7 @@ static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
goto fail;
len = sprintf(printf_buffer, "%i ", -err);
- ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
-
- BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1);
+ vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
dev_err(&dev->dev, "%s\n", printf_buffer);
@@ -361,8 +358,8 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
* @ring_mfn: mfn of ring to grant
* Grant access to the given @ring_mfn to the peer of the given device. Return
- * 0 on success, or -errno on error. On error, the device will switch to
- * XenbusStateClosing, and the error will be saved in the store.
+ * a grant reference on success, or -errno on error. On error, the device will
+ * switch to XenbusStateClosing, and the error will be saved in the store.
*/
int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
{
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 3c0a74b3e9b1..564b31584860 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -297,9 +297,13 @@ void xenbus_dev_shutdown(struct device *_dev)
EXPORT_SYMBOL_GPL(xenbus_dev_shutdown);
int xenbus_register_driver_common(struct xenbus_driver *drv,
- struct xen_bus_type *bus)
+ struct xen_bus_type *bus,
+ struct module *owner, const char *mod_name)
{
+ drv->driver.name = drv->name ? drv->name : drv->ids[0].devicetype;
drv->driver.bus = &bus->bus;
+ drv->driver.owner = owner;
+ drv->driver.mod_name = mod_name;
return driver_register(&drv->driver);
}
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h
index 1085ec294a19..c9ec7ca1f7ab 100644
--- a/drivers/xen/xenbus/xenbus_probe.h
+++ b/drivers/xen/xenbus/xenbus_probe.h
@@ -60,7 +60,9 @@ extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
extern int xenbus_dev_probe(struct device *_dev);
extern int xenbus_dev_remove(struct device *_dev);
extern int xenbus_register_driver_common(struct xenbus_driver *drv,
- struct xen_bus_type *bus);
+ struct xen_bus_type *bus,
+ struct module *owner,
+ const char *mod_name);
extern int xenbus_probe_node(struct xen_bus_type *bus,
const char *type,
const char *nodename);
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 5125dce11a60..04f7f85a5edf 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -234,13 +234,15 @@ int xenbus_dev_is_online(struct xenbus_device *dev)
}
EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
-int xenbus_register_backend(struct xenbus_driver *drv)
+int __xenbus_register_backend(struct xenbus_driver *drv, struct module *owner,
+ const char *mod_name)
{
drv->read_otherend_details = read_frontend_details;
- return xenbus_register_driver_common(drv, &xenbus_backend);
+ return xenbus_register_driver_common(drv, &xenbus_backend,
+ owner, mod_name);
}
-EXPORT_SYMBOL_GPL(xenbus_register_backend);
+EXPORT_SYMBOL_GPL(__xenbus_register_backend);
static int backend_probe_and_watch(struct notifier_block *notifier,
unsigned long event,
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index cb385c10d2b1..bcb53bdc469c 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -317,13 +317,15 @@ static void wait_for_devices(struct xenbus_driver *xendrv)
print_device_status);
}
-int xenbus_register_frontend(struct xenbus_driver *drv)
+int __xenbus_register_frontend(struct xenbus_driver *drv, struct module *owner,
+ const char *mod_name)
{
int ret;
drv->read_otherend_details = read_backend_details;
- ret = xenbus_register_driver_common(drv, &xenbus_frontend);
+ ret = xenbus_register_driver_common(drv, &xenbus_frontend,
+ owner, mod_name);
if (ret)
return ret;
@@ -332,7 +334,7 @@ int xenbus_register_frontend(struct xenbus_driver *drv)
return 0;
}
-EXPORT_SYMBOL_GPL(xenbus_register_frontend);
+EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
static int backend_state;
diff --git a/include/xen/events.h b/include/xen/events.h
index 8bee7a75e850..5321cd9636e6 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -28,6 +28,8 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
unsigned long irqflags,
const char *devname,
void *dev_id);
+int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
+ unsigned int remote_port);
int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
unsigned int remote_port,
irq_handler_t handler,
diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h
index 6f4eae328ca7..f90b03454659 100644
--- a/include/xen/interface/elfnote.h
+++ b/include/xen/interface/elfnote.h
@@ -3,6 +3,24 @@
*
* Definitions used for the Xen ELF notes.
*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
* Copyright (c) 2006, Ian Campbell, XenSource Ltd.
*/
@@ -18,12 +36,13 @@
*
* LEGACY indicated the fields in the legacy __xen_guest string which
* this a note type replaces.
+ *
+ * String values (for non-legacy) are NULL terminated ASCII, also known
+ * as ASCIZ type.
*/
/*
* NAME=VALUE pair (string).
- *
- * LEGACY: FEATURES and PAE
*/
#define XEN_ELFNOTE_INFO 0
@@ -137,10 +156,30 @@
/*
* Whether or not the guest supports cooperative suspend cancellation.
+ * This is a numeric value.
+ *
+ * Default is 0
*/
#define XEN_ELFNOTE_SUSPEND_CANCEL 14
/*
+ * The (non-default) location the initial phys-to-machine map should be
+ * placed at by the hypervisor (Dom0) or the tools (DomU).
+ * The kernel must be prepared for this mapping to be established using
+ * large pages, despite such otherwise not being available to guests.
+ * The kernel must also be able to handle the page table pages used for
+ * this mapping not being accessible through the initial mapping.
+ * (Only x86-64 supports this at present.)
+ */
+#define XEN_ELFNOTE_INIT_P2M 15
+
+/*
+ * Whether or not the guest can deal with being passed an initrd not
+ * mapped through its initial page tables.
+ */
+#define XEN_ELFNOTE_MOD_START_PFN 16
+
+/*
* The features supported by this kernel (numeric).
*
* Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a
@@ -153,6 +192,11 @@
*/
#define XEN_ELFNOTE_SUPPORTED_FEATURES 17
+/*
+ * The number of the highest elfnote defined.
+ */
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES
+
#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
/*
diff --git a/include/xen/interface/io/vscsiif.h b/include/xen/interface/io/vscsiif.h
new file mode 100644
index 000000000000..d07d7aca8d1c
--- /dev/null
+++ b/include/xen/interface/io/vscsiif.h
@@ -0,0 +1,229 @@
+/******************************************************************************
+ * vscsiif.h
+ *
+ * Based on the blkif.h code.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright(c) FUJITSU Limited 2008.
+ */
+
+#ifndef __XEN__PUBLIC_IO_SCSI_H__
+#define __XEN__PUBLIC_IO_SCSI_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Feature and Parameter Negotiation
+ * =================================
+ * The two halves of a Xen pvSCSI driver utilize nodes within the XenStore to
+ * communicate capabilities and to negotiate operating parameters. This
+ * section enumerates these nodes which reside in the respective front and
+ * backend portions of the XenStore, following the XenBus convention.
+ *
+ * Any specified default value is in effect if the corresponding XenBus node
+ * is not present in the XenStore.
+ *
+ * XenStore nodes in sections marked "PRIVATE" are solely for use by the
+ * driver side whose XenBus tree contains them.
+ *
+ *****************************************************************************
+ * Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------ Backend Device Identification (PRIVATE) ------------------
+ *
+ * p-devname
+ * Values: string
+ *
+ * A free string used to identify the physical device (e.g. a disk name).
+ *
+ * p-dev
+ * Values: string
+ *
+ * A string specifying the backend device: either a 4-tuple "h:c:t:l"
+ * (host, controller, target, lun, all integers), or a WWN (e.g.
+ * "naa.60014054ac780582").
+ *
+ * v-dev
+ * Values: string
+ *
+ * A string specifying the frontend device in form of a 4-tuple "h:c:t:l"
+ * (host, controller, target, lun, all integers).
+ *
+ *--------------------------------- Features ---------------------------------
+ *
+ * feature-sg-grant
+ * Values: unsigned [VSCSIIF_SG_TABLESIZE...65535]
+ * Default Value: 0
+ *
+ * Specifies the maximum number of scatter/gather elements in grant pages
+ * supported. If not set, the backend supports up to VSCSIIF_SG_TABLESIZE
+ * SG elements specified directly in the request.
+ *
+ *****************************************************************************
+ * Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ * Values: unsigned
+ *
+ * The identifier of the Xen event channel used to signal activity
+ * in the ring buffer.
+ *
+ * ring-ref
+ * Values: unsigned
+ *
+ * The Xen grant reference granting permission for the backend to map
+ * the sole page in a single page sized ring buffer.
+ *
+ * protocol
+ * Values: string (XEN_IO_PROTO_ABI_*)
+ * Default Value: XEN_IO_PROTO_ABI_NATIVE
+ *
+ * The machine ABI rules governing the format of all ring request and
+ * response structures.
+ */
+
+/* Requests from the frontend to the backend */
+
+/*
+ * Request a SCSI operation specified via a CDB in vscsiif_request.cmnd.
+ * The target is specified via channel, id and lun.
+ *
+ * The operation to be performed is specified via a CDB in cmnd[], the length
+ * of the CDB is in cmd_len. sc_data_direction specifies the direction of data
+ * (to the device, from the device, or none at all).
+ *
+ * If data is to be transferred to or from the device the buffer(s) in the
+ * guest memory is/are specified via one or multiple scsiif_request_segment
+ * descriptors each specifying a memory page via a grant_ref_t, a offset into
+ * the page and the length of the area in that page. All scsiif_request_segment
+ * areas concatenated form the resulting data buffer used by the operation.
+ * If the number of scsiif_request_segment areas is not too large (less than
+ * or equal VSCSIIF_SG_TABLESIZE) the areas can be specified directly in the
+ * seg[] array and the number of valid scsiif_request_segment elements is to be
+ * set in nr_segments.
+ *
+ * If "feature-sg-grant" in the Xenstore is set it is possible to specify more
+ * than VSCSIIF_SG_TABLESIZE scsiif_request_segment elements via indirection.
+ * The maximum number of allowed scsiif_request_segment elements is the value
+ * of the "feature-sg-grant" entry from Xenstore. When using indirection the
+ * seg[] array doesn't contain specifications of the data buffers, but
+ * references to scsiif_request_segment arrays, which in turn reference the
+ * data buffers. While nr_segments holds the number of populated seg[] entries
+ * (plus the set VSCSIIF_SG_GRANT bit), the number of scsiif_request_segment
+ * elements referencing the target data buffers is calculated from the lengths
+ * of the seg[] elements (the sum of all valid seg[].length divided by the
+ * size of one scsiif_request_segment structure).
+ */
+#define VSCSIIF_ACT_SCSI_CDB 1
+
+/*
+ * Request abort of a running operation for the specified target given by
+ * channel, id, lun and the operation's rqid in ref_rqid.
+ */
+#define VSCSIIF_ACT_SCSI_ABORT 2
+
+/*
+ * Request a device reset of the specified target (channel and id).
+ */
+#define VSCSIIF_ACT_SCSI_RESET 3
+
+/*
+ * Preset scatter/gather elements for a following request. Deprecated.
+ * Keeping the define only to avoid usage of the value "4" for other actions.
+ */
+#define VSCSIIF_ACT_SCSI_SG_PRESET 4
+
+/*
+ * Maximum scatter/gather segments per request.
+ *
+ * Considering balance between allocating at least 16 "vscsiif_request"
+ * structures on one page (4096 bytes) and the number of scatter/gather
+ * elements needed, we decided to use 26 as a magic number.
+ *
+ * If "feature-sg-grant" is set, more scatter/gather elements can be specified
+ * by placing them in one or more (up to VSCSIIF_SG_TABLESIZE) granted pages.
+ * In this case the vscsiif_request seg elements don't contain references to
+ * the user data, but to the SG elements referencing the user data.
+ */
+#define VSCSIIF_SG_TABLESIZE 26
+
+/*
+ * based on Linux kernel 2.6.18, still valid
+ * Changing these values requires support of multiple protocols via the rings
+ * as "old clients" will blindly use these values and the resulting structure
+ * sizes.
+ */
+#define VSCSIIF_MAX_COMMAND_SIZE 16
+#define VSCSIIF_SENSE_BUFFERSIZE 96
+
+struct scsiif_request_segment {
+ grant_ref_t gref;
+ uint16_t offset;
+ uint16_t length;
+};
+
+#define VSCSIIF_SG_PER_PAGE (PAGE_SIZE / sizeof(struct scsiif_request_segment))
+
+/* Size of one request is 252 bytes */
+struct vscsiif_request {
+ uint16_t rqid; /* private guest value, echoed in resp */
+ uint8_t act; /* command between backend and frontend */
+ uint8_t cmd_len; /* valid CDB bytes */
+
+ uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE]; /* the CDB */
+ uint16_t timeout_per_command; /* deprecated */
+ uint16_t channel, id, lun; /* (virtual) device specification */
+ uint16_t ref_rqid; /* command abort reference */
+ uint8_t sc_data_direction; /* for DMA_TO_DEVICE(1)
+ DMA_FROM_DEVICE(2)
+ DMA_NONE(3) requests */
+ uint8_t nr_segments; /* Number of pieces of scatter-gather */
+/*
+ * flag in nr_segments: SG elements via grant page
+ *
+ * If VSCSIIF_SG_GRANT is set, the low 7 bits of nr_segments specify the number
+ * of grant pages containing SG elements. Usable if "feature-sg-grant" set.
+ */
+#define VSCSIIF_SG_GRANT 0x80
+
+ struct scsiif_request_segment seg[VSCSIIF_SG_TABLESIZE];
+ uint32_t reserved[3];
+};
+
+/* Size of one response is 252 bytes */
+struct vscsiif_response {
+ uint16_t rqid; /* identifies request */
+ uint8_t padding;
+ uint8_t sense_len;
+ uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
+ int32_t rslt;
+ uint32_t residual_len; /* request bufflen -
+ return the value from physical device */
+ uint32_t reserved[36];
+};
+
+DEFINE_RING_TYPES(vscsiif, struct vscsiif_request, struct vscsiif_response);
+
+#endif /*__XEN__PUBLIC_IO_SCSI_H__*/
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index de082130ba4b..f68719f405af 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -3,6 +3,24 @@
*
* Guest OS interface to Xen.
*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
* Copyright (c) 2004, K A Fraser
*/
@@ -73,13 +91,23 @@
* VIRTUAL INTERRUPTS
*
* Virtual interrupts that a guest OS may receive from Xen.
+ * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a
+ * global VIRQ. The former can be bound once per VCPU and cannot be re-bound.
+ * The latter can be allocated only once per guest: they must initially be
+ * allocated to VCPU0 but can subsequently be re-bound.
*/
-#define VIRQ_TIMER 0 /* Timebase update, and/or requested timeout. */
-#define VIRQ_DEBUG 1 /* Request guest to dump debug info. */
-#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */
-#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */
-#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */
-#define VIRQ_PCPU_STATE 9 /* (DOM0) PCPU state changed */
+#define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */
+#define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */
+#define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */
+#define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */
+#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */
+#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */
+#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */
+#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */
+#define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */
+#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */
+#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */
+#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */
/* Architecture-specific VIRQ definitions. */
#define VIRQ_ARCH_0 16
@@ -92,24 +120,68 @@
#define VIRQ_ARCH_7 23
#define NR_VIRQS 24
+
/*
- * MMU-UPDATE REQUESTS
- *
- * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs.
- * A foreigndom (FD) can be specified (or DOMID_SELF for none).
- * Where the FD has some effect, it is described below.
- * ptr[1:0] specifies the appropriate MMU_* command.
+ * enum neg_errnoval HYPERVISOR_mmu_update(const struct mmu_update reqs[],
+ * unsigned count, unsigned *done_out,
+ * unsigned foreigndom)
+ * @reqs is an array of mmu_update_t structures ((ptr, val) pairs).
+ * @count is the length of the above array.
+ * @pdone is an output parameter indicating number of completed operations
+ * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this
+ * hypercall invocation. Can be DOMID_SELF.
+ * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced
+ * in this hypercall invocation. The value of this field
+ * (x) encodes the PFD as follows:
+ * x == 0 => PFD == DOMID_SELF
+ * x != 0 => PFD == x - 1
*
+ * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command.
+ * -------------
* ptr[1:0] == MMU_NORMAL_PT_UPDATE:
- * Updates an entry in a page table. If updating an L1 table, and the new
- * table entry is valid/present, the mapped frame must belong to the FD, if
- * an FD has been specified. If attempting to map an I/O page then the
- * caller assumes the privilege of the FD.
+ * Updates an entry in a page table belonging to PFD. If updating an L1 table,
+ * and the new table entry is valid/present, the mapped frame must belong to
+ * FD. If attempting to map an I/O page then the caller assumes the privilege
+ * of the FD.
* FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
* FD == DOMID_XEN: Map restricted areas of Xen's heap space.
* ptr[:2] -- Machine address of the page-table entry to modify.
* val -- Value to write.
*
+ * There also certain implicit requirements when using this hypercall. The
+ * pages that make up a pagetable must be mapped read-only in the guest.
+ * This prevents uncontrolled guest updates to the pagetable. Xen strictly
+ * enforces this, and will disallow any pagetable update which will end up
+ * mapping pagetable page RW, and will disallow using any writable page as a
+ * pagetable. In practice it means that when constructing a page table for a
+ * process, thread, etc, we MUST be very dilligient in following these rules:
+ * 1). Start with top-level page (PGD or in Xen language: L4). Fill out
+ * the entries.
+ * 2). Keep on going, filling out the upper (PUD or L3), and middle (PMD
+ * or L2).
+ * 3). Start filling out the PTE table (L1) with the PTE entries. Once
+ * done, make sure to set each of those entries to RO (so writeable bit
+ * is unset). Once that has been completed, set the PMD (L2) for this
+ * PTE table as RO.
+ * 4). When completed with all of the PMD (L2) entries, and all of them have
+ * been set to RO, make sure to set RO the PUD (L3). Do the same
+ * operation on PGD (L4) pagetable entries that have a PUD (L3) entry.
+ * 5). Now before you can use those pages (so setting the cr3), you MUST also
+ * pin them so that the hypervisor can verify the entries. This is done
+ * via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame
+ * number of the PGD (L4)). And this point the HYPERVISOR_mmuext_op(
+ * MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be
+ * issued.
+ * For 32-bit guests, the L4 is not used (as there is less pagetables), so
+ * instead use L3.
+ * At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE
+ * hypercall. Also if so desired the OS can also try to write to the PTE
+ * and be trapped by the hypervisor (as the PTE entry is RO).
+ *
+ * To deallocate the pages, the operations are the reverse of the steps
+ * mentioned above. The argument is MMUEXT_UNPIN_TABLE for all levels and the
+ * pagetable MUST not be in use (meaning that the cr3 is not set to it).
+ *
* ptr[1:0] == MMU_MACHPHYS_UPDATE:
* Updates an entry in the machine->pseudo-physical mapping table.
* ptr[:2] -- Machine address within the frame whose mapping to modify.
@@ -119,6 +191,72 @@
* ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD:
* As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed
* with those in @val.
+ *
+ * @val is usually the machine frame number along with some attributes.
+ * The attributes by default follow the architecture defined bits. Meaning that
+ * if this is a X86_64 machine and four page table layout is used, the layout
+ * of val is:
+ * - 63 if set means No execute (NX)
+ * - 46-13 the machine frame number
+ * - 12 available for guest
+ * - 11 available for guest
+ * - 10 available for guest
+ * - 9 available for guest
+ * - 8 global
+ * - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages)
+ * - 6 dirty
+ * - 5 accessed
+ * - 4 page cached disabled
+ * - 3 page write through
+ * - 2 userspace accessible
+ * - 1 writeable
+ * - 0 present
+ *
+ * The one bits that does not fit with the default layout is the PAGE_PSE
+ * also called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to the
+ * HYPERVISOR_mmuext_op serve as mechanism to set a pagetable to be 4MB
+ * (or 2MB) instead of using the PAGE_PSE bit.
+ *
+ * The reason that the PAGE_PSE (bit 7) is not being utilized is due to Xen
+ * using it as the Page Attribute Table (PAT) bit - for details on it please
+ * refer to Intel SDM 10.12. The PAT allows to set the caching attributes of
+ * pages instead of using MTRRs.
+ *
+ * The PAT MSR is as follows (it is a 64-bit value, each entry is 8 bits):
+ * PAT4 PAT0
+ * +-----+-----+----+----+----+-----+----+----+
+ * | UC | UC- | WC | WB | UC | UC- | WC | WB | <= Linux
+ * +-----+-----+----+----+----+-----+----+----+
+ * | UC | UC- | WT | WB | UC | UC- | WT | WB | <= BIOS (default when machine boots)
+ * +-----+-----+----+----+----+-----+----+----+
+ * | rsv | rsv | WP | WC | UC | UC- | WT | WB | <= Xen
+ * +-----+-----+----+----+----+-----+----+----+
+ *
+ * The lookup of this index table translates to looking up
+ * Bit 7, Bit 4, and Bit 3 of val entry:
+ *
+ * PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3).
+ *
+ * If all bits are off, then we are using PAT0. If bit 3 turned on,
+ * then we are using PAT1, if bit 3 and bit 4, then PAT2..
+ *
+ * As you can see, the Linux PAT1 translates to PAT4 under Xen. Which means
+ * that if a guest that follows Linux's PAT setup and would like to set Write
+ * Combined on pages it MUST use PAT4 entry. Meaning that Bit 7 (PAGE_PAT) is
+ * set. For example, under Linux it only uses PAT0, PAT1, and PAT2 for the
+ * caching as:
+ *
+ * WB = none (so PAT0)
+ * WC = PWT (bit 3 on)
+ * UC = PWT | PCD (bit 3 and 4 are on).
+ *
+ * To make it work with Xen, it needs to translate the WC bit as so:
+ *
+ * PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3
+ *
+ * And to translate back it would:
+ *
+ * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7.
*/
#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */
#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */
@@ -127,7 +265,12 @@
/*
* MMU EXTENDED OPERATIONS
*
- * HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
+ * enum neg_errnoval HYPERVISOR_mmuext_op(mmuext_op_t uops[],
+ * unsigned int count,
+ * unsigned int *pdone,
+ * unsigned int foreigndom)
+ */
+/* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
* A foreigndom (FD) can be specified (or DOMID_SELF for none).
* Where the FD has some effect, it is described below.
*
@@ -164,9 +307,23 @@
* cmd: MMUEXT_FLUSH_CACHE
* No additional arguments. Writes back and flushes cache contents.
*
+ * cmd: MMUEXT_FLUSH_CACHE_GLOBAL
+ * No additional arguments. Writes back and flushes cache contents
+ * on all CPUs in the system.
+ *
* cmd: MMUEXT_SET_LDT
* linear_addr: Linear address of LDT base (NB. must be page-aligned).
* nr_ents: Number of entries in LDT.
+ *
+ * cmd: MMUEXT_CLEAR_PAGE
+ * mfn: Machine frame number to be cleared.
+ *
+ * cmd: MMUEXT_COPY_PAGE
+ * mfn: Machine frame number of the destination page.
+ * src_mfn: Machine frame number of the source page.
+ *
+ * cmd: MMUEXT_[UN]MARK_SUPER
+ * mfn: Machine frame number of head of superpage to be [un]marked.
*/
#define MMUEXT_PIN_L1_TABLE 0
#define MMUEXT_PIN_L2_TABLE 1
@@ -183,12 +340,18 @@
#define MMUEXT_FLUSH_CACHE 12
#define MMUEXT_SET_LDT 13
#define MMUEXT_NEW_USER_BASEPTR 15
+#define MMUEXT_CLEAR_PAGE 16
+#define MMUEXT_COPY_PAGE 17
+#define MMUEXT_FLUSH_CACHE_GLOBAL 18
+#define MMUEXT_MARK_SUPER 19
+#define MMUEXT_UNMARK_SUPER 20
#ifndef __ASSEMBLY__
struct mmuext_op {
unsigned int cmd;
union {
- /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
+ /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
+ * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */
xen_pfn_t mfn;
/* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
unsigned long linear_addr;
@@ -198,6 +361,8 @@ struct mmuext_op {
unsigned int nr_ents;
/* TLB_FLUSH_MULTI, INVLPG_MULTI */
void *vcpumask;
+ /* COPY_PAGE */
+ xen_pfn_t src_mfn;
} arg2;
};
DEFINE_GUEST_HANDLE_STRUCT(mmuext_op);
@@ -225,10 +390,23 @@ DEFINE_GUEST_HANDLE_STRUCT(mmuext_op);
*/
#define VMASST_CMD_enable 0
#define VMASST_CMD_disable 1
+
+/* x86/32 guests: simulate full 4GB segment limits. */
#define VMASST_TYPE_4gb_segments 0
+
+/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */
#define VMASST_TYPE_4gb_segments_notify 1
+
+/*
+ * x86 guests: support writes to bottom-level PTEs.
+ * NB1. Page-directory entries cannot be written.
+ * NB2. Guest must continue to remove all writable mappings of PTEs.
+ */
#define VMASST_TYPE_writable_pagetables 2
+
+/* x86/PAE guests: support PDPTs above 4GB. */
#define VMASST_TYPE_pae_extended_cr3 3
+
#define MAX_VMASST_TYPE 3
#ifndef __ASSEMBLY__
@@ -260,6 +438,15 @@ typedef uint16_t domid_t;
*/
#define DOMID_XEN (0x7FF2U)
+/* DOMID_COW is used as the owner of sharable pages */
+#define DOMID_COW (0x7FF3U)
+
+/* DOMID_INVALID is used to identify pages with unknown owner. */
+#define DOMID_INVALID (0x7FF4U)
+
+/* Idle domain. */
+#define DOMID_IDLE (0x7FFFU)
+
/*
* Send an array of these to HYPERVISOR_mmu_update().
* NB. The fields are natural pointer/address size for this architecture.
@@ -272,7 +459,9 @@ DEFINE_GUEST_HANDLE_STRUCT(mmu_update);
/*
* Send an array of these to HYPERVISOR_multicall().
- * NB. The fields are natural register size for this architecture.
+ * NB. The fields are logically the natural register size for this
+ * architecture. In cases where xen_ulong_t is larger than this then
+ * any unused bits in the upper portion must be zero.
*/
struct multicall_entry {
xen_ulong_t op;
@@ -442,8 +631,48 @@ struct start_info {
unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */
unsigned long mod_len; /* Size (bytes) of pre-loaded module. */
int8_t cmd_line[MAX_GUEST_CMDLINE];
+ /* The pfn range here covers both page table and p->m table frames. */
+ unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table. */
+ unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table. */
};
+/* These flags are passed in the 'flags' field of start_info_t. */
+#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
+#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */
+#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */
+#define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */
+#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */
+
+/*
+ * A multiboot module is a package containing modules very similar to a
+ * multiboot module array. The only differences are:
+ * - the array of module descriptors is by convention simply at the beginning
+ * of the multiboot module,
+ * - addresses in the module descriptors are based on the beginning of the
+ * multiboot module,
+ * - the number of modules is determined by a termination descriptor that has
+ * mod_start == 0.
+ *
+ * This permits to both build it statically and reference it in a configuration
+ * file, and let the PV guest easily rebase the addresses to virtual addresses
+ * and at the same time count the number of modules.
+ */
+struct xen_multiboot_mod_list {
+ /* Address of first byte of the module */
+ uint32_t mod_start;
+ /* Address of last byte of the module (inclusive) */
+ uint32_t mod_end;
+ /* Address of zero-terminated command line */
+ uint32_t cmdline;
+ /* Unused, must be zero */
+ uint32_t pad;
+};
+/*
+ * The console structure in start_info.console.dom0
+ *
+ * This structure includes a variety of information required to
+ * have a working VGA/VESA console.
+ */
struct dom0_vga_console_info {
uint8_t video_type;
#define XEN_VGATYPE_TEXT_MODE_3 0x03
@@ -484,11 +713,6 @@ struct dom0_vga_console_info {
} u;
};
-/* These flags are passed in the 'flags' field of start_info_t. */
-#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
-#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */
-#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */
-
typedef uint64_t cpumap_t;
typedef uint8_t xen_domain_handle_t[16];
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index 0324c6d340c1..b78f21caf55a 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -86,6 +86,7 @@ struct xenbus_device_id
/* A xenbus driver. */
struct xenbus_driver {
+ const char *name; /* defaults to ids[0].devicetype */
const struct xenbus_device_id *ids;
int (*probe)(struct xenbus_device *dev,
const struct xenbus_device_id *id);
@@ -100,20 +101,22 @@ struct xenbus_driver {
int (*is_ready)(struct xenbus_device *dev);
};
-#define DEFINE_XENBUS_DRIVER(var, drvname, methods...) \
-struct xenbus_driver var ## _driver = { \
- .driver.name = drvname + 0 ?: var ## _ids->devicetype, \
- .driver.owner = THIS_MODULE, \
- .ids = var ## _ids, ## methods \
-}
-
static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv)
{
return container_of(drv, struct xenbus_driver, driver);
}
-int __must_check xenbus_register_frontend(struct xenbus_driver *);
-int __must_check xenbus_register_backend(struct xenbus_driver *);
+int __must_check __xenbus_register_frontend(struct xenbus_driver *drv,
+ struct module *owner,
+ const char *mod_name);
+int __must_check __xenbus_register_backend(struct xenbus_driver *drv,
+ struct module *owner,
+ const char *mod_name);
+
+#define xenbus_register_frontend(drv) \
+ __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME);
+#define xenbus_register_backend(drv) \
+ __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME);
void xenbus_unregister_driver(struct xenbus_driver *drv);