From 608c0d8804ef3ca4cda8ec6ad914e47deb283d7b Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Thu, 9 Nov 2017 08:00:35 -0600 Subject: PCI/IOV: Add pci_vf_drivers_autoprobe() interface Add a pci_vf_drivers_autoprobe() interface. Setting autoprobe to false on the PF prevents drivers from binding to VFs when they are enabled. Signed-off-by: Bryant G. Ly Signed-off-by: Juan J. Alvarez Acked-by: Bjorn Helgaas Acked-by: Russell Currey Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0403894147a3..e3e94467687a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1961,6 +1961,7 @@ int pci_vfs_assigned(struct pci_dev *dev); int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); int pci_sriov_get_totalvfs(struct pci_dev *dev); resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno); +void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe); #else static inline int pci_iov_virtfn_bus(struct pci_dev *dev, int id) { @@ -1988,6 +1989,7 @@ static inline int pci_sriov_get_totalvfs(struct pci_dev *dev) { return 0; } static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) { return 0; } +static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { } #endif #if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE) -- cgit v1.2.3 From b1db551324f72fa14ad82ca31237a7ed418104df Mon Sep 17 00:00:00 2001 From: Christophe Lombard Date: Thu, 11 Jan 2018 09:55:25 +0100 Subject: cxl: Add support for ASB_Notify on POWER9 The POWER9 core supports a new feature: ASB_Notify which requires the support of the Special Purpose Register: TIDR. The ASB_Notify command, generated by the AFU, will attempt to wake-up the host thread identified by the particular LPID:PID:TID. This patch assign a unique TIDR (thread id) for the current thread which will be used in the process element entry. Signed-off-by: Christophe Lombard Reviewed-by: Philippe Bergheaud Acked-by: Frederic Barrat Reviewed-by: Vaibhav Jain Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 1 + drivers/misc/cxl/context.c | 2 ++ drivers/misc/cxl/cxl.h | 3 +++ drivers/misc/cxl/cxllib.c | 3 ++- drivers/misc/cxl/file.c | 15 +++++++++++++-- drivers/misc/cxl/native.c | 13 ++++++++++++- include/uapi/misc/cxl.h | 10 ++++++---- 7 files changed, 39 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 1563d1190da3..9eb78ec0ce5b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1590,6 +1590,7 @@ int set_thread_tidr(struct task_struct *t) return 0; } +EXPORT_SYMBOL_GPL(set_thread_tidr); #endif /* CONFIG_PPC64 */ diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 12a41b2753f0..7ff315ad3692 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -45,6 +45,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master) ctx->pid = NULL; /* Set in start work ioctl */ mutex_init(&ctx->mapping_lock); ctx->mapping = NULL; + ctx->tidr = 0; + ctx->assign_tidr = false; if (cxl_is_power8()) { spin_lock_init(&ctx->sste_lock); diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index e46a4062904a..53149fbd780e 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -630,6 +630,9 @@ struct cxl_context { struct list_head extra_irq_contexts; struct mm_struct *mm; + + u16 tidr; + bool assign_tidr; }; struct cxl_irq_info; diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c index dc9bc1807fdf..30ccba436b3b 100644 --- a/drivers/misc/cxl/cxllib.c +++ b/drivers/misc/cxl/cxllib.c @@ -199,10 +199,11 @@ int cxllib_get_PE_attributes(struct task_struct *task, */ attr->pid = mm->context.id; mmput(mm); + attr->tid = task->thread.tidr; } else { attr->pid = 0; + attr->tid = 0; } - attr->tid = 0; return 0; } EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes); diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index 76c0b0ca9388..93fd381f6484 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -173,7 +173,7 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, * flags are set it's invalid */ if (work.reserved1 || work.reserved2 || work.reserved3 || - work.reserved4 || work.reserved5 || work.reserved6 || + work.reserved4 || work.reserved5 || (work.flags & ~CXL_START_WORK_ALL)) { rc = -EINVAL; goto out; @@ -186,12 +186,16 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, rc = -EINVAL; goto out; } + if ((rc = afu_register_irqs(ctx, work.num_interrupts))) goto out; if (work.flags & CXL_START_WORK_AMR) amr = work.amr & mfspr(SPRN_UAMOR); + if (work.flags & CXL_START_WORK_TID) + ctx->assign_tidr = true; + ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF); /* @@ -263,8 +267,15 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, goto out; } - ctx->status = STARTED; rc = 0; + if (work.flags & CXL_START_WORK_TID) { + work.tid = ctx->tidr; + if (copy_to_user(uwork, &work, sizeof(work))) + rc = -EFAULT; + } + + ctx->status = STARTED; + out: mutex_unlock(&ctx->status_mutex); return rc; diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 02b6b45b4c20..1b3d7c65ea3f 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "cxl.h" @@ -655,6 +656,7 @@ static void update_ivtes_directed(struct cxl_context *ctx) static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr) { u32 pid; + int rc; cxl_assign_psn_space(ctx); @@ -673,7 +675,16 @@ static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr) pid = ctx->mm->context.id; } - ctx->elem->common.tid = 0; + /* Assign a unique TIDR (thread id) for the current thread */ + if (!(ctx->tidr) && (ctx->assign_tidr)) { + rc = set_thread_tidr(current); + if (rc) + return -ENODEV; + ctx->tidr = current->thread.tidr; + pr_devel("%s: current tidr: %d\n", __func__, ctx->tidr); + } + + ctx->elem->common.tid = cpu_to_be32(ctx->tidr); ctx->elem->common.pid = cpu_to_be32(pid); ctx->elem->sr = cpu_to_be64(calculate_sr(ctx)); diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h index 49e8fd08855a..56376d3907d8 100644 --- a/include/uapi/misc/cxl.h +++ b/include/uapi/misc/cxl.h @@ -20,20 +20,22 @@ struct cxl_ioctl_start_work { __u64 work_element_descriptor; __u64 amr; __s16 num_interrupts; - __s16 reserved1; - __s32 reserved2; + __u16 tid; + __s32 reserved1; + __u64 reserved2; __u64 reserved3; __u64 reserved4; __u64 reserved5; - __u64 reserved6; }; #define CXL_START_WORK_AMR 0x0000000000000001ULL #define CXL_START_WORK_NUM_IRQS 0x0000000000000002ULL #define CXL_START_WORK_ERR_FF 0x0000000000000004ULL +#define CXL_START_WORK_TID 0x0000000000000008ULL #define CXL_START_WORK_ALL (CXL_START_WORK_AMR |\ CXL_START_WORK_NUM_IRQS |\ - CXL_START_WORK_ERR_FF) + CXL_START_WORK_ERR_FF |\ + CXL_START_WORK_TID) /* Possible modes that an afu can be in */ -- cgit v1.2.3 From c5cc1f4df6b16646f8fae7aab523c1820bf916e8 Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Thu, 18 Jan 2018 17:50:43 -0800 Subject: powerpc/ptrace: Add memory protection key regset The AMR/IAMR/UAMOR are part of the program context. Allow it to be accessed via ptrace and through core files. Signed-off-by: Ram Pai Signed-off-by: Thiago Jung Bauermann Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pkeys.h | 5 +++ arch/powerpc/include/uapi/asm/elf.h | 1 + arch/powerpc/kernel/ptrace.c | 66 +++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/traps.c | 7 ++++ include/uapi/linux/elf.h | 1 + 5 files changed, 80 insertions(+) (limited to 'include') diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index 2298771b066b..c3cbad824e5a 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -202,6 +202,11 @@ static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, return __arch_set_user_pkey_access(tsk, pkey, init_val); } +static inline bool arch_pkeys_enabled(void) +{ + return !static_branch_likely(&pkey_disabled); +} + extern void pkey_mm_init(struct mm_struct *mm); extern void thread_pkey_regs_save(struct thread_struct *thread); extern void thread_pkey_regs_restore(struct thread_struct *new_thread, diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h index 5f201d40bcca..860c59291bfc 100644 --- a/arch/powerpc/include/uapi/asm/elf.h +++ b/arch/powerpc/include/uapi/asm/elf.h @@ -97,6 +97,7 @@ #define ELF_NTMSPRREG 3 /* include tfhar, tfiar, texasr */ #define ELF_NEBB 3 /* includes ebbrr, ebbhr, bescr */ #define ELF_NPMU 5 /* includes siar, sdar, sier, mmcr2, mmcr0 */ +#define ELF_NPKEY 3 /* includes amr, iamr, uamor */ typedef unsigned long elf_greg_t64; typedef elf_greg_t64 elf_gregset_t64[ELF_NGREG]; diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index aef08e579946..ca72d7391d40 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -1787,6 +1788,61 @@ static int pmu_set(struct task_struct *target, return ret; } #endif + +#ifdef CONFIG_PPC_MEM_KEYS +static int pkey_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!arch_pkeys_enabled()) + return -ENODEV; + + return regset->n; +} + +static int pkey_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); + BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor)); + + if (!arch_pkeys_enabled()) + return -ENODEV; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.amr, 0, + ELF_NPKEY * sizeof(unsigned long)); +} + +static int pkey_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 new_amr; + int ret; + + if (!arch_pkeys_enabled()) + return -ENODEV; + + /* Only the AMR can be set from userspace */ + if (pos != 0 || count != sizeof(new_amr)) + return -EINVAL; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &new_amr, 0, sizeof(new_amr)); + if (ret) + return ret; + + /* UAMOR determines which bits of the AMR can be set from userspace. */ + target->thread.amr = (new_amr & target->thread.uamor) | + (target->thread.amr & ~target->thread.uamor); + + return 0; +} +#endif /* CONFIG_PPC_MEM_KEYS */ + /* * These are our native regset flavors. */ @@ -1821,6 +1877,9 @@ enum powerpc_regset { REGSET_EBB, /* EBB registers */ REGSET_PMR, /* Performance Monitor Registers */ #endif +#ifdef CONFIG_PPC_MEM_KEYS + REGSET_PKEY, /* AMR register */ +#endif }; static const struct user_regset native_regsets[] = { @@ -1926,6 +1985,13 @@ static const struct user_regset native_regsets[] = { .active = pmu_active, .get = pmu_get, .set = pmu_set }, #endif +#ifdef CONFIG_PPC_MEM_KEYS + [REGSET_PKEY] = { + .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY, + .size = sizeof(u64), .align = sizeof(u64), + .active = pkey_active, .get = pkey_get, .set = pkey_set + }, +#endif }; static const struct user_regset_view user_ppc_native_view = { diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 4b1a8e2ec023..122a3c883f4e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -292,6 +292,13 @@ void _exception_pkey(int signr, struct pt_regs *regs, int code, local_irq_enable(); current->thread.trap_nr = code; + + /* + * Save all the pkey registers AMR/IAMR/UAMOR. Eg: Core dumps need + * to capture the content, if the task gets killed. + */ + thread_pkey_regs_save(¤t->thread); + memset(&info, 0, sizeof(info)); info.si_signo = signr; info.si_code = code; diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index bb6836986200..3bf73fb58045 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -396,6 +396,7 @@ typedef struct elf64_shdr { #define NT_PPC_TM_CTAR 0x10d /* TM checkpointed Target Address Register */ #define NT_PPC_TM_CPPR 0x10e /* TM checkpointed Program Priority Register */ #define NT_PPC_TM_CDSCR 0x10f /* TM checkpointed Data Stream Control Register */ +#define NT_PPC_PKEY 0x110 /* Memory Protection Keys registers */ #define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ #define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */ #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ -- cgit v1.2.3 From 2cb3d64b26984703a6bb80e66adcc3727ad37f9f Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Tue, 23 Jan 2018 12:31:40 +0100 Subject: powerpc/powernv: Capture actag information for the device In the opencapi protocol, host memory contexts are referenced by a 'actag'. During setup, a driver must tell the device how many actags it can used, and what values are acceptable. On POWER9, the NPU can handle 64 actags per link, so they must be shared between all the PCI functions of the link. To get a global picture of how many actags are used by each AFU of every function, we capture some data at the end of PCI enumeration, so that actags can be shared fairly if needed. This is not powernv specific per say, but rather a consequence of the opencapi configuration specification being quite general. The number of available actags on POWER9 makes it more likely to be hit. This is somewhat mitigated by the fact that existing AFUs are coded by requesting a reasonable count of actags and existing devices carry only one AFU. Signed-off-by: Frederic Barrat Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pnv-ocxl.h | 4 + arch/powerpc/platforms/powernv/ocxl.c | 305 ++++++++++++++++++++++++++++++++++ include/misc/ocxl-config.h | 45 +++++ 3 files changed, 354 insertions(+) create mode 100644 include/misc/ocxl-config.h (limited to 'include') diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index 36868d49aeed..398d05b30600 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -9,6 +9,10 @@ #define PNV_OCXL_TL_BITS_PER_RATE 4 #define PNV_OCXL_TL_RATE_BUF_SIZE ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8) +extern int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, + u16 *supported); +extern int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count); + extern int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, char *rate_buf, int rate_buf_size); extern int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index d61186805a07..1faaa4ef6903 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -2,13 +2,318 @@ // Copyright 2017 IBM Corp. #include #include +#include #include "pci.h" #define PNV_OCXL_TL_P9_RECV_CAP 0x000000000000000Full +#define PNV_OCXL_ACTAG_MAX 64 /* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */ #define PNV_OCXL_PASID_BITS 15 #define PNV_OCXL_PASID_MAX ((1 << PNV_OCXL_PASID_BITS) - 1) +#define AFU_PRESENT (1 << 31) +#define AFU_INDEX_MASK 0x3F000000 +#define AFU_INDEX_SHIFT 24 +#define ACTAG_MASK 0xFFF + + +struct actag_range { + u16 start; + u16 count; +}; + +struct npu_link { + struct list_head list; + int domain; + int bus; + int dev; + u16 fn_desired_actags[8]; + struct actag_range fn_actags[8]; + bool assignment_done; +}; +static struct list_head links_list = LIST_HEAD_INIT(links_list); +static DEFINE_MUTEX(links_list_lock); + + +/* + * opencapi actags handling: + * + * When sending commands, the opencapi device references the memory + * context it's targeting with an 'actag', which is really an alias + * for a (BDF, pasid) combination. When it receives a command, the NPU + * must do a lookup of the actag to identify the memory context. The + * hardware supports a finite number of actags per link (64 for + * POWER9). + * + * The device can carry multiple functions, and each function can have + * multiple AFUs. Each AFU advertises in its config space the number + * of desired actags. The host must configure in the config space of + * the AFU how many actags the AFU is really allowed to use (which can + * be less than what the AFU desires). + * + * When a PCI function is probed by the driver, it has no visibility + * about the other PCI functions and how many actags they'd like, + * which makes it impossible to distribute actags fairly among AFUs. + * + * Unfortunately, the only way to know how many actags a function + * desires is by looking at the data for each AFU in the config space + * and add them up. Similarly, the only way to know how many actags + * all the functions of the physical device desire is by adding the + * previously computed function counts. Then we can match that against + * what the hardware supports. + * + * To get a comprehensive view, we use a 'pci fixup': at the end of + * PCI enumeration, each function counts how many actags its AFUs + * desire and we save it in a 'npu_link' structure, shared between all + * the PCI functions of a same device. Therefore, when the first + * function is probed by the driver, we can get an idea of the total + * count of desired actags for the device, and assign the actags to + * the AFUs, by pro-rating if needed. + */ + +static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos) +{ + int vsec = pos; + u16 vendor, id; + + while ((vsec = pci_find_next_ext_capability(dev, vsec, + OCXL_EXT_CAP_ID_DVSEC))) { + pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, + &vendor); + pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); + if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id) + return vsec; + } + return 0; +} + +static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) +{ + int vsec = 0; + u8 idx; + + while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID, + vsec))) { + pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, + &idx); + if (idx == afu_idx) + return vsec; + } + return 0; +} + +static int get_max_afu_index(struct pci_dev *dev, int *afu_idx) +{ + int pos; + u32 val; + + pos = find_dvsec_from_pos(dev, OCXL_DVSEC_FUNC_ID, 0); + if (!pos) + return -ESRCH; + + pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val); + if (val & AFU_PRESENT) + *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT; + else + *afu_idx = -1; + return 0; +} + +static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag) +{ + int pos; + u16 actag_sup; + + pos = find_dvsec_afu_ctrl(dev, afu_idx); + if (!pos) + return -ESRCH; + + pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, + &actag_sup); + *actag = actag_sup & ACTAG_MASK; + return 0; +} + +static struct npu_link *find_link(struct pci_dev *dev) +{ + struct npu_link *link; + + list_for_each_entry(link, &links_list, list) { + /* The functions of a device all share the same link */ + if (link->domain == pci_domain_nr(dev->bus) && + link->bus == dev->bus->number && + link->dev == PCI_SLOT(dev->devfn)) { + return link; + } + } + + /* link doesn't exist yet. Allocate one */ + link = kzalloc(sizeof(struct npu_link), GFP_KERNEL); + if (!link) + return NULL; + link->domain = pci_domain_nr(dev->bus); + link->bus = dev->bus->number; + link->dev = PCI_SLOT(dev->devfn); + list_add(&link->list, &links_list); + return link; +} + +static void pnv_ocxl_fixup_actag(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct npu_link *link; + int rc, afu_idx = -1, i, actag; + + if (!machine_is(powernv)) + return; + + if (phb->type != PNV_PHB_NPU_OCAPI) + return; + + mutex_lock(&links_list_lock); + + link = find_link(dev); + if (!link) { + dev_warn(&dev->dev, "couldn't update actag information\n"); + mutex_unlock(&links_list_lock); + return; + } + + /* + * Check how many actags are desired for the AFUs under that + * function and add it to the count for the link + */ + rc = get_max_afu_index(dev, &afu_idx); + if (rc) { + /* Most likely an invalid config space */ + dev_dbg(&dev->dev, "couldn't find AFU information\n"); + afu_idx = -1; + } + + link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0; + for (i = 0; i <= afu_idx; i++) { + /* + * AFU index 'holes' are allowed. So don't fail if we + * can't read the actag info for an index + */ + rc = get_actag_count(dev, i, &actag); + if (rc) + continue; + link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag; + } + dev_dbg(&dev->dev, "total actags for function: %d\n", + link->fn_desired_actags[PCI_FUNC(dev->devfn)]); + + mutex_unlock(&links_list_lock); +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag); + +static u16 assign_fn_actags(u16 desired, u16 total) +{ + u16 count; + + if (total <= PNV_OCXL_ACTAG_MAX) + count = desired; + else + count = PNV_OCXL_ACTAG_MAX * desired / total; + + return count; +} + +static void assign_actags(struct npu_link *link) +{ + u16 actag_count, range_start = 0, total_desired = 0; + int i; + + for (i = 0; i < 8; i++) + total_desired += link->fn_desired_actags[i]; + + for (i = 0; i < 8; i++) { + if (link->fn_desired_actags[i]) { + actag_count = assign_fn_actags( + link->fn_desired_actags[i], + total_desired); + link->fn_actags[i].start = range_start; + link->fn_actags[i].count = actag_count; + range_start += actag_count; + WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX); + } + pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n", + link->domain, link->bus, link->dev, i, + link->fn_actags[i].start, link->fn_actags[i].count, + link->fn_desired_actags[i]); + } + link->assignment_done = true; +} + +int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, + u16 *supported) +{ + struct npu_link *link; + + mutex_lock(&links_list_lock); + + link = find_link(dev); + if (!link) { + dev_err(&dev->dev, "actag information not found\n"); + mutex_unlock(&links_list_lock); + return -ENODEV; + } + /* + * On p9, we only have 64 actags per link, so they must be + * shared by all the functions of the same adapter. We counted + * the desired actag counts during PCI enumeration, so that we + * can allocate a pro-rated number of actags to each function. + */ + if (!link->assignment_done) + assign_actags(link); + + *base = link->fn_actags[PCI_FUNC(dev->devfn)].start; + *enabled = link->fn_actags[PCI_FUNC(dev->devfn)].count; + *supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)]; + + mutex_unlock(&links_list_lock); + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag); + +int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count) +{ + struct npu_link *link; + int i, rc = -EINVAL; + + /* + * The number of PASIDs (process address space ID) which can + * be used by a function depends on how many functions exist + * on the device. The NPU needs to be configured to know how + * many bits are available to PASIDs and how many are to be + * used by the function BDF indentifier. + * + * We only support one AFU-carrying function for now. + */ + mutex_lock(&links_list_lock); + + link = find_link(dev); + if (!link) { + dev_err(&dev->dev, "actag information not found\n"); + mutex_unlock(&links_list_lock); + return -ENODEV; + } + + for (i = 0; i < 8; i++) + if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) { + *count = PNV_OCXL_PASID_MAX; + rc = 0; + break; + } + + mutex_unlock(&links_list_lock); + dev_dbg(&dev->dev, "%d PASIDs available for function\n", + rc ? 0 : *count); + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count); static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf) { diff --git a/include/misc/ocxl-config.h b/include/misc/ocxl-config.h new file mode 100644 index 000000000000..3526fa996a22 --- /dev/null +++ b/include/misc/ocxl-config.h @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#ifndef _OCXL_CONFIG_H_ +#define _OCXL_CONFIG_H_ + +/* + * This file lists the various constants used to read the + * configuration space of an opencapi adapter. + * + * It follows the specification for opencapi 3.0 + */ + +#define OCXL_EXT_CAP_ID_DVSEC 0x23 + +#define OCXL_DVSEC_VENDOR_OFFSET 0x4 +#define OCXL_DVSEC_ID_OFFSET 0x8 +#define OCXL_DVSEC_TL_ID 0xF000 +#define OCXL_DVSEC_TL_BACKOFF_TIMERS 0x10 +#define OCXL_DVSEC_TL_RECV_CAP 0x18 +#define OCXL_DVSEC_TL_SEND_CAP 0x20 +#define OCXL_DVSEC_TL_RECV_RATE 0x30 +#define OCXL_DVSEC_TL_SEND_RATE 0x50 +#define OCXL_DVSEC_FUNC_ID 0xF001 +#define OCXL_DVSEC_FUNC_OFF_INDEX 0x08 +#define OCXL_DVSEC_FUNC_OFF_ACTAG 0x0C +#define OCXL_DVSEC_AFU_INFO_ID 0xF003 +#define OCXL_DVSEC_AFU_INFO_AFU_IDX 0x0A +#define OCXL_DVSEC_AFU_INFO_OFF 0x0C +#define OCXL_DVSEC_AFU_INFO_DATA 0x10 +#define OCXL_DVSEC_AFU_CTRL_ID 0xF004 +#define OCXL_DVSEC_AFU_CTRL_AFU_IDX 0x0A +#define OCXL_DVSEC_AFU_CTRL_TERM_PASID 0x0C +#define OCXL_DVSEC_AFU_CTRL_ENABLE 0x0F +#define OCXL_DVSEC_AFU_CTRL_PASID_SUP 0x10 +#define OCXL_DVSEC_AFU_CTRL_PASID_EN 0x11 +#define OCXL_DVSEC_AFU_CTRL_PASID_BASE 0x14 +#define OCXL_DVSEC_AFU_CTRL_ACTAG_SUP 0x18 +#define OCXL_DVSEC_AFU_CTRL_ACTAG_EN 0x1A +#define OCXL_DVSEC_AFU_CTRL_ACTAG_BASE 0x1C +#define OCXL_DVSEC_VENDOR_ID 0xF0F0 +#define OCXL_DVSEC_VENDOR_CFG_VERS 0x0C +#define OCXL_DVSEC_VENDOR_TLX_VERS 0x10 +#define OCXL_DVSEC_VENDOR_DLX_VERS 0x20 + +#endif /* _OCXL_CONFIG_H_ */ -- cgit v1.2.3 From 5ef3166e8a32d78dfa985a323aa45ed485ff663a Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Tue, 23 Jan 2018 12:31:41 +0100 Subject: ocxl: Driver code for 'generic' opencapi devices Add an ocxl driver to handle generic opencapi devices. Of course, it's not meant to be the only opencapi driver, any device is free to implement its own. But if a host application only needs basic services like attaching to an opencapi adapter, have translation faults handled or allocate AFU interrupts, it should suffice. The AFU config space must follow the opencapi specification and use the expected vendor/device ID to be seen by the generic driver. The driver exposes the device AFUs as a char device in /dev/ocxl/ Note that the driver currently doesn't handle memory attached to the opencapi device. Signed-off-by: Frederic Barrat Signed-off-by: Andrew Donnellan Signed-off-by: Alastair D'Silva Signed-off-by: Michael Ellerman --- drivers/misc/ocxl/config.c | 712 ++++++++++++++++++++++++++++++++++++++ drivers/misc/ocxl/context.c | 230 ++++++++++++ drivers/misc/ocxl/file.c | 398 +++++++++++++++++++++ drivers/misc/ocxl/link.c | 603 ++++++++++++++++++++++++++++++++ drivers/misc/ocxl/main.c | 33 ++ drivers/misc/ocxl/ocxl_internal.h | 193 +++++++++++ drivers/misc/ocxl/pasid.c | 107 ++++++ drivers/misc/ocxl/pci.c | 585 +++++++++++++++++++++++++++++++ drivers/misc/ocxl/sysfs.c | 142 ++++++++ include/uapi/misc/ocxl.h | 40 +++ 10 files changed, 3043 insertions(+) create mode 100644 drivers/misc/ocxl/config.c create mode 100644 drivers/misc/ocxl/context.c create mode 100644 drivers/misc/ocxl/file.c create mode 100644 drivers/misc/ocxl/link.c create mode 100644 drivers/misc/ocxl/main.c create mode 100644 drivers/misc/ocxl/ocxl_internal.h create mode 100644 drivers/misc/ocxl/pasid.c create mode 100644 drivers/misc/ocxl/pci.c create mode 100644 drivers/misc/ocxl/sysfs.c create mode 100644 include/uapi/misc/ocxl.h (limited to 'include') diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c new file mode 100644 index 000000000000..ea8cca50ea06 --- /dev/null +++ b/drivers/misc/ocxl/config.c @@ -0,0 +1,712 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include +#include "ocxl_internal.h" + +#define EXTRACT_BIT(val, bit) (!!(val & BIT(bit))) +#define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s) + +#define OCXL_DVSEC_AFU_IDX_MASK GENMASK(5, 0) +#define OCXL_DVSEC_ACTAG_MASK GENMASK(11, 0) +#define OCXL_DVSEC_PASID_MASK GENMASK(19, 0) +#define OCXL_DVSEC_PASID_LOG_MASK GENMASK(4, 0) + +#define OCXL_DVSEC_TEMPL_VERSION 0x0 +#define OCXL_DVSEC_TEMPL_NAME 0x4 +#define OCXL_DVSEC_TEMPL_AFU_VERSION 0x1C +#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL 0x20 +#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ 0x28 +#define OCXL_DVSEC_TEMPL_MMIO_PP 0x30 +#define OCXL_DVSEC_TEMPL_MMIO_PP_SZ 0x38 +#define OCXL_DVSEC_TEMPL_MEM_SZ 0x3C +#define OCXL_DVSEC_TEMPL_WWID 0x40 + +#define OCXL_MAX_AFU_PER_FUNCTION 64 +#define OCXL_TEMPL_LEN 0x58 +#define OCXL_TEMPL_NAME_LEN 24 +#define OCXL_CFG_TIMEOUT 3 + +static int find_dvsec(struct pci_dev *dev, int dvsec_id) +{ + int vsec = 0; + u16 vendor, id; + + while ((vsec = pci_find_next_ext_capability(dev, vsec, + OCXL_EXT_CAP_ID_DVSEC))) { + pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, + &vendor); + pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); + if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id) + return vsec; + } + return 0; +} + +static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) +{ + int vsec = 0; + u16 vendor, id; + u8 idx; + + while ((vsec = pci_find_next_ext_capability(dev, vsec, + OCXL_EXT_CAP_ID_DVSEC))) { + pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, + &vendor); + pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); + + if (vendor == PCI_VENDOR_ID_IBM && + id == OCXL_DVSEC_AFU_CTRL_ID) { + pci_read_config_byte(dev, + vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, + &idx); + if (idx == afu_idx) + return vsec; + } + } + return 0; +} + +static int read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + u16 val; + int pos; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PASID); + if (!pos) { + /* + * PASID capability is not mandatory, but there + * shouldn't be any AFU + */ + dev_dbg(&dev->dev, "Function doesn't require any PASID\n"); + fn->max_pasid_log = -1; + goto out; + } + pci_read_config_word(dev, pos + PCI_PASID_CAP, &val); + fn->max_pasid_log = EXTRACT_BITS(val, 8, 12); + +out: + dev_dbg(&dev->dev, "PASID capability:\n"); + dev_dbg(&dev->dev, " Max PASID log = %d\n", fn->max_pasid_log); + return 0; +} + +static int read_dvsec_tl(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + int pos; + + pos = find_dvsec(dev, OCXL_DVSEC_TL_ID); + if (!pos && PCI_FUNC(dev->devfn) == 0) { + dev_err(&dev->dev, "Can't find TL DVSEC\n"); + return -ENODEV; + } + if (pos && PCI_FUNC(dev->devfn) != 0) { + dev_err(&dev->dev, "TL DVSEC is only allowed on function 0\n"); + return -ENODEV; + } + fn->dvsec_tl_pos = pos; + return 0; +} + +static int read_dvsec_function(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + int pos, afu_present; + u32 val; + + pos = find_dvsec(dev, OCXL_DVSEC_FUNC_ID); + if (!pos) { + dev_err(&dev->dev, "Can't find function DVSEC\n"); + return -ENODEV; + } + fn->dvsec_function_pos = pos; + + pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val); + afu_present = EXTRACT_BIT(val, 31); + if (!afu_present) { + fn->max_afu_index = -1; + dev_dbg(&dev->dev, "Function doesn't define any AFU\n"); + goto out; + } + fn->max_afu_index = EXTRACT_BITS(val, 24, 29); + +out: + dev_dbg(&dev->dev, "Function DVSEC:\n"); + dev_dbg(&dev->dev, " Max AFU index = %d\n", fn->max_afu_index); + return 0; +} + +static int read_dvsec_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + int pos; + + if (fn->max_afu_index < 0) { + fn->dvsec_afu_info_pos = -1; + return 0; + } + + pos = find_dvsec(dev, OCXL_DVSEC_AFU_INFO_ID); + if (!pos) { + dev_err(&dev->dev, "Can't find AFU information DVSEC\n"); + return -ENODEV; + } + fn->dvsec_afu_info_pos = pos; + return 0; +} + +static int read_dvsec_vendor(struct pci_dev *dev) +{ + int pos; + u32 cfg, tlx, dlx; + + /* + * vendor specific DVSEC is optional + * + * It's currently only used on function 0 to specify the + * version of some logic blocks. Some older images may not + * even have it so we ignore any errors + */ + if (PCI_FUNC(dev->devfn) != 0) + return 0; + + pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID); + if (!pos) + return 0; + + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_CFG_VERS, &cfg); + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_TLX_VERS, &tlx); + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_DLX_VERS, &dlx); + + dev_dbg(&dev->dev, "Vendor specific DVSEC:\n"); + dev_dbg(&dev->dev, " CFG version = 0x%x\n", cfg); + dev_dbg(&dev->dev, " TLX version = 0x%x\n", tlx); + dev_dbg(&dev->dev, " DLX version = 0x%x\n", dlx); + return 0; +} + +static int validate_function(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + if (fn->max_pasid_log == -1 && fn->max_afu_index >= 0) { + dev_err(&dev->dev, + "AFUs are defined but no PASIDs are requested\n"); + return -EINVAL; + } + + if (fn->max_afu_index > OCXL_MAX_AFU_PER_FUNCTION) { + dev_err(&dev->dev, + "Max AFU index out of architectural limit (%d vs %d)\n", + fn->max_afu_index, OCXL_MAX_AFU_PER_FUNCTION); + return -EINVAL; + } + return 0; +} + +int ocxl_config_read_function(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + int rc; + + rc = read_pasid(dev, fn); + if (rc) { + dev_err(&dev->dev, "Invalid PASID configuration: %d\n", rc); + return -ENODEV; + } + + rc = read_dvsec_tl(dev, fn); + if (rc) { + dev_err(&dev->dev, + "Invalid Transaction Layer DVSEC configuration: %d\n", + rc); + return -ENODEV; + } + + rc = read_dvsec_function(dev, fn); + if (rc) { + dev_err(&dev->dev, + "Invalid Function DVSEC configuration: %d\n", rc); + return -ENODEV; + } + + rc = read_dvsec_afu_info(dev, fn); + if (rc) { + dev_err(&dev->dev, "Invalid AFU configuration: %d\n", rc); + return -ENODEV; + } + + rc = read_dvsec_vendor(dev); + if (rc) { + dev_err(&dev->dev, + "Invalid vendor specific DVSEC configuration: %d\n", + rc); + return -ENODEV; + } + + rc = validate_function(dev, fn); + return rc; +} + +static int read_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn, + int offset, u32 *data) +{ + u32 val; + unsigned long timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT); + int pos = fn->dvsec_afu_info_pos; + + /* Protect 'data valid' bit */ + if (EXTRACT_BIT(offset, 31)) { + dev_err(&dev->dev, "Invalid offset in AFU info DVSEC\n"); + return -EINVAL; + } + + pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, offset); + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val); + while (!EXTRACT_BIT(val, 31)) { + if (time_after_eq(jiffies, timeout)) { + dev_err(&dev->dev, + "Timeout while reading AFU info DVSEC (offset=%d)\n", + offset); + return -EBUSY; + } + cpu_relax(); + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val); + } + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_DATA, data); + return 0; +} + +int ocxl_config_check_afu_index(struct pci_dev *dev, + struct ocxl_fn_config *fn, int afu_idx) +{ + u32 val; + int rc, templ_major, templ_minor, len; + + pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx); + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val); + if (rc) + return rc; + + /* AFU index map can have holes */ + if (!val) + return 0; + + templ_major = EXTRACT_BITS(val, 8, 15); + templ_minor = EXTRACT_BITS(val, 0, 7); + dev_dbg(&dev->dev, "AFU descriptor template version %d.%d\n", + templ_major, templ_minor); + + len = EXTRACT_BITS(val, 16, 31); + if (len != OCXL_TEMPL_LEN) { + dev_warn(&dev->dev, + "Unexpected template length in AFU information (%#x)\n", + len); + } + return 1; +} + +static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn, + struct ocxl_afu_config *afu) +{ + int i, rc; + u32 val, *ptr; + + BUILD_BUG_ON(OCXL_AFU_NAME_SZ < OCXL_TEMPL_NAME_LEN); + for (i = 0; i < OCXL_TEMPL_NAME_LEN; i += 4) { + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_NAME + i, &val); + if (rc) + return rc; + ptr = (u32 *) &afu->name[i]; + *ptr = val; + } + afu->name[OCXL_AFU_NAME_SZ - 1] = '\0'; /* play safe */ + return 0; +} + +static int read_afu_mmio(struct pci_dev *dev, struct ocxl_fn_config *fn, + struct ocxl_afu_config *afu) +{ + int rc; + u32 val; + + /* + * Global MMIO + */ + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL, &val); + if (rc) + return rc; + afu->global_mmio_bar = EXTRACT_BITS(val, 0, 2); + afu->global_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL + 4, &val); + if (rc) + return rc; + afu->global_mmio_offset += (u64) val << 32; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ, &val); + if (rc) + return rc; + afu->global_mmio_size = val; + + /* + * Per-process MMIO + */ + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP, &val); + if (rc) + return rc; + afu->pp_mmio_bar = EXTRACT_BITS(val, 0, 2); + afu->pp_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP + 4, &val); + if (rc) + return rc; + afu->pp_mmio_offset += (u64) val << 32; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP_SZ, &val); + if (rc) + return rc; + afu->pp_mmio_stride = val; + + return 0; +} + +static int read_afu_control(struct pci_dev *dev, struct ocxl_afu_config *afu) +{ + int pos; + u8 val8; + u16 val16; + + pos = find_dvsec_afu_ctrl(dev, afu->idx); + if (!pos) { + dev_err(&dev->dev, "Can't find AFU control DVSEC for AFU %d\n", + afu->idx); + return -ENODEV; + } + afu->dvsec_afu_control_pos = pos; + + pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_SUP, &val8); + afu->pasid_supported_log = EXTRACT_BITS(val8, 0, 4); + + pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, &val16); + afu->actag_supported = EXTRACT_BITS(val16, 0, 11); + return 0; +} + +static bool char_allowed(int c) +{ + /* + * Permitted Characters : Alphanumeric, hyphen, underscore, comma + */ + if ((c >= 0x30 && c <= 0x39) /* digits */ || + (c >= 0x41 && c <= 0x5A) /* upper case */ || + (c >= 0x61 && c <= 0x7A) /* lower case */ || + c == 0 /* NULL */ || + c == 0x2D /* - */ || + c == 0x5F /* _ */ || + c == 0x2C /* , */) + return true; + return false; +} + +static int validate_afu(struct pci_dev *dev, struct ocxl_afu_config *afu) +{ + int i; + + if (!afu->name[0]) { + dev_err(&dev->dev, "Empty AFU name\n"); + return -EINVAL; + } + for (i = 0; i < OCXL_TEMPL_NAME_LEN; i++) { + if (!char_allowed(afu->name[i])) { + dev_err(&dev->dev, + "Invalid character in AFU name\n"); + return -EINVAL; + } + } + + if (afu->global_mmio_bar != 0 && + afu->global_mmio_bar != 2 && + afu->global_mmio_bar != 4) { + dev_err(&dev->dev, "Invalid global MMIO bar number\n"); + return -EINVAL; + } + if (afu->pp_mmio_bar != 0 && + afu->pp_mmio_bar != 2 && + afu->pp_mmio_bar != 4) { + dev_err(&dev->dev, "Invalid per-process MMIO bar number\n"); + return -EINVAL; + } + return 0; +} + +int ocxl_config_read_afu(struct pci_dev *dev, struct ocxl_fn_config *fn, + struct ocxl_afu_config *afu, u8 afu_idx) +{ + int rc; + u32 val32; + + /* + * First, we need to write the AFU idx for the AFU we want to + * access. + */ + WARN_ON((afu_idx & OCXL_DVSEC_AFU_IDX_MASK) != afu_idx); + afu->idx = afu_idx; + pci_write_config_byte(dev, + fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX, + afu->idx); + + rc = read_afu_name(dev, fn, afu); + if (rc) + return rc; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_AFU_VERSION, &val32); + if (rc) + return rc; + afu->version_major = EXTRACT_BITS(val32, 24, 31); + afu->version_minor = EXTRACT_BITS(val32, 16, 23); + afu->afuc_type = EXTRACT_BITS(val32, 14, 15); + afu->afum_type = EXTRACT_BITS(val32, 12, 13); + afu->profile = EXTRACT_BITS(val32, 0, 7); + + rc = read_afu_mmio(dev, fn, afu); + if (rc) + return rc; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MEM_SZ, &val32); + if (rc) + return rc; + afu->log_mem_size = EXTRACT_BITS(val32, 0, 7); + + rc = read_afu_control(dev, afu); + if (rc) + return rc; + + dev_dbg(&dev->dev, "AFU configuration:\n"); + dev_dbg(&dev->dev, " name = %s\n", afu->name); + dev_dbg(&dev->dev, " version = %d.%d\n", afu->version_major, + afu->version_minor); + dev_dbg(&dev->dev, " global mmio bar = %hhu\n", afu->global_mmio_bar); + dev_dbg(&dev->dev, " global mmio offset = %#llx\n", + afu->global_mmio_offset); + dev_dbg(&dev->dev, " global mmio size = %#x\n", afu->global_mmio_size); + dev_dbg(&dev->dev, " pp mmio bar = %hhu\n", afu->pp_mmio_bar); + dev_dbg(&dev->dev, " pp mmio offset = %#llx\n", afu->pp_mmio_offset); + dev_dbg(&dev->dev, " pp mmio stride = %#x\n", afu->pp_mmio_stride); + dev_dbg(&dev->dev, " mem size (log) = %hhu\n", afu->log_mem_size); + dev_dbg(&dev->dev, " pasid supported (log) = %u\n", + afu->pasid_supported_log); + dev_dbg(&dev->dev, " actag supported = %u\n", + afu->actag_supported); + + rc = validate_afu(dev, afu); + return rc; +} + +int ocxl_config_get_actag_info(struct pci_dev *dev, u16 *base, u16 *enabled, + u16 *supported) +{ + int rc; + + /* + * This is really a simple wrapper for the kernel API, to + * avoid an external driver using ocxl as a library to call + * platform-dependent code + */ + rc = pnv_ocxl_get_actag(dev, base, enabled, supported); + if (rc) { + dev_err(&dev->dev, "Can't get actag for device: %d\n", rc); + return rc; + } + return 0; +} + +void ocxl_config_set_afu_actag(struct pci_dev *dev, int pos, int actag_base, + int actag_count) +{ + u16 val; + + val = actag_count & OCXL_DVSEC_ACTAG_MASK; + pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_EN, val); + + val = actag_base & OCXL_DVSEC_ACTAG_MASK; + pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_BASE, val); +} + +int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count) +{ + return pnv_ocxl_get_pasid_count(dev, count); +} + +void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base, + u32 pasid_count_log) +{ + u8 val8; + u32 val32; + + val8 = pasid_count_log & OCXL_DVSEC_PASID_LOG_MASK; + pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_EN, val8); + + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE, + &val32); + val32 &= ~OCXL_DVSEC_PASID_MASK; + val32 |= pasid_base & OCXL_DVSEC_PASID_MASK; + pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE, + val32); +} + +void ocxl_config_set_afu_state(struct pci_dev *dev, int pos, int enable) +{ + u8 val; + + pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, &val); + if (enable) + val |= 1; + else + val &= 0xFE; + pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, val); +} + +int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec) +{ + u32 val; + __be32 *be32ptr; + u8 timers; + int i, rc; + long recv_cap; + char *recv_rate; + + /* + * Skip on function != 0, as the TL can only be defined on 0 + */ + if (PCI_FUNC(dev->devfn) != 0) + return 0; + + recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL); + if (!recv_rate) + return -ENOMEM; + /* + * The spec defines 64 templates for messages in the + * Transaction Layer (TL). + * + * The host and device each support a subset, so we need to + * configure the transmitters on each side to send only + * templates the receiver understands, at a rate the receiver + * can process. Per the spec, template 0 must be supported by + * everybody. That's the template which has been used by the + * host and device so far. + * + * The sending rate limit must be set before the template is + * enabled. + */ + + /* + * Device -> host + */ + rc = pnv_ocxl_get_tl_cap(dev, &recv_cap, recv_rate, + PNV_OCXL_TL_RATE_BUF_SIZE); + if (rc) + goto out; + + for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) { + be32ptr = (__be32 *) &recv_rate[i]; + pci_write_config_dword(dev, + tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i, + be32_to_cpu(*be32ptr)); + } + val = recv_cap >> 32; + pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val); + val = recv_cap & GENMASK(31, 0); + pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP + 4, val); + + /* + * Host -> device + */ + for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) { + pci_read_config_dword(dev, + tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i, + &val); + be32ptr = (__be32 *) &recv_rate[i]; + *be32ptr = cpu_to_be32(val); + } + pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val); + recv_cap = (long) val << 32; + pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4, &val); + recv_cap |= val; + + rc = pnv_ocxl_set_tl_conf(dev, recv_cap, __pa(recv_rate), + PNV_OCXL_TL_RATE_BUF_SIZE); + if (rc) + goto out; + + /* + * Opencapi commands needing to be retried are classified per + * the TL in 2 groups: short and long commands. + * + * The short back off timer it not used for now. It will be + * for opencapi 4.0. + * + * The long back off timer is typically used when an AFU hits + * a page fault but the NPU is already processing one. So the + * AFU needs to wait before it can resubmit. Having a value + * too low doesn't break anything, but can generate extra + * traffic on the link. + * We set it to 1.6 us for now. It's shorter than, but in the + * same order of magnitude as the time spent to process a page + * fault. + */ + timers = 0x2 << 4; /* long timer = 1.6 us */ + pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS, + timers); + + rc = 0; +out: + kfree(recv_rate); + return rc; +} + +int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int pasid) +{ + u32 val; + unsigned long timeout; + + pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID, + &val); + if (EXTRACT_BIT(val, 20)) { + dev_err(&dev->dev, + "Can't terminate PASID %#x, previous termination didn't complete\n", + pasid); + return -EBUSY; + } + + val &= ~OCXL_DVSEC_PASID_MASK; + val |= pasid & OCXL_DVSEC_PASID_MASK; + val |= BIT(20); + pci_write_config_dword(dev, + afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID, + val); + + timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT); + pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID, + &val); + while (EXTRACT_BIT(val, 20)) { + if (time_after_eq(jiffies, timeout)) { + dev_err(&dev->dev, + "Timeout while waiting for AFU to terminate PASID %#x\n", + pasid); + return -EBUSY; + } + cpu_relax(); + pci_read_config_dword(dev, + afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID, + &val); + } + return 0; +} + +void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, u32 tag_first, + u32 tag_count) +{ + u32 val; + + val = (tag_first & OCXL_DVSEC_ACTAG_MASK) << 16; + val |= tag_count & OCXL_DVSEC_ACTAG_MASK; + pci_write_config_dword(dev, func_dvsec + OCXL_DVSEC_FUNC_OFF_ACTAG, + val); +} diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c new file mode 100644 index 000000000000..b34b836f924c --- /dev/null +++ b/drivers/misc/ocxl/context.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include "ocxl_internal.h" + +struct ocxl_context *ocxl_context_alloc(void) +{ + return kzalloc(sizeof(struct ocxl_context), GFP_KERNEL); +} + +int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu, + struct address_space *mapping) +{ + int pasid; + + ctx->afu = afu; + mutex_lock(&afu->contexts_lock); + pasid = idr_alloc(&afu->contexts_idr, ctx, afu->pasid_base, + afu->pasid_base + afu->pasid_max, GFP_KERNEL); + if (pasid < 0) { + mutex_unlock(&afu->contexts_lock); + return pasid; + } + afu->pasid_count++; + mutex_unlock(&afu->contexts_lock); + + ctx->pasid = pasid; + ctx->status = OPENED; + mutex_init(&ctx->status_mutex); + ctx->mapping = mapping; + mutex_init(&ctx->mapping_lock); + init_waitqueue_head(&ctx->events_wq); + mutex_init(&ctx->xsl_error_lock); + /* + * Keep a reference on the AFU to make sure it's valid for the + * duration of the life of the context + */ + ocxl_afu_get(afu); + return 0; +} + +/* + * Callback for when a translation fault triggers an error + * data: a pointer to the context which triggered the fault + * addr: the address that triggered the error + * dsisr: the value of the PPC64 dsisr register + */ +static void xsl_fault_error(void *data, u64 addr, u64 dsisr) +{ + struct ocxl_context *ctx = (struct ocxl_context *) data; + + mutex_lock(&ctx->xsl_error_lock); + ctx->xsl_error.addr = addr; + ctx->xsl_error.dsisr = dsisr; + ctx->xsl_error.count++; + mutex_unlock(&ctx->xsl_error_lock); + + wake_up_all(&ctx->events_wq); +} + +int ocxl_context_attach(struct ocxl_context *ctx, u64 amr) +{ + int rc; + + mutex_lock(&ctx->status_mutex); + if (ctx->status != OPENED) { + rc = -EIO; + goto out; + } + + rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, + current->mm->context.id, 0, amr, current->mm, + xsl_fault_error, ctx); + if (rc) + goto out; + + ctx->status = ATTACHED; +out: + mutex_unlock(&ctx->status_mutex); + return rc; +} + +static int map_pp_mmio(struct vm_area_struct *vma, unsigned long address, + u64 offset, struct ocxl_context *ctx) +{ + u64 pp_mmio_addr; + int pasid_off; + + if (offset >= ctx->afu->config.pp_mmio_stride) + return VM_FAULT_SIGBUS; + + mutex_lock(&ctx->status_mutex); + if (ctx->status != ATTACHED) { + mutex_unlock(&ctx->status_mutex); + pr_debug("%s: Context not attached, failing mmio mmap\n", + __func__); + return VM_FAULT_SIGBUS; + } + + pasid_off = ctx->pasid - ctx->afu->pasid_base; + pp_mmio_addr = ctx->afu->pp_mmio_start + + pasid_off * ctx->afu->config.pp_mmio_stride + + offset; + + vm_insert_pfn(vma, address, pp_mmio_addr >> PAGE_SHIFT); + mutex_unlock(&ctx->status_mutex); + return VM_FAULT_NOPAGE; +} + +static int ocxl_mmap_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct ocxl_context *ctx = vma->vm_file->private_data; + u64 offset; + int rc; + + offset = vmf->pgoff << PAGE_SHIFT; + pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__, + ctx->pasid, vmf->address, offset); + + rc = map_pp_mmio(vma, vmf->address, offset, ctx); + return rc; +} + +static const struct vm_operations_struct ocxl_vmops = { + .fault = ocxl_mmap_fault, +}; + +static int check_mmap_mmio(struct ocxl_context *ctx, + struct vm_area_struct *vma) +{ + if ((vma_pages(vma) + vma->vm_pgoff) > + (ctx->afu->config.pp_mmio_stride >> PAGE_SHIFT)) + return -EINVAL; + return 0; +} + +int ocxl_context_mmap(struct ocxl_context *ctx, struct vm_area_struct *vma) +{ + int rc; + + rc = check_mmap_mmio(ctx, vma); + if (rc) + return rc; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_ops = &ocxl_vmops; + return 0; +} + +int ocxl_context_detach(struct ocxl_context *ctx) +{ + struct pci_dev *dev; + int afu_control_pos; + enum ocxl_context_status status; + int rc; + + mutex_lock(&ctx->status_mutex); + status = ctx->status; + ctx->status = CLOSED; + mutex_unlock(&ctx->status_mutex); + if (status != ATTACHED) + return 0; + + dev = to_pci_dev(ctx->afu->fn->dev.parent); + afu_control_pos = ctx->afu->config.dvsec_afu_control_pos; + + mutex_lock(&ctx->afu->afu_control_lock); + rc = ocxl_config_terminate_pasid(dev, afu_control_pos, ctx->pasid); + mutex_unlock(&ctx->afu->afu_control_lock); + if (rc) { + /* + * If we timeout waiting for the AFU to terminate the + * pasid, then it's dangerous to clean up the Process + * Element entry in the SPA, as it may be referenced + * in the future by the AFU. In which case, we would + * checkstop because of an invalid PE access (FIR + * register 2, bit 42). So leave the PE + * defined. Caller shouldn't free the context so that + * PASID remains allocated. + * + * A link reset will be required to cleanup the AFU + * and the SPA. + */ + if (rc == -EBUSY) + return rc; + } + rc = ocxl_link_remove_pe(ctx->afu->fn->link, ctx->pasid); + if (rc) { + dev_warn(&ctx->afu->dev, + "Couldn't remove PE entry cleanly: %d\n", rc); + } + return 0; +} + +void ocxl_context_detach_all(struct ocxl_afu *afu) +{ + struct ocxl_context *ctx; + int tmp; + + mutex_lock(&afu->contexts_lock); + idr_for_each_entry(&afu->contexts_idr, ctx, tmp) { + ocxl_context_detach(ctx); + /* + * We are force detaching - remove any active mmio + * mappings so userspace cannot interfere with the + * card if it comes back. Easiest way to exercise + * this is to unbind and rebind the driver via sysfs + * while it is in use. + */ + mutex_lock(&ctx->mapping_lock); + if (ctx->mapping) + unmap_mapping_range(ctx->mapping, 0, 0, 1); + mutex_unlock(&ctx->mapping_lock); + } + mutex_unlock(&afu->contexts_lock); +} + +void ocxl_context_free(struct ocxl_context *ctx) +{ + mutex_lock(&ctx->afu->contexts_lock); + ctx->afu->pasid_count--; + idr_remove(&ctx->afu->contexts_idr, ctx->pasid); + mutex_unlock(&ctx->afu->contexts_lock); + + /* reference to the AFU taken in ocxl_context_init */ + ocxl_afu_put(ctx->afu); + kfree(ctx); +} diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c new file mode 100644 index 000000000000..6f0befda6a8a --- /dev/null +++ b/drivers/misc/ocxl/file.c @@ -0,0 +1,398 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include +#include +#include +#include "ocxl_internal.h" + + +#define OCXL_NUM_MINORS 256 /* Total to reserve */ + +static dev_t ocxl_dev; +static struct class *ocxl_class; +static struct mutex minors_idr_lock; +static struct idr minors_idr; + +static struct ocxl_afu *find_and_get_afu(dev_t devno) +{ + struct ocxl_afu *afu; + int afu_minor; + + afu_minor = MINOR(devno); + /* + * We don't declare an RCU critical section here, as our AFU + * is protected by a reference counter on the device. By the time the + * minor number of a device is removed from the idr, the ref count of + * the device is already at 0, so no user API will access that AFU and + * this function can't return it. + */ + afu = idr_find(&minors_idr, afu_minor); + if (afu) + ocxl_afu_get(afu); + return afu; +} + +static int allocate_afu_minor(struct ocxl_afu *afu) +{ + int minor; + + mutex_lock(&minors_idr_lock); + minor = idr_alloc(&minors_idr, afu, 0, OCXL_NUM_MINORS, GFP_KERNEL); + mutex_unlock(&minors_idr_lock); + return minor; +} + +static void free_afu_minor(struct ocxl_afu *afu) +{ + mutex_lock(&minors_idr_lock); + idr_remove(&minors_idr, MINOR(afu->dev.devt)); + mutex_unlock(&minors_idr_lock); +} + +static int afu_open(struct inode *inode, struct file *file) +{ + struct ocxl_afu *afu; + struct ocxl_context *ctx; + int rc; + + pr_debug("%s for device %x\n", __func__, inode->i_rdev); + + afu = find_and_get_afu(inode->i_rdev); + if (!afu) + return -ENODEV; + + ctx = ocxl_context_alloc(); + if (!ctx) { + rc = -ENOMEM; + goto put_afu; + } + + rc = ocxl_context_init(ctx, afu, inode->i_mapping); + if (rc) + goto put_afu; + file->private_data = ctx; + ocxl_afu_put(afu); + return 0; + +put_afu: + ocxl_afu_put(afu); + return rc; +} + +static long afu_ioctl_attach(struct ocxl_context *ctx, + struct ocxl_ioctl_attach __user *uarg) +{ + struct ocxl_ioctl_attach arg; + u64 amr = 0; + int rc; + + pr_debug("%s for context %d\n", __func__, ctx->pasid); + + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + /* Make sure reserved fields are not set for forward compatibility */ + if (arg.reserved1 || arg.reserved2 || arg.reserved3) + return -EINVAL; + + amr = arg.amr & mfspr(SPRN_UAMOR); + rc = ocxl_context_attach(ctx, amr); + return rc; +} + +#define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" : \ + "UNKNOWN") + +static long afu_ioctl(struct file *file, unsigned int cmd, + unsigned long args) +{ + struct ocxl_context *ctx = file->private_data; + long rc; + + pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid, + CMD_STR(cmd)); + + if (ctx->status == CLOSED) + return -EIO; + + switch (cmd) { + case OCXL_IOCTL_ATTACH: + rc = afu_ioctl_attach(ctx, + (struct ocxl_ioctl_attach __user *) args); + break; + + default: + rc = -EINVAL; + } + return rc; +} + +static long afu_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long args) +{ + return afu_ioctl(file, cmd, args); +} + +static int afu_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct ocxl_context *ctx = file->private_data; + + pr_debug("%s for context %d\n", __func__, ctx->pasid); + return ocxl_context_mmap(ctx, vma); +} + +static bool has_xsl_error(struct ocxl_context *ctx) +{ + bool ret; + + mutex_lock(&ctx->xsl_error_lock); + ret = !!ctx->xsl_error.addr; + mutex_unlock(&ctx->xsl_error_lock); + + return ret; +} + +/* + * Are there any events pending on the AFU + * ctx: The AFU context + * Returns: true if there are events pending + */ +static bool afu_events_pending(struct ocxl_context *ctx) +{ + if (has_xsl_error(ctx)) + return true; + return false; +} + +static unsigned int afu_poll(struct file *file, struct poll_table_struct *wait) +{ + struct ocxl_context *ctx = file->private_data; + unsigned int mask = 0; + bool closed; + + pr_debug("%s for context %d\n", __func__, ctx->pasid); + + poll_wait(file, &ctx->events_wq, wait); + + mutex_lock(&ctx->status_mutex); + closed = (ctx->status == CLOSED); + mutex_unlock(&ctx->status_mutex); + + if (afu_events_pending(ctx)) + mask = POLLIN | POLLRDNORM; + else if (closed) + mask = POLLERR; + + return mask; +} + +/* + * Populate the supplied buffer with a single XSL error + * ctx: The AFU context to report the error from + * header: the event header to populate + * buf: The buffer to write the body into (should be at least + * AFU_EVENT_BODY_XSL_ERROR_SIZE) + * Return: the amount of buffer that was populated + */ +static ssize_t append_xsl_error(struct ocxl_context *ctx, + struct ocxl_kernel_event_header *header, + char __user *buf) +{ + struct ocxl_kernel_event_xsl_fault_error body; + + memset(&body, 0, sizeof(body)); + + mutex_lock(&ctx->xsl_error_lock); + if (!ctx->xsl_error.addr) { + mutex_unlock(&ctx->xsl_error_lock); + return 0; + } + + body.addr = ctx->xsl_error.addr; + body.dsisr = ctx->xsl_error.dsisr; + body.count = ctx->xsl_error.count; + + ctx->xsl_error.addr = 0; + ctx->xsl_error.dsisr = 0; + ctx->xsl_error.count = 0; + + mutex_unlock(&ctx->xsl_error_lock); + + header->type = OCXL_AFU_EVENT_XSL_FAULT_ERROR; + + if (copy_to_user(buf, &body, sizeof(body))) + return -EFAULT; + + return sizeof(body); +} + +#define AFU_EVENT_BODY_MAX_SIZE sizeof(struct ocxl_kernel_event_xsl_fault_error) + +/* + * Reports events on the AFU + * Format: + * Header (struct ocxl_kernel_event_header) + * Body (struct ocxl_kernel_event_*) + * Header... + */ +static ssize_t afu_read(struct file *file, char __user *buf, size_t count, + loff_t *off) +{ + struct ocxl_context *ctx = file->private_data; + struct ocxl_kernel_event_header header; + ssize_t rc; + size_t used = 0; + DEFINE_WAIT(event_wait); + + memset(&header, 0, sizeof(header)); + + /* Require offset to be 0 */ + if (*off != 0) + return -EINVAL; + + if (count < (sizeof(struct ocxl_kernel_event_header) + + AFU_EVENT_BODY_MAX_SIZE)) + return -EINVAL; + + for (;;) { + prepare_to_wait(&ctx->events_wq, &event_wait, + TASK_INTERRUPTIBLE); + + if (afu_events_pending(ctx)) + break; + + if (ctx->status == CLOSED) + break; + + if (file->f_flags & O_NONBLOCK) { + finish_wait(&ctx->events_wq, &event_wait); + return -EAGAIN; + } + + if (signal_pending(current)) { + finish_wait(&ctx->events_wq, &event_wait); + return -ERESTARTSYS; + } + + schedule(); + } + + finish_wait(&ctx->events_wq, &event_wait); + + if (has_xsl_error(ctx)) { + used = append_xsl_error(ctx, &header, buf + sizeof(header)); + if (used < 0) + return used; + } + + if (!afu_events_pending(ctx)) + header.flags |= OCXL_KERNEL_EVENT_FLAG_LAST; + + if (copy_to_user(buf, &header, sizeof(header))) + return -EFAULT; + + used += sizeof(header); + + rc = (ssize_t) used; + return rc; +} + +static int afu_release(struct inode *inode, struct file *file) +{ + struct ocxl_context *ctx = file->private_data; + int rc; + + pr_debug("%s for device %x\n", __func__, inode->i_rdev); + rc = ocxl_context_detach(ctx); + mutex_lock(&ctx->mapping_lock); + ctx->mapping = NULL; + mutex_unlock(&ctx->mapping_lock); + wake_up_all(&ctx->events_wq); + if (rc != -EBUSY) + ocxl_context_free(ctx); + return 0; +} + +static const struct file_operations ocxl_afu_fops = { + .owner = THIS_MODULE, + .open = afu_open, + .unlocked_ioctl = afu_ioctl, + .compat_ioctl = afu_compat_ioctl, + .mmap = afu_mmap, + .poll = afu_poll, + .read = afu_read, + .release = afu_release, +}; + +int ocxl_create_cdev(struct ocxl_afu *afu) +{ + int rc; + + cdev_init(&afu->cdev, &ocxl_afu_fops); + rc = cdev_add(&afu->cdev, afu->dev.devt, 1); + if (rc) { + dev_err(&afu->dev, "Unable to add afu char device: %d\n", rc); + return rc; + } + return 0; +} + +void ocxl_destroy_cdev(struct ocxl_afu *afu) +{ + cdev_del(&afu->cdev); +} + +int ocxl_register_afu(struct ocxl_afu *afu) +{ + int minor; + + minor = allocate_afu_minor(afu); + if (minor < 0) + return minor; + afu->dev.devt = MKDEV(MAJOR(ocxl_dev), minor); + afu->dev.class = ocxl_class; + return device_register(&afu->dev); +} + +void ocxl_unregister_afu(struct ocxl_afu *afu) +{ + free_afu_minor(afu); +} + +static char *ocxl_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "ocxl/%s", dev_name(dev)); +} + +int ocxl_file_init(void) +{ + int rc; + + mutex_init(&minors_idr_lock); + idr_init(&minors_idr); + + rc = alloc_chrdev_region(&ocxl_dev, 0, OCXL_NUM_MINORS, "ocxl"); + if (rc) { + pr_err("Unable to allocate ocxl major number: %d\n", rc); + return rc; + } + + ocxl_class = class_create(THIS_MODULE, "ocxl"); + if (IS_ERR(ocxl_class)) { + pr_err("Unable to create ocxl class\n"); + unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS); + return PTR_ERR(ocxl_class); + } + + ocxl_class->devnode = ocxl_devnode; + return 0; +} + +void ocxl_file_exit(void) +{ + class_destroy(ocxl_class); + unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS); + idr_destroy(&minors_idr); +} diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c new file mode 100644 index 000000000000..64d7a98c904a --- /dev/null +++ b/drivers/misc/ocxl/link.c @@ -0,0 +1,603 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include +#include +#include +#include "ocxl_internal.h" + + +#define SPA_PASID_BITS 15 +#define SPA_PASID_MAX ((1 << SPA_PASID_BITS) - 1) +#define SPA_PE_MASK SPA_PASID_MAX +#define SPA_SPA_SIZE_LOG 22 /* Each SPA is 4 Mb */ + +#define SPA_CFG_SF (1ull << (63-0)) +#define SPA_CFG_TA (1ull << (63-1)) +#define SPA_CFG_HV (1ull << (63-3)) +#define SPA_CFG_UV (1ull << (63-4)) +#define SPA_CFG_XLAT_hpt (0ull << (63-6)) /* Hashed page table (HPT) mode */ +#define SPA_CFG_XLAT_roh (2ull << (63-6)) /* Radix on HPT mode */ +#define SPA_CFG_XLAT_ror (3ull << (63-6)) /* Radix on Radix mode */ +#define SPA_CFG_PR (1ull << (63-49)) +#define SPA_CFG_TC (1ull << (63-54)) +#define SPA_CFG_DR (1ull << (63-59)) + +#define SPA_XSL_TF (1ull << (63-3)) /* Translation fault */ +#define SPA_XSL_S (1ull << (63-38)) /* Store operation */ + +#define SPA_PE_VALID 0x80000000 + + +struct pe_data { + struct mm_struct *mm; + /* callback to trigger when a translation fault occurs */ + void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr); + /* opaque pointer to be passed to the above callback */ + void *xsl_err_data; + struct rcu_head rcu; +}; + +struct spa { + struct ocxl_process_element *spa_mem; + int spa_order; + struct mutex spa_lock; + struct radix_tree_root pe_tree; /* Maps PE handles to pe_data */ + char *irq_name; + int virq; + void __iomem *reg_dsisr; + void __iomem *reg_dar; + void __iomem *reg_tfc; + void __iomem *reg_pe_handle; + /* + * The following field are used by the memory fault + * interrupt handler. We can only have one interrupt at a + * time. The NPU won't raise another interrupt until the + * previous one has been ack'd by writing to the TFC register + */ + struct xsl_fault { + struct work_struct fault_work; + u64 pe; + u64 dsisr; + u64 dar; + struct pe_data pe_data; + } xsl_fault; +}; + +/* + * A opencapi link can be used be by several PCI functions. We have + * one link per device slot. + * + * A linked list of opencapi links should suffice, as there's a + * limited number of opencapi slots on a system and lookup is only + * done when the device is probed + */ +struct link { + struct list_head list; + struct kref ref; + int domain; + int bus; + int dev; + atomic_t irq_available; + struct spa *spa; + void *platform_data; +}; +static struct list_head links_list = LIST_HEAD_INIT(links_list); +static DEFINE_MUTEX(links_list_lock); + +enum xsl_response { + CONTINUE, + ADDRESS_ERROR, + RESTART, +}; + + +static void read_irq(struct spa *spa, u64 *dsisr, u64 *dar, u64 *pe) +{ + u64 reg; + + *dsisr = in_be64(spa->reg_dsisr); + *dar = in_be64(spa->reg_dar); + reg = in_be64(spa->reg_pe_handle); + *pe = reg & SPA_PE_MASK; +} + +static void ack_irq(struct spa *spa, enum xsl_response r) +{ + u64 reg = 0; + + /* continue is not supported */ + if (r == RESTART) + reg = PPC_BIT(31); + else if (r == ADDRESS_ERROR) + reg = PPC_BIT(30); + else + WARN(1, "Invalid irq response %d\n", r); + + if (reg) + out_be64(spa->reg_tfc, reg); +} + +static void xsl_fault_handler_bh(struct work_struct *fault_work) +{ + unsigned int flt = 0; + unsigned long access, flags, inv_flags = 0; + enum xsl_response r; + struct xsl_fault *fault = container_of(fault_work, struct xsl_fault, + fault_work); + struct spa *spa = container_of(fault, struct spa, xsl_fault); + + int rc; + + /* + * We need to release a reference on the mm whenever exiting this + * function (taken in the memory fault interrupt handler) + */ + rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr, + &flt); + if (rc) { + pr_debug("copro_handle_mm_fault failed: %d\n", rc); + if (fault->pe_data.xsl_err_cb) { + fault->pe_data.xsl_err_cb( + fault->pe_data.xsl_err_data, + fault->dar, fault->dsisr); + } + r = ADDRESS_ERROR; + goto ack; + } + + if (!radix_enabled()) { + /* + * update_mmu_cache() will not have loaded the hash + * since current->trap is not a 0x400 or 0x300, so + * just call hash_page_mm() here. + */ + access = _PAGE_PRESENT | _PAGE_READ; + if (fault->dsisr & SPA_XSL_S) + access |= _PAGE_WRITE; + + if (REGION_ID(fault->dar) != USER_REGION_ID) + access |= _PAGE_PRIVILEGED; + + local_irq_save(flags); + hash_page_mm(fault->pe_data.mm, fault->dar, access, 0x300, + inv_flags); + local_irq_restore(flags); + } + r = RESTART; +ack: + mmdrop(fault->pe_data.mm); + ack_irq(spa, r); +} + +static irqreturn_t xsl_fault_handler(int irq, void *data) +{ + struct link *link = (struct link *) data; + struct spa *spa = link->spa; + u64 dsisr, dar, pe_handle; + struct pe_data *pe_data; + struct ocxl_process_element *pe; + int lpid, pid, tid; + + read_irq(spa, &dsisr, &dar, &pe_handle); + + WARN_ON(pe_handle > SPA_PE_MASK); + pe = spa->spa_mem + pe_handle; + lpid = be32_to_cpu(pe->lpid); + pid = be32_to_cpu(pe->pid); + tid = be32_to_cpu(pe->tid); + /* We could be reading all null values here if the PE is being + * removed while an interrupt kicks in. It's not supposed to + * happen if the driver notified the AFU to terminate the + * PASID, and the AFU waited for pending operations before + * acknowledging. But even if it happens, we won't find a + * memory context below and fail silently, so it should be ok. + */ + if (!(dsisr & SPA_XSL_TF)) { + WARN(1, "Invalid xsl interrupt fault register %#llx\n", dsisr); + ack_irq(spa, ADDRESS_ERROR); + return IRQ_HANDLED; + } + + rcu_read_lock(); + pe_data = radix_tree_lookup(&spa->pe_tree, pe_handle); + if (!pe_data) { + /* + * Could only happen if the driver didn't notify the + * AFU about PASID termination before removing the PE, + * or the AFU didn't wait for all memory access to + * have completed. + * + * Either way, we fail early, but we shouldn't log an + * error message, as it is a valid (if unexpected) + * scenario + */ + rcu_read_unlock(); + pr_debug("Unknown mm context for xsl interrupt\n"); + ack_irq(spa, ADDRESS_ERROR); + return IRQ_HANDLED; + } + WARN_ON(pe_data->mm->context.id != pid); + + spa->xsl_fault.pe = pe_handle; + spa->xsl_fault.dar = dar; + spa->xsl_fault.dsisr = dsisr; + spa->xsl_fault.pe_data = *pe_data; + mmgrab(pe_data->mm); /* mm count is released by bottom half */ + + rcu_read_unlock(); + schedule_work(&spa->xsl_fault.fault_work); + return IRQ_HANDLED; +} + +static void unmap_irq_registers(struct spa *spa) +{ + pnv_ocxl_unmap_xsl_regs(spa->reg_dsisr, spa->reg_dar, spa->reg_tfc, + spa->reg_pe_handle); +} + +static int map_irq_registers(struct pci_dev *dev, struct spa *spa) +{ + return pnv_ocxl_map_xsl_regs(dev, &spa->reg_dsisr, &spa->reg_dar, + &spa->reg_tfc, &spa->reg_pe_handle); +} + +static int setup_xsl_irq(struct pci_dev *dev, struct link *link) +{ + struct spa *spa = link->spa; + int rc; + int hwirq; + + rc = pnv_ocxl_get_xsl_irq(dev, &hwirq); + if (rc) + return rc; + + rc = map_irq_registers(dev, spa); + if (rc) + return rc; + + spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x", + link->domain, link->bus, link->dev); + if (!spa->irq_name) { + unmap_irq_registers(spa); + dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n"); + return -ENOMEM; + } + /* + * At some point, we'll need to look into allowing a higher + * number of interrupts. Could we have an IRQ domain per link? + */ + spa->virq = irq_create_mapping(NULL, hwirq); + if (!spa->virq) { + kfree(spa->irq_name); + unmap_irq_registers(spa); + dev_err(&dev->dev, + "irq_create_mapping failed for translation interrupt\n"); + return -EINVAL; + } + + dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq); + + rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name, + link); + if (rc) { + irq_dispose_mapping(spa->virq); + kfree(spa->irq_name); + unmap_irq_registers(spa); + dev_err(&dev->dev, + "request_irq failed for translation interrupt: %d\n", + rc); + return -EINVAL; + } + return 0; +} + +static void release_xsl_irq(struct link *link) +{ + struct spa *spa = link->spa; + + if (spa->virq) { + free_irq(spa->virq, link); + irq_dispose_mapping(spa->virq); + } + kfree(spa->irq_name); + unmap_irq_registers(spa); +} + +static int alloc_spa(struct pci_dev *dev, struct link *link) +{ + struct spa *spa; + + spa = kzalloc(sizeof(struct spa), GFP_KERNEL); + if (!spa) + return -ENOMEM; + + mutex_init(&spa->spa_lock); + INIT_RADIX_TREE(&spa->pe_tree, GFP_KERNEL); + INIT_WORK(&spa->xsl_fault.fault_work, xsl_fault_handler_bh); + + spa->spa_order = SPA_SPA_SIZE_LOG - PAGE_SHIFT; + spa->spa_mem = (struct ocxl_process_element *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, spa->spa_order); + if (!spa->spa_mem) { + dev_err(&dev->dev, "Can't allocate Shared Process Area\n"); + kfree(spa); + return -ENOMEM; + } + pr_debug("Allocated SPA for %x:%x:%x at %p\n", link->domain, link->bus, + link->dev, spa->spa_mem); + + link->spa = spa; + return 0; +} + +static void free_spa(struct link *link) +{ + struct spa *spa = link->spa; + + pr_debug("Freeing SPA for %x:%x:%x\n", link->domain, link->bus, + link->dev); + + if (spa && spa->spa_mem) { + free_pages((unsigned long) spa->spa_mem, spa->spa_order); + kfree(spa); + link->spa = NULL; + } +} + +static int alloc_link(struct pci_dev *dev, int PE_mask, struct link **out_link) +{ + struct link *link; + int rc; + + link = kzalloc(sizeof(struct link), GFP_KERNEL); + if (!link) + return -ENOMEM; + + kref_init(&link->ref); + link->domain = pci_domain_nr(dev->bus); + link->bus = dev->bus->number; + link->dev = PCI_SLOT(dev->devfn); + atomic_set(&link->irq_available, MAX_IRQ_PER_LINK); + + rc = alloc_spa(dev, link); + if (rc) + goto err_free; + + rc = setup_xsl_irq(dev, link); + if (rc) + goto err_spa; + + /* platform specific hook */ + rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask, + &link->platform_data); + if (rc) + goto err_xsl_irq; + + *out_link = link; + return 0; + +err_xsl_irq: + release_xsl_irq(link); +err_spa: + free_spa(link); +err_free: + kfree(link); + return rc; +} + +static void free_link(struct link *link) +{ + release_xsl_irq(link); + free_spa(link); + kfree(link); +} + +int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle) +{ + int rc = 0; + struct link *link; + + mutex_lock(&links_list_lock); + list_for_each_entry(link, &links_list, list) { + /* The functions of a device all share the same link */ + if (link->domain == pci_domain_nr(dev->bus) && + link->bus == dev->bus->number && + link->dev == PCI_SLOT(dev->devfn)) { + kref_get(&link->ref); + *link_handle = link; + goto unlock; + } + } + rc = alloc_link(dev, PE_mask, &link); + if (rc) + goto unlock; + + list_add(&link->list, &links_list); + *link_handle = link; +unlock: + mutex_unlock(&links_list_lock); + return rc; +} + +static void release_xsl(struct kref *ref) +{ + struct link *link = container_of(ref, struct link, ref); + + list_del(&link->list); + /* call platform code before releasing data */ + pnv_ocxl_spa_release(link->platform_data); + free_link(link); +} + +void ocxl_link_release(struct pci_dev *dev, void *link_handle) +{ + struct link *link = (struct link *) link_handle; + + mutex_lock(&links_list_lock); + kref_put(&link->ref, release_xsl); + mutex_unlock(&links_list_lock); +} + +static u64 calculate_cfg_state(bool kernel) +{ + u64 state; + + state = SPA_CFG_DR; + if (mfspr(SPRN_LPCR) & LPCR_TC) + state |= SPA_CFG_TC; + if (radix_enabled()) + state |= SPA_CFG_XLAT_ror; + else + state |= SPA_CFG_XLAT_hpt; + state |= SPA_CFG_HV; + if (kernel) { + if (mfmsr() & MSR_SF) + state |= SPA_CFG_SF; + } else { + state |= SPA_CFG_PR; + if (!test_tsk_thread_flag(current, TIF_32BIT)) + state |= SPA_CFG_SF; + } + return state; +} + +int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, + u64 amr, struct mm_struct *mm, + void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), + void *xsl_err_data) +{ + struct link *link = (struct link *) link_handle; + struct spa *spa = link->spa; + struct ocxl_process_element *pe; + int pe_handle, rc = 0; + struct pe_data *pe_data; + + BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128); + if (pasid > SPA_PASID_MAX) + return -EINVAL; + + mutex_lock(&spa->spa_lock); + pe_handle = pasid & SPA_PE_MASK; + pe = spa->spa_mem + pe_handle; + + if (pe->software_state) { + rc = -EBUSY; + goto unlock; + } + + pe_data = kmalloc(sizeof(*pe_data), GFP_KERNEL); + if (!pe_data) { + rc = -ENOMEM; + goto unlock; + } + + pe_data->mm = mm; + pe_data->xsl_err_cb = xsl_err_cb; + pe_data->xsl_err_data = xsl_err_data; + + memset(pe, 0, sizeof(struct ocxl_process_element)); + pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0)); + pe->lpid = cpu_to_be32(mfspr(SPRN_LPID)); + pe->pid = cpu_to_be32(pidr); + pe->tid = cpu_to_be32(tidr); + pe->amr = cpu_to_be64(amr); + pe->software_state = cpu_to_be32(SPA_PE_VALID); + + mm_context_add_copro(mm); + /* + * Barrier is to make sure PE is visible in the SPA before it + * is used by the device. It also helps with the global TLBI + * invalidation + */ + mb(); + radix_tree_insert(&spa->pe_tree, pe_handle, pe_data); + + /* + * The mm must stay valid for as long as the device uses it. We + * lower the count when the context is removed from the SPA. + * + * We grab mm_count (and not mm_users), as we don't want to + * end up in a circular dependency if a process mmaps its + * mmio, therefore incrementing the file ref count when + * calling mmap(), and forgets to unmap before exiting. In + * that scenario, when the kernel handles the death of the + * process, the file is not cleaned because unmap was not + * called, and the mm wouldn't be freed because we would still + * have a reference on mm_users. Incrementing mm_count solves + * the problem. + */ + mmgrab(mm); +unlock: + mutex_unlock(&spa->spa_lock); + return rc; +} + +int ocxl_link_remove_pe(void *link_handle, int pasid) +{ + struct link *link = (struct link *) link_handle; + struct spa *spa = link->spa; + struct ocxl_process_element *pe; + struct pe_data *pe_data; + int pe_handle, rc; + + if (pasid > SPA_PASID_MAX) + return -EINVAL; + + /* + * About synchronization with our memory fault handler: + * + * Before removing the PE, the driver is supposed to have + * notified the AFU, which should have cleaned up and make + * sure the PASID is no longer in use, including pending + * interrupts. However, there's no way to be sure... + * + * We clear the PE and remove the context from our radix + * tree. From that point on, any new interrupt for that + * context will fail silently, which is ok. As mentioned + * above, that's not expected, but it could happen if the + * driver or AFU didn't do the right thing. + * + * There could still be a bottom half running, but we don't + * need to wait/flush, as it is managing a reference count on + * the mm it reads from the radix tree. + */ + pe_handle = pasid & SPA_PE_MASK; + pe = spa->spa_mem + pe_handle; + + mutex_lock(&spa->spa_lock); + + if (!(be32_to_cpu(pe->software_state) & SPA_PE_VALID)) { + rc = -EINVAL; + goto unlock; + } + + memset(pe, 0, sizeof(struct ocxl_process_element)); + /* + * The barrier makes sure the PE is removed from the SPA + * before we clear the NPU context cache below, so that the + * old PE cannot be reloaded erroneously. + */ + mb(); + + /* + * hook to platform code + * On powerpc, the entry needs to be cleared from the context + * cache of the NPU. + */ + rc = pnv_ocxl_spa_remove_pe(link->platform_data, pe_handle); + WARN_ON(rc); + + pe_data = radix_tree_delete(&spa->pe_tree, pe_handle); + if (!pe_data) { + WARN(1, "Couldn't find pe data when removing PE\n"); + } else { + mm_context_remove_copro(pe_data->mm); + mmdrop(pe_data->mm); + kfree_rcu(pe_data, rcu); + } +unlock: + mutex_unlock(&spa->spa_lock); + return rc; +} diff --git a/drivers/misc/ocxl/main.c b/drivers/misc/ocxl/main.c new file mode 100644 index 000000000000..7210d9e059be --- /dev/null +++ b/drivers/misc/ocxl/main.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include "ocxl_internal.h" + +static int __init init_ocxl(void) +{ + int rc = 0; + + rc = ocxl_file_init(); + if (rc) + return rc; + + rc = pci_register_driver(&ocxl_pci_driver); + if (rc) { + ocxl_file_exit(); + return rc; + } + return 0; +} + +static void exit_ocxl(void) +{ + pci_unregister_driver(&ocxl_pci_driver); + ocxl_file_exit(); +} + +module_init(init_ocxl); +module_exit(exit_ocxl); + +MODULE_DESCRIPTION("Open Coherent Accelerator"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h new file mode 100644 index 000000000000..04fc160c7bd5 --- /dev/null +++ b/drivers/misc/ocxl/ocxl_internal.h @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#ifndef _OCXL_INTERNAL_H_ +#define _OCXL_INTERNAL_H_ + +#include +#include +#include + +#define OCXL_AFU_NAME_SZ (24+1) /* add 1 for NULL termination */ +#define MAX_IRQ_PER_LINK 2000 +#define MAX_IRQ_PER_CONTEXT MAX_IRQ_PER_LINK + +#define to_ocxl_function(d) container_of(d, struct ocxl_fn, dev) +#define to_ocxl_afu(d) container_of(d, struct ocxl_afu, dev) + +extern struct pci_driver ocxl_pci_driver; + +/* + * The following 2 structures are a fairly generic way of representing + * the configuration data for a function and AFU, as read from the + * configuration space. + */ +struct ocxl_afu_config { + u8 idx; + int dvsec_afu_control_pos; + char name[OCXL_AFU_NAME_SZ]; + u8 version_major; + u8 version_minor; + u8 afuc_type; + u8 afum_type; + u8 profile; + u8 global_mmio_bar; + u64 global_mmio_offset; + u32 global_mmio_size; + u8 pp_mmio_bar; + u64 pp_mmio_offset; + u32 pp_mmio_stride; + u8 log_mem_size; + u8 pasid_supported_log; + u16 actag_supported; +}; + +struct ocxl_fn_config { + int dvsec_tl_pos; + int dvsec_function_pos; + int dvsec_afu_info_pos; + s8 max_pasid_log; + s8 max_afu_index; +}; + +struct ocxl_fn { + struct device dev; + int bar_used[3]; + struct ocxl_fn_config config; + struct list_head afu_list; + int pasid_base; + int actag_base; + int actag_enabled; + int actag_supported; + struct list_head pasid_list; + struct list_head actag_list; + void *link; +}; + +struct ocxl_afu { + struct ocxl_fn *fn; + struct list_head list; + struct device dev; + struct cdev cdev; + struct ocxl_afu_config config; + int pasid_base; + int pasid_count; /* opened contexts */ + int pasid_max; /* maximum number of contexts */ + int actag_base; + int actag_enabled; + struct mutex contexts_lock; + struct idr contexts_idr; + struct mutex afu_control_lock; + u64 global_mmio_start; + u64 irq_base_offset; + void __iomem *global_mmio_ptr; + u64 pp_mmio_start; + struct bin_attribute attr_global_mmio; +}; + +enum ocxl_context_status { + CLOSED, + OPENED, + ATTACHED, +}; + +// Contains metadata about a translation fault +struct ocxl_xsl_error { + u64 addr; // The address that triggered the fault + u64 dsisr; // the value of the dsisr register + u64 count; // The number of times this fault has been triggered +}; + +struct ocxl_context { + struct ocxl_afu *afu; + int pasid; + struct mutex status_mutex; + enum ocxl_context_status status; + struct address_space *mapping; + struct mutex mapping_lock; + wait_queue_head_t events_wq; + struct mutex xsl_error_lock; + struct ocxl_xsl_error xsl_error; + struct mutex irq_lock; + struct idr irq_idr; +}; + +struct ocxl_process_element { + __be64 config_state; + __be32 reserved1[11]; + __be32 lpid; + __be32 tid; + __be32 pid; + __be32 reserved2[10]; + __be64 amr; + __be32 reserved3[3]; + __be32 software_state; +}; + + +extern struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu); +extern void ocxl_afu_put(struct ocxl_afu *afu); + +extern int ocxl_create_cdev(struct ocxl_afu *afu); +extern void ocxl_destroy_cdev(struct ocxl_afu *afu); +extern int ocxl_register_afu(struct ocxl_afu *afu); +extern void ocxl_unregister_afu(struct ocxl_afu *afu); + +extern int ocxl_file_init(void); +extern void ocxl_file_exit(void); + +extern int ocxl_config_read_function(struct pci_dev *dev, + struct ocxl_fn_config *fn); + +extern int ocxl_config_check_afu_index(struct pci_dev *dev, + struct ocxl_fn_config *fn, int afu_idx); +extern int ocxl_config_read_afu(struct pci_dev *dev, + struct ocxl_fn_config *fn, + struct ocxl_afu_config *afu, + u8 afu_idx); +extern int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count); +extern void ocxl_config_set_afu_pasid(struct pci_dev *dev, + int afu_control, + int pasid_base, u32 pasid_count_log); +extern int ocxl_config_get_actag_info(struct pci_dev *dev, + u16 *base, u16 *enabled, u16 *supported); +extern void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, + u32 tag_first, u32 tag_count); +extern void ocxl_config_set_afu_actag(struct pci_dev *dev, int afu_control, + int actag_base, int actag_count); +extern void ocxl_config_set_afu_state(struct pci_dev *dev, int afu_control, + int enable); +extern int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec); +extern int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, + int pasid); + +extern int ocxl_link_setup(struct pci_dev *dev, int PE_mask, + void **link_handle); +extern void ocxl_link_release(struct pci_dev *dev, void *link_handle); +extern int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, + u64 amr, struct mm_struct *mm, + void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), + void *xsl_err_data); +extern int ocxl_link_remove_pe(void *link_handle, int pasid); +extern int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, + u64 *addr); +extern void ocxl_link_free_irq(void *link_handle, int hw_irq); + +extern int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size); +extern void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size); +extern int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size); +extern void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size); + +extern struct ocxl_context *ocxl_context_alloc(void); +extern int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu, + struct address_space *mapping); +extern int ocxl_context_attach(struct ocxl_context *ctx, u64 amr); +extern int ocxl_context_mmap(struct ocxl_context *ctx, + struct vm_area_struct *vma); +extern int ocxl_context_detach(struct ocxl_context *ctx); +extern void ocxl_context_detach_all(struct ocxl_afu *afu); +extern void ocxl_context_free(struct ocxl_context *ctx); + +extern int ocxl_sysfs_add_afu(struct ocxl_afu *afu); +extern void ocxl_sysfs_remove_afu(struct ocxl_afu *afu); + +#endif /* _OCXL_INTERNAL_H_ */ diff --git a/drivers/misc/ocxl/pasid.c b/drivers/misc/ocxl/pasid.c new file mode 100644 index 000000000000..d14cb56e6920 --- /dev/null +++ b/drivers/misc/ocxl/pasid.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include "ocxl_internal.h" + + +struct id_range { + struct list_head list; + u32 start; + u32 end; +}; + +#ifdef DEBUG +static void dump_list(struct list_head *head, char *type_str) +{ + struct id_range *cur; + + pr_debug("%s ranges allocated:\n", type_str); + list_for_each_entry(cur, head, list) { + pr_debug("Range %d->%d\n", cur->start, cur->end); + } +} +#endif + +static int range_alloc(struct list_head *head, u32 size, int max_id, + char *type_str) +{ + struct list_head *pos; + struct id_range *cur, *new; + int rc, last_end; + + new = kmalloc(sizeof(struct id_range), GFP_KERNEL); + if (!new) + return -ENOMEM; + + pos = head; + last_end = -1; + list_for_each_entry(cur, head, list) { + if ((cur->start - last_end) > size) + break; + last_end = cur->end; + pos = &cur->list; + } + + new->start = last_end + 1; + new->end = new->start + size - 1; + + if (new->end > max_id) { + kfree(new); + rc = -ENOSPC; + } else { + list_add(&new->list, pos); + rc = new->start; + } + +#ifdef DEBUG + dump_list(head, type_str); +#endif + return rc; +} + +static void range_free(struct list_head *head, u32 start, u32 size, + char *type_str) +{ + bool found = false; + struct id_range *cur, *tmp; + + list_for_each_entry_safe(cur, tmp, head, list) { + if (cur->start == start && cur->end == (start + size - 1)) { + found = true; + list_del(&cur->list); + kfree(cur); + break; + } + } + WARN_ON(!found); +#ifdef DEBUG + dump_list(head, type_str); +#endif +} + +int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size) +{ + int max_pasid; + + if (fn->config.max_pasid_log < 0) + return -ENOSPC; + max_pasid = 1 << fn->config.max_pasid_log; + return range_alloc(&fn->pasid_list, size, max_pasid, "afu pasid"); +} + +void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size) +{ + return range_free(&fn->pasid_list, start, size, "afu pasid"); +} + +int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size) +{ + int max_actag; + + max_actag = fn->actag_enabled; + return range_alloc(&fn->actag_list, size, max_actag, "afu actag"); +} + +void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size) +{ + return range_free(&fn->actag_list, start, size, "afu actag"); +} diff --git a/drivers/misc/ocxl/pci.c b/drivers/misc/ocxl/pci.c new file mode 100644 index 000000000000..0051d9ec76cc --- /dev/null +++ b/drivers/misc/ocxl/pci.c @@ -0,0 +1,585 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include +#include +#include "ocxl_internal.h" + +/* + * Any opencapi device which wants to use this 'generic' driver should + * use the 0x062B device ID. Vendors should define the subsystem + * vendor/device ID to help differentiate devices. + */ +static const struct pci_device_id ocxl_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x062B), }, + { } +}; +MODULE_DEVICE_TABLE(pci, ocxl_pci_tbl); + + +static struct ocxl_fn *ocxl_fn_get(struct ocxl_fn *fn) +{ + return (get_device(&fn->dev) == NULL) ? NULL : fn; +} + +static void ocxl_fn_put(struct ocxl_fn *fn) +{ + put_device(&fn->dev); +} + +struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu) +{ + return (get_device(&afu->dev) == NULL) ? NULL : afu; +} + +void ocxl_afu_put(struct ocxl_afu *afu) +{ + put_device(&afu->dev); +} + +static struct ocxl_afu *alloc_afu(struct ocxl_fn *fn) +{ + struct ocxl_afu *afu; + + afu = kzalloc(sizeof(struct ocxl_afu), GFP_KERNEL); + if (!afu) + return NULL; + + mutex_init(&afu->contexts_lock); + mutex_init(&afu->afu_control_lock); + idr_init(&afu->contexts_idr); + afu->fn = fn; + ocxl_fn_get(fn); + return afu; +} + +static void free_afu(struct ocxl_afu *afu) +{ + idr_destroy(&afu->contexts_idr); + ocxl_fn_put(afu->fn); + kfree(afu); +} + +static void free_afu_dev(struct device *dev) +{ + struct ocxl_afu *afu = to_ocxl_afu(dev); + + ocxl_unregister_afu(afu); + free_afu(afu); +} + +static int set_afu_device(struct ocxl_afu *afu, const char *location) +{ + struct ocxl_fn *fn = afu->fn; + int rc; + + afu->dev.parent = &fn->dev; + afu->dev.release = free_afu_dev; + rc = dev_set_name(&afu->dev, "%s.%s.%hhu", afu->config.name, location, + afu->config.idx); + return rc; +} + +static int assign_afu_actag(struct ocxl_afu *afu, struct pci_dev *dev) +{ + struct ocxl_fn *fn = afu->fn; + int actag_count, actag_offset; + + /* + * if there were not enough actags for the function, each afu + * reduces its count as well + */ + actag_count = afu->config.actag_supported * + fn->actag_enabled / fn->actag_supported; + actag_offset = ocxl_actag_afu_alloc(fn, actag_count); + if (actag_offset < 0) { + dev_err(&afu->dev, "Can't allocate %d actags for AFU: %d\n", + actag_count, actag_offset); + return actag_offset; + } + afu->actag_base = fn->actag_base + actag_offset; + afu->actag_enabled = actag_count; + + ocxl_config_set_afu_actag(dev, afu->config.dvsec_afu_control_pos, + afu->actag_base, afu->actag_enabled); + dev_dbg(&afu->dev, "actag base=%d enabled=%d\n", + afu->actag_base, afu->actag_enabled); + return 0; +} + +static void reclaim_afu_actag(struct ocxl_afu *afu) +{ + struct ocxl_fn *fn = afu->fn; + int start_offset, size; + + start_offset = afu->actag_base - fn->actag_base; + size = afu->actag_enabled; + ocxl_actag_afu_free(afu->fn, start_offset, size); +} + +static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev) +{ + struct ocxl_fn *fn = afu->fn; + int pasid_count, pasid_offset; + + /* + * We only support the case where the function configuration + * requested enough PASIDs to cover all AFUs. + */ + pasid_count = 1 << afu->config.pasid_supported_log; + pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count); + if (pasid_offset < 0) { + dev_err(&afu->dev, "Can't allocate %d PASIDs for AFU: %d\n", + pasid_count, pasid_offset); + return pasid_offset; + } + afu->pasid_base = fn->pasid_base + pasid_offset; + afu->pasid_count = 0; + afu->pasid_max = pasid_count; + + ocxl_config_set_afu_pasid(dev, afu->config.dvsec_afu_control_pos, + afu->pasid_base, + afu->config.pasid_supported_log); + dev_dbg(&afu->dev, "PASID base=%d, enabled=%d\n", + afu->pasid_base, pasid_count); + return 0; +} + +static void reclaim_afu_pasid(struct ocxl_afu *afu) +{ + struct ocxl_fn *fn = afu->fn; + int start_offset, size; + + start_offset = afu->pasid_base - fn->pasid_base; + size = 1 << afu->config.pasid_supported_log; + ocxl_pasid_afu_free(afu->fn, start_offset, size); +} + +static int reserve_fn_bar(struct ocxl_fn *fn, int bar) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + int rc, idx; + + if (bar != 0 && bar != 2 && bar != 4) + return -EINVAL; + + idx = bar >> 1; + if (fn->bar_used[idx]++ == 0) { + rc = pci_request_region(dev, bar, "ocxl"); + if (rc) + return rc; + } + return 0; +} + +static void release_fn_bar(struct ocxl_fn *fn, int bar) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + int idx; + + if (bar != 0 && bar != 2 && bar != 4) + return; + + idx = bar >> 1; + if (--fn->bar_used[idx] == 0) + pci_release_region(dev, bar); + WARN_ON(fn->bar_used[idx] < 0); +} + +static int map_mmio_areas(struct ocxl_afu *afu, struct pci_dev *dev) +{ + int rc; + + rc = reserve_fn_bar(afu->fn, afu->config.global_mmio_bar); + if (rc) + return rc; + + rc = reserve_fn_bar(afu->fn, afu->config.pp_mmio_bar); + if (rc) { + release_fn_bar(afu->fn, afu->config.global_mmio_bar); + return rc; + } + + afu->global_mmio_start = + pci_resource_start(dev, afu->config.global_mmio_bar) + + afu->config.global_mmio_offset; + afu->pp_mmio_start = + pci_resource_start(dev, afu->config.pp_mmio_bar) + + afu->config.pp_mmio_offset; + + afu->global_mmio_ptr = ioremap(afu->global_mmio_start, + afu->config.global_mmio_size); + if (!afu->global_mmio_ptr) { + release_fn_bar(afu->fn, afu->config.pp_mmio_bar); + release_fn_bar(afu->fn, afu->config.global_mmio_bar); + dev_err(&dev->dev, "Error mapping global mmio area\n"); + return -ENOMEM; + } + + /* + * Leave an empty page between the per-process mmio area and + * the AFU interrupt mappings + */ + afu->irq_base_offset = afu->config.pp_mmio_stride + PAGE_SIZE; + return 0; +} + +static void unmap_mmio_areas(struct ocxl_afu *afu) +{ + if (afu->global_mmio_ptr) { + iounmap(afu->global_mmio_ptr); + afu->global_mmio_ptr = NULL; + } + afu->global_mmio_start = 0; + afu->pp_mmio_start = 0; + release_fn_bar(afu->fn, afu->config.pp_mmio_bar); + release_fn_bar(afu->fn, afu->config.global_mmio_bar); +} + +static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev) +{ + int rc; + + rc = ocxl_config_read_afu(dev, &afu->fn->config, &afu->config, afu_idx); + if (rc) + return rc; + + rc = set_afu_device(afu, dev_name(&dev->dev)); + if (rc) + return rc; + + rc = assign_afu_actag(afu, dev); + if (rc) + return rc; + + rc = assign_afu_pasid(afu, dev); + if (rc) { + reclaim_afu_actag(afu); + return rc; + } + + rc = map_mmio_areas(afu, dev); + if (rc) { + reclaim_afu_pasid(afu); + reclaim_afu_actag(afu); + return rc; + } + return 0; +} + +static void deconfigure_afu(struct ocxl_afu *afu) +{ + unmap_mmio_areas(afu); + reclaim_afu_pasid(afu); + reclaim_afu_actag(afu); +} + +static int activate_afu(struct pci_dev *dev, struct ocxl_afu *afu) +{ + int rc; + + ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 1); + /* + * Char device creation is the last step, as processes can + * call our driver immediately, so all our inits must be finished. + */ + rc = ocxl_create_cdev(afu); + if (rc) + return rc; + return 0; +} + +static void deactivate_afu(struct ocxl_afu *afu) +{ + struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent); + + ocxl_destroy_cdev(afu); + ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 0); +} + +static int init_afu(struct pci_dev *dev, struct ocxl_fn *fn, u8 afu_idx) +{ + int rc; + struct ocxl_afu *afu; + + afu = alloc_afu(fn); + if (!afu) + return -ENOMEM; + + rc = configure_afu(afu, afu_idx, dev); + if (rc) { + free_afu(afu); + return rc; + } + + rc = ocxl_register_afu(afu); + if (rc) + goto err; + + rc = ocxl_sysfs_add_afu(afu); + if (rc) + goto err; + + rc = activate_afu(dev, afu); + if (rc) + goto err_sys; + + list_add_tail(&afu->list, &fn->afu_list); + return 0; + +err_sys: + ocxl_sysfs_remove_afu(afu); +err: + deconfigure_afu(afu); + device_unregister(&afu->dev); + return rc; +} + +static void remove_afu(struct ocxl_afu *afu) +{ + list_del(&afu->list); + ocxl_context_detach_all(afu); + deactivate_afu(afu); + ocxl_sysfs_remove_afu(afu); + deconfigure_afu(afu); + device_unregister(&afu->dev); +} + +static struct ocxl_fn *alloc_function(struct pci_dev *dev) +{ + struct ocxl_fn *fn; + + fn = kzalloc(sizeof(struct ocxl_fn), GFP_KERNEL); + if (!fn) + return NULL; + + INIT_LIST_HEAD(&fn->afu_list); + INIT_LIST_HEAD(&fn->pasid_list); + INIT_LIST_HEAD(&fn->actag_list); + return fn; +} + +static void free_function(struct ocxl_fn *fn) +{ + WARN_ON(!list_empty(&fn->afu_list)); + WARN_ON(!list_empty(&fn->pasid_list)); + kfree(fn); +} + +static void free_function_dev(struct device *dev) +{ + struct ocxl_fn *fn = to_ocxl_function(dev); + + free_function(fn); +} + +static int set_function_device(struct ocxl_fn *fn, struct pci_dev *dev) +{ + int rc; + + fn->dev.parent = &dev->dev; + fn->dev.release = free_function_dev; + rc = dev_set_name(&fn->dev, "ocxlfn.%s", dev_name(&dev->dev)); + if (rc) + return rc; + pci_set_drvdata(dev, fn); + return 0; +} + +static int assign_function_actag(struct ocxl_fn *fn) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + u16 base, enabled, supported; + int rc; + + rc = ocxl_config_get_actag_info(dev, &base, &enabled, &supported); + if (rc) + return rc; + + fn->actag_base = base; + fn->actag_enabled = enabled; + fn->actag_supported = supported; + + ocxl_config_set_actag(dev, fn->config.dvsec_function_pos, + fn->actag_base, fn->actag_enabled); + dev_dbg(&fn->dev, "actag range starting at %d, enabled %d\n", + fn->actag_base, fn->actag_enabled); + return 0; +} + +static int set_function_pasid(struct ocxl_fn *fn) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + int rc, desired_count, max_count; + + /* A function may not require any PASID */ + if (fn->config.max_pasid_log < 0) + return 0; + + rc = ocxl_config_get_pasid_info(dev, &max_count); + if (rc) + return rc; + + desired_count = 1 << fn->config.max_pasid_log; + + if (desired_count > max_count) { + dev_err(&fn->dev, + "Function requires more PASIDs than is available (%d vs. %d)\n", + desired_count, max_count); + return -ENOSPC; + } + + fn->pasid_base = 0; + return 0; +} + +static int configure_function(struct ocxl_fn *fn, struct pci_dev *dev) +{ + int rc; + + rc = pci_enable_device(dev); + if (rc) { + dev_err(&dev->dev, "pci_enable_device failed: %d\n", rc); + return rc; + } + + /* + * Once it has been confirmed to work on our hardware, we + * should reset the function, to force the adapter to restart + * from scratch. + * A function reset would also reset all its AFUs. + * + * Some hints for implementation: + * + * - there's not status bit to know when the reset is done. We + * should try reading the config space to know when it's + * done. + * - probably something like: + * Reset + * wait 100ms + * issue config read + * allow device up to 1 sec to return success on config + * read before declaring it broken + * + * Some shared logic on the card (CFG, TLX) won't be reset, so + * there's no guarantee that it will be enough. + */ + rc = ocxl_config_read_function(dev, &fn->config); + if (rc) + return rc; + + rc = set_function_device(fn, dev); + if (rc) + return rc; + + rc = assign_function_actag(fn); + if (rc) + return rc; + + rc = set_function_pasid(fn); + if (rc) + return rc; + + rc = ocxl_link_setup(dev, 0, &fn->link); + if (rc) + return rc; + + rc = ocxl_config_set_TL(dev, fn->config.dvsec_tl_pos); + if (rc) { + ocxl_link_release(dev, fn->link); + return rc; + } + return 0; +} + +static void deconfigure_function(struct ocxl_fn *fn) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + + ocxl_link_release(dev, fn->link); + pci_disable_device(dev); +} + +static struct ocxl_fn *init_function(struct pci_dev *dev) +{ + struct ocxl_fn *fn; + int rc; + + fn = alloc_function(dev); + if (!fn) + return ERR_PTR(-ENOMEM); + + rc = configure_function(fn, dev); + if (rc) { + free_function(fn); + return ERR_PTR(rc); + } + + rc = device_register(&fn->dev); + if (rc) { + deconfigure_function(fn); + device_unregister(&fn->dev); + return ERR_PTR(rc); + } + return fn; +} + +static void remove_function(struct ocxl_fn *fn) +{ + deconfigure_function(fn); + device_unregister(&fn->dev); +} + +static int ocxl_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + int rc, afu_count = 0; + u8 afu; + struct ocxl_fn *fn; + + if (!radix_enabled()) { + dev_err(&dev->dev, "Unsupported memory model (hash)\n"); + return -ENODEV; + } + + fn = init_function(dev); + if (IS_ERR(fn)) { + dev_err(&dev->dev, "function init failed: %li\n", + PTR_ERR(fn)); + return PTR_ERR(fn); + } + + for (afu = 0; afu <= fn->config.max_afu_index; afu++) { + rc = ocxl_config_check_afu_index(dev, &fn->config, afu); + if (rc > 0) { + rc = init_afu(dev, fn, afu); + if (rc) { + dev_err(&dev->dev, + "Can't initialize AFU index %d\n", afu); + continue; + } + afu_count++; + } + } + dev_info(&dev->dev, "%d AFU(s) configured\n", afu_count); + return 0; +} + +static void ocxl_remove(struct pci_dev *dev) +{ + struct ocxl_afu *afu, *tmp; + struct ocxl_fn *fn = pci_get_drvdata(dev); + + list_for_each_entry_safe(afu, tmp, &fn->afu_list, list) { + remove_afu(afu); + } + remove_function(fn); +} + +struct pci_driver ocxl_pci_driver = { + .name = "ocxl", + .id_table = ocxl_pci_tbl, + .probe = ocxl_probe, + .remove = ocxl_remove, + .shutdown = ocxl_remove, +}; diff --git a/drivers/misc/ocxl/sysfs.c b/drivers/misc/ocxl/sysfs.c new file mode 100644 index 000000000000..d9753a1db14b --- /dev/null +++ b/drivers/misc/ocxl/sysfs.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include "ocxl_internal.h" + +static ssize_t global_mmio_size_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_ocxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + afu->config.global_mmio_size); +} + +static ssize_t pp_mmio_size_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_ocxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + afu->config.pp_mmio_stride); +} + +static ssize_t afu_version_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_ocxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%hhu:%hhu\n", + afu->config.version_major, + afu->config.version_minor); +} + +static ssize_t contexts_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_ocxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%d/%d\n", + afu->pasid_count, afu->pasid_max); +} + +static struct device_attribute afu_attrs[] = { + __ATTR_RO(global_mmio_size), + __ATTR_RO(pp_mmio_size), + __ATTR_RO(afu_version), + __ATTR_RO(contexts), +}; + +static ssize_t global_mmio_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj)); + + if (count == 0 || off < 0 || + off >= afu->config.global_mmio_size) + return 0; + memcpy_fromio(buf, afu->global_mmio_ptr + off, count); + return count; +} + +static int global_mmio_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct ocxl_afu *afu = vma->vm_private_data; + unsigned long offset; + + if (vmf->pgoff >= (afu->config.global_mmio_size >> PAGE_SHIFT)) + return VM_FAULT_SIGBUS; + + offset = vmf->pgoff; + offset += (afu->global_mmio_start >> PAGE_SHIFT); + vm_insert_pfn(vma, vmf->address, offset); + return VM_FAULT_NOPAGE; +} + +static const struct vm_operations_struct global_mmio_vmops = { + .fault = global_mmio_fault, +}; + +static int global_mmio_mmap(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + struct vm_area_struct *vma) +{ + struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj)); + + if ((vma_pages(vma) + vma->vm_pgoff) > + (afu->config.global_mmio_size >> PAGE_SHIFT)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_ops = &global_mmio_vmops; + vma->vm_private_data = afu; + return 0; +} + +int ocxl_sysfs_add_afu(struct ocxl_afu *afu) +{ + int i, rc; + + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { + rc = device_create_file(&afu->dev, &afu_attrs[i]); + if (rc) + goto err; + } + + sysfs_attr_init(&afu->attr_global_mmio.attr); + afu->attr_global_mmio.attr.name = "global_mmio_area"; + afu->attr_global_mmio.attr.mode = 0600; + afu->attr_global_mmio.size = afu->config.global_mmio_size; + afu->attr_global_mmio.read = global_mmio_read; + afu->attr_global_mmio.mmap = global_mmio_mmap; + rc = device_create_bin_file(&afu->dev, &afu->attr_global_mmio); + if (rc) { + dev_err(&afu->dev, + "Unable to create global mmio attr for afu: %d\n", + rc); + goto err; + } + + return 0; + +err: + for (i--; i >= 0; i--) + device_remove_file(&afu->dev, &afu_attrs[i]); + return rc; +} + +void ocxl_sysfs_remove_afu(struct ocxl_afu *afu) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) + device_remove_file(&afu->dev, &afu_attrs[i]); + device_remove_bin_file(&afu->dev, &afu->attr_global_mmio); +} diff --git a/include/uapi/misc/ocxl.h b/include/uapi/misc/ocxl.h new file mode 100644 index 000000000000..a37e34edf52f --- /dev/null +++ b/include/uapi/misc/ocxl.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* Copyright 2017 IBM Corp. */ +#ifndef _UAPI_MISC_OCXL_H +#define _UAPI_MISC_OCXL_H + +#include +#include + +enum ocxl_event_type { + OCXL_AFU_EVENT_XSL_FAULT_ERROR = 0, +}; + +#define OCXL_KERNEL_EVENT_FLAG_LAST 0x0001 /* This is the last event pending */ + +struct ocxl_kernel_event_header { + __u16 type; + __u16 flags; + __u32 reserved; +}; + +struct ocxl_kernel_event_xsl_fault_error { + __u64 addr; + __u64 dsisr; + __u64 count; + __u64 reserved; +}; + +struct ocxl_ioctl_attach { + __u64 amr; + __u64 reserved1; + __u64 reserved2; + __u64 reserved3; +}; + +/* ioctl numbers */ +#define OCXL_MAGIC 0xCA +/* AFU devices */ +#define OCXL_IOCTL_ATTACH _IOW(OCXL_MAGIC, 0x10, struct ocxl_ioctl_attach) + +#endif /* _UAPI_MISC_OCXL_H */ -- cgit v1.2.3 From aeddad1760aeb206d912b27b230269407efd5b06 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Tue, 23 Jan 2018 12:31:42 +0100 Subject: ocxl: Add AFU interrupt support Add user APIs through ioctl to allocate, free, and be notified of an AFU interrupt. For opencapi, an AFU can trigger an interrupt on the host by sending a specific command targeting a 64-bit object handle. On POWER9, this is implemented by mapping a special page in the address space of a process and a write to that page will trigger an interrupt. Signed-off-by: Frederic Barrat Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pnv-ocxl.h | 3 + arch/powerpc/platforms/powernv/ocxl.c | 30 ++++++ drivers/misc/ocxl/afu_irq.c | 197 ++++++++++++++++++++++++++++++++++ drivers/misc/ocxl/context.c | 51 ++++++++- drivers/misc/ocxl/file.c | 34 ++++++ drivers/misc/ocxl/link.c | 28 +++++ drivers/misc/ocxl/ocxl_internal.h | 7 ++ include/uapi/misc/ocxl.h | 9 ++ 8 files changed, 357 insertions(+), 2 deletions(-) create mode 100644 drivers/misc/ocxl/afu_irq.c (limited to 'include') diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index 398d05b30600..f6945d3bc971 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -30,4 +30,7 @@ extern int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, extern void pnv_ocxl_spa_release(void *platform_data); extern int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle); +extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr); +extern void pnv_ocxl_free_xive_irq(u32 irq); + #endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index 1faaa4ef6903..fa9b53af3c7b 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -2,6 +2,7 @@ // Copyright 2017 IBM Corp. #include #include +#include #include #include "pci.h" @@ -483,3 +484,32 @@ int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle) return rc; } EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe); + +int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr) +{ + __be64 flags, trigger_page; + s64 rc; + u32 hwirq; + + hwirq = xive_native_alloc_irq(); + if (!hwirq) + return -ENOENT; + + rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL, + NULL); + if (rc || !trigger_page) { + xive_native_free_irq(hwirq); + return -ENOENT; + } + *irq = hwirq; + *trigger_addr = be64_to_cpu(trigger_page); + return 0; + +} +EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq); + +void pnv_ocxl_free_xive_irq(u32 irq) +{ + xive_native_free_irq(irq); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq); diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c new file mode 100644 index 000000000000..f40d853de401 --- /dev/null +++ b/drivers/misc/ocxl/afu_irq.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include +#include "ocxl_internal.h" + +struct afu_irq { + int id; + int hw_irq; + unsigned int virq; + char *name; + u64 trigger_page; + struct eventfd_ctx *ev_ctx; +}; + +static int irq_offset_to_id(struct ocxl_context *ctx, u64 offset) +{ + return (offset - ctx->afu->irq_base_offset) >> PAGE_SHIFT; +} + +static u64 irq_id_to_offset(struct ocxl_context *ctx, int id) +{ + return ctx->afu->irq_base_offset + (id << PAGE_SHIFT); +} + +static irqreturn_t afu_irq_handler(int virq, void *data) +{ + struct afu_irq *irq = (struct afu_irq *) data; + + if (irq->ev_ctx) + eventfd_signal(irq->ev_ctx, 1); + return IRQ_HANDLED; +} + +static int setup_afu_irq(struct ocxl_context *ctx, struct afu_irq *irq) +{ + int rc; + + irq->virq = irq_create_mapping(NULL, irq->hw_irq); + if (!irq->virq) { + pr_err("irq_create_mapping failed\n"); + return -ENOMEM; + } + pr_debug("hw_irq %d mapped to virq %u\n", irq->hw_irq, irq->virq); + + irq->name = kasprintf(GFP_KERNEL, "ocxl-afu-%u", irq->virq); + if (!irq->name) { + irq_dispose_mapping(irq->virq); + return -ENOMEM; + } + + rc = request_irq(irq->virq, afu_irq_handler, 0, irq->name, irq); + if (rc) { + kfree(irq->name); + irq->name = NULL; + irq_dispose_mapping(irq->virq); + pr_err("request_irq failed: %d\n", rc); + return rc; + } + return 0; +} + +static void release_afu_irq(struct afu_irq *irq) +{ + free_irq(irq->virq, irq); + irq_dispose_mapping(irq->virq); + kfree(irq->name); +} + +int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset) +{ + struct afu_irq *irq; + int rc; + + irq = kzalloc(sizeof(struct afu_irq), GFP_KERNEL); + if (!irq) + return -ENOMEM; + + /* + * We limit the number of afu irqs per context and per link to + * avoid a single process or user depleting the pool of IPIs + */ + + mutex_lock(&ctx->irq_lock); + + irq->id = idr_alloc(&ctx->irq_idr, irq, 0, MAX_IRQ_PER_CONTEXT, + GFP_KERNEL); + if (irq->id < 0) { + rc = -ENOSPC; + goto err_unlock; + } + + rc = ocxl_link_irq_alloc(ctx->afu->fn->link, &irq->hw_irq, + &irq->trigger_page); + if (rc) + goto err_idr; + + rc = setup_afu_irq(ctx, irq); + if (rc) + goto err_alloc; + + *irq_offset = irq_id_to_offset(ctx, irq->id); + + mutex_unlock(&ctx->irq_lock); + return 0; + +err_alloc: + ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq); +err_idr: + idr_remove(&ctx->irq_idr, irq->id); +err_unlock: + mutex_unlock(&ctx->irq_lock); + kfree(irq); + return rc; +} + +static void afu_irq_free(struct afu_irq *irq, struct ocxl_context *ctx) +{ + if (ctx->mapping) + unmap_mapping_range(ctx->mapping, + irq_id_to_offset(ctx, irq->id), + 1 << PAGE_SHIFT, 1); + release_afu_irq(irq); + if (irq->ev_ctx) + eventfd_ctx_put(irq->ev_ctx); + ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq); + kfree(irq); +} + +int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset) +{ + struct afu_irq *irq; + int id = irq_offset_to_id(ctx, irq_offset); + + mutex_lock(&ctx->irq_lock); + + irq = idr_find(&ctx->irq_idr, id); + if (!irq) { + mutex_unlock(&ctx->irq_lock); + return -EINVAL; + } + idr_remove(&ctx->irq_idr, irq->id); + afu_irq_free(irq, ctx); + mutex_unlock(&ctx->irq_lock); + return 0; +} + +void ocxl_afu_irq_free_all(struct ocxl_context *ctx) +{ + struct afu_irq *irq; + int id; + + mutex_lock(&ctx->irq_lock); + idr_for_each_entry(&ctx->irq_idr, irq, id) + afu_irq_free(irq, ctx); + mutex_unlock(&ctx->irq_lock); +} + +int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, int eventfd) +{ + struct afu_irq *irq; + struct eventfd_ctx *ev_ctx; + int rc = 0, id = irq_offset_to_id(ctx, irq_offset); + + mutex_lock(&ctx->irq_lock); + irq = idr_find(&ctx->irq_idr, id); + if (!irq) { + rc = -EINVAL; + goto unlock; + } + + ev_ctx = eventfd_ctx_fdget(eventfd); + if (IS_ERR(ev_ctx)) { + rc = -EINVAL; + goto unlock; + } + + irq->ev_ctx = ev_ctx; +unlock: + mutex_unlock(&ctx->irq_lock); + return rc; +} + +u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset) +{ + struct afu_irq *irq; + int id = irq_offset_to_id(ctx, irq_offset); + u64 addr = 0; + + mutex_lock(&ctx->irq_lock); + irq = idr_find(&ctx->irq_idr, id); + if (irq) + addr = irq->trigger_page; + mutex_unlock(&ctx->irq_lock); + return addr; +} diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c index b34b836f924c..269149490063 100644 --- a/drivers/misc/ocxl/context.c +++ b/drivers/misc/ocxl/context.c @@ -31,6 +31,8 @@ int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu, mutex_init(&ctx->mapping_lock); init_waitqueue_head(&ctx->events_wq); mutex_init(&ctx->xsl_error_lock); + mutex_init(&ctx->irq_lock); + idr_init(&ctx->irq_idr); /* * Keep a reference on the AFU to make sure it's valid for the * duration of the life of the context @@ -80,6 +82,19 @@ out: return rc; } +static int map_afu_irq(struct vm_area_struct *vma, unsigned long address, + u64 offset, struct ocxl_context *ctx) +{ + u64 trigger_addr; + + trigger_addr = ocxl_afu_irq_get_addr(ctx, offset); + if (!trigger_addr) + return VM_FAULT_SIGBUS; + + vm_insert_pfn(vma, address, trigger_addr >> PAGE_SHIFT); + return VM_FAULT_NOPAGE; +} + static int map_pp_mmio(struct vm_area_struct *vma, unsigned long address, u64 offset, struct ocxl_context *ctx) { @@ -118,7 +133,10 @@ static int ocxl_mmap_fault(struct vm_fault *vmf) pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__, ctx->pasid, vmf->address, offset); - rc = map_pp_mmio(vma, vmf->address, offset, ctx); + if (offset < ctx->afu->irq_base_offset) + rc = map_pp_mmio(vma, vmf->address, offset, ctx); + else + rc = map_afu_irq(vma, vmf->address, offset, ctx); return rc; } @@ -126,6 +144,30 @@ static const struct vm_operations_struct ocxl_vmops = { .fault = ocxl_mmap_fault, }; +static int check_mmap_afu_irq(struct ocxl_context *ctx, + struct vm_area_struct *vma) +{ + /* only one page */ + if (vma_pages(vma) != 1) + return -EINVAL; + + /* check offset validty */ + if (!ocxl_afu_irq_get_addr(ctx, vma->vm_pgoff << PAGE_SHIFT)) + return -EINVAL; + + /* + * trigger page should only be accessible in write mode. + * + * It's a bit theoretical, as a page mmaped with only + * PROT_WRITE is currently readable, but it doesn't hurt. + */ + if ((vma->vm_flags & VM_READ) || (vma->vm_flags & VM_EXEC) || + !(vma->vm_flags & VM_WRITE)) + return -EINVAL; + vma->vm_flags &= ~(VM_MAYREAD | VM_MAYEXEC); + return 0; +} + static int check_mmap_mmio(struct ocxl_context *ctx, struct vm_area_struct *vma) { @@ -139,7 +181,10 @@ int ocxl_context_mmap(struct ocxl_context *ctx, struct vm_area_struct *vma) { int rc; - rc = check_mmap_mmio(ctx, vma); + if ((vma->vm_pgoff << PAGE_SHIFT) < ctx->afu->irq_base_offset) + rc = check_mmap_mmio(ctx, vma); + else + rc = check_mmap_afu_irq(ctx, vma); if (rc) return rc; @@ -224,6 +269,8 @@ void ocxl_context_free(struct ocxl_context *ctx) idr_remove(&ctx->afu->contexts_idr, ctx->pasid); mutex_unlock(&ctx->afu->contexts_lock); + ocxl_afu_irq_free_all(ctx); + idr_destroy(&ctx->irq_idr); /* reference to the AFU taken in ocxl_context_init */ ocxl_afu_put(ctx->afu); kfree(ctx); diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index 6f0befda6a8a..c90c1a578d2f 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -103,12 +103,17 @@ static long afu_ioctl_attach(struct ocxl_context *ctx, } #define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" : \ + x == OCXL_IOCTL_IRQ_ALLOC ? "IRQ_ALLOC" : \ + x == OCXL_IOCTL_IRQ_FREE ? "IRQ_FREE" : \ + x == OCXL_IOCTL_IRQ_SET_FD ? "IRQ_SET_FD" : \ "UNKNOWN") static long afu_ioctl(struct file *file, unsigned int cmd, unsigned long args) { struct ocxl_context *ctx = file->private_data; + struct ocxl_ioctl_irq_fd irq_fd; + u64 irq_offset; long rc; pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid, @@ -123,6 +128,35 @@ static long afu_ioctl(struct file *file, unsigned int cmd, (struct ocxl_ioctl_attach __user *) args); break; + case OCXL_IOCTL_IRQ_ALLOC: + rc = ocxl_afu_irq_alloc(ctx, &irq_offset); + if (!rc) { + rc = copy_to_user((u64 __user *) args, &irq_offset, + sizeof(irq_offset)); + if (rc) + ocxl_afu_irq_free(ctx, irq_offset); + } + break; + + case OCXL_IOCTL_IRQ_FREE: + rc = copy_from_user(&irq_offset, (u64 __user *) args, + sizeof(irq_offset)); + if (rc) + return -EFAULT; + rc = ocxl_afu_irq_free(ctx, irq_offset); + break; + + case OCXL_IOCTL_IRQ_SET_FD: + rc = copy_from_user(&irq_fd, (u64 __user *) args, + sizeof(irq_fd)); + if (rc) + return -EFAULT; + if (irq_fd.reserved) + return -EINVAL; + rc = ocxl_afu_irq_set_fd(ctx, irq_fd.irq_offset, + irq_fd.eventfd); + break; + default: rc = -EINVAL; } diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c index 64d7a98c904a..8bdcef9c3cba 100644 --- a/drivers/misc/ocxl/link.c +++ b/drivers/misc/ocxl/link.c @@ -601,3 +601,31 @@ unlock: mutex_unlock(&spa->spa_lock); return rc; } + +int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr) +{ + struct link *link = (struct link *) link_handle; + int rc, irq; + u64 addr; + + if (atomic_dec_if_positive(&link->irq_available) < 0) + return -ENOSPC; + + rc = pnv_ocxl_alloc_xive_irq(&irq, &addr); + if (rc) { + atomic_inc(&link->irq_available); + return rc; + } + + *hw_irq = irq; + *trigger_addr = addr; + return 0; +} + +void ocxl_link_free_irq(void *link_handle, int hw_irq) +{ + struct link *link = (struct link *) link_handle; + + pnv_ocxl_free_xive_irq(hw_irq); + atomic_inc(&link->irq_available); +} diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h index 04fc160c7bd5..a89b88ac67eb 100644 --- a/drivers/misc/ocxl/ocxl_internal.h +++ b/drivers/misc/ocxl/ocxl_internal.h @@ -190,4 +190,11 @@ extern void ocxl_context_free(struct ocxl_context *ctx); extern int ocxl_sysfs_add_afu(struct ocxl_afu *afu); extern void ocxl_sysfs_remove_afu(struct ocxl_afu *afu); +extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset); +extern int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset); +extern void ocxl_afu_irq_free_all(struct ocxl_context *ctx); +extern int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, + int eventfd); +extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset); + #endif /* _OCXL_INTERNAL_H_ */ diff --git a/include/uapi/misc/ocxl.h b/include/uapi/misc/ocxl.h index a37e34edf52f..4b0b0b756f3e 100644 --- a/include/uapi/misc/ocxl.h +++ b/include/uapi/misc/ocxl.h @@ -32,9 +32,18 @@ struct ocxl_ioctl_attach { __u64 reserved3; }; +struct ocxl_ioctl_irq_fd { + __u64 irq_offset; + __s32 eventfd; + __u32 reserved; +}; + /* ioctl numbers */ #define OCXL_MAGIC 0xCA /* AFU devices */ #define OCXL_IOCTL_ATTACH _IOW(OCXL_MAGIC, 0x10, struct ocxl_ioctl_attach) +#define OCXL_IOCTL_IRQ_ALLOC _IOR(OCXL_MAGIC, 0x11, __u64) +#define OCXL_IOCTL_IRQ_FREE _IOW(OCXL_MAGIC, 0x12, __u64) +#define OCXL_IOCTL_IRQ_SET_FD _IOW(OCXL_MAGIC, 0x13, struct ocxl_ioctl_irq_fd) #endif /* _UAPI_MISC_OCXL_H */ -- cgit v1.2.3 From 280b983ce2b8759722d911ea4b5af66e95d84e09 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Tue, 23 Jan 2018 12:31:43 +0100 Subject: ocxl: Add a kernel API for other opencapi drivers Some of the functions done by the generic driver should also be needed by other opencapi drivers: attaching a context to an adapter, translation fault handling, AFU interrupt allocation... So to avoid code duplication, the driver provides a kernel API that other drivers can use, similar to calling a in-kernel library. It is still a bit theoretical, for lack of real hardware, and will likely need adjustements down the road. But we used the cxlflash driver as a guinea pig. Signed-off-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/ocxl/config.c | 13 ++- drivers/misc/ocxl/link.c | 7 ++ drivers/misc/ocxl/ocxl_internal.h | 71 +------------ include/misc/ocxl.h | 214 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 234 insertions(+), 71 deletions(-) create mode 100644 include/misc/ocxl.h (limited to 'include') diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c index ea8cca50ea06..2e30de9c694a 100644 --- a/drivers/misc/ocxl/config.c +++ b/drivers/misc/ocxl/config.c @@ -2,8 +2,8 @@ // Copyright 2017 IBM Corp. #include #include +#include #include -#include "ocxl_internal.h" #define EXTRACT_BIT(val, bit) (!!(val & BIT(bit))) #define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s) @@ -243,6 +243,7 @@ int ocxl_config_read_function(struct pci_dev *dev, struct ocxl_fn_config *fn) rc = validate_function(dev, fn); return rc; } +EXPORT_SYMBOL_GPL(ocxl_config_read_function); static int read_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn, int offset, u32 *data) @@ -301,6 +302,7 @@ int ocxl_config_check_afu_index(struct pci_dev *dev, } return 1; } +EXPORT_SYMBOL_GPL(ocxl_config_check_afu_index); static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn, struct ocxl_afu_config *afu) @@ -498,6 +500,7 @@ int ocxl_config_read_afu(struct pci_dev *dev, struct ocxl_fn_config *fn, rc = validate_afu(dev, afu); return rc; } +EXPORT_SYMBOL_GPL(ocxl_config_read_afu); int ocxl_config_get_actag_info(struct pci_dev *dev, u16 *base, u16 *enabled, u16 *supported) @@ -516,6 +519,7 @@ int ocxl_config_get_actag_info(struct pci_dev *dev, u16 *base, u16 *enabled, } return 0; } +EXPORT_SYMBOL_GPL(ocxl_config_get_actag_info); void ocxl_config_set_afu_actag(struct pci_dev *dev, int pos, int actag_base, int actag_count) @@ -528,11 +532,13 @@ void ocxl_config_set_afu_actag(struct pci_dev *dev, int pos, int actag_base, val = actag_base & OCXL_DVSEC_ACTAG_MASK; pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_BASE, val); } +EXPORT_SYMBOL_GPL(ocxl_config_set_afu_actag); int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count) { return pnv_ocxl_get_pasid_count(dev, count); } +EXPORT_SYMBOL_GPL(ocxl_config_get_pasid_info); void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base, u32 pasid_count_log) @@ -550,6 +556,7 @@ void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base, pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE, val32); } +EXPORT_SYMBOL_GPL(ocxl_config_set_afu_pasid); void ocxl_config_set_afu_state(struct pci_dev *dev, int pos, int enable) { @@ -562,6 +569,7 @@ void ocxl_config_set_afu_state(struct pci_dev *dev, int pos, int enable) val &= 0xFE; pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, val); } +EXPORT_SYMBOL_GPL(ocxl_config_set_afu_state); int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec) { @@ -660,6 +668,7 @@ out: kfree(recv_rate); return rc; } +EXPORT_SYMBOL_GPL(ocxl_config_set_TL); int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int pasid) { @@ -699,6 +708,7 @@ int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int pasid) } return 0; } +EXPORT_SYMBOL_GPL(ocxl_config_terminate_pasid); void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, u32 tag_first, u32 tag_count) @@ -710,3 +720,4 @@ void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, u32 tag_first, pci_write_config_dword(dev, func_dvsec + OCXL_DVSEC_FUNC_OFF_ACTAG, val); } +EXPORT_SYMBOL_GPL(ocxl_config_set_actag); diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c index 8bdcef9c3cba..fbca3feec592 100644 --- a/drivers/misc/ocxl/link.c +++ b/drivers/misc/ocxl/link.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "ocxl_internal.h" @@ -420,6 +421,7 @@ unlock: mutex_unlock(&links_list_lock); return rc; } +EXPORT_SYMBOL_GPL(ocxl_link_setup); static void release_xsl(struct kref *ref) { @@ -439,6 +441,7 @@ void ocxl_link_release(struct pci_dev *dev, void *link_handle) kref_put(&link->ref, release_xsl); mutex_unlock(&links_list_lock); } +EXPORT_SYMBOL_GPL(ocxl_link_release); static u64 calculate_cfg_state(bool kernel) { @@ -533,6 +536,7 @@ unlock: mutex_unlock(&spa->spa_lock); return rc; } +EXPORT_SYMBOL_GPL(ocxl_link_add_pe); int ocxl_link_remove_pe(void *link_handle, int pasid) { @@ -601,6 +605,7 @@ unlock: mutex_unlock(&spa->spa_lock); return rc; } +EXPORT_SYMBOL_GPL(ocxl_link_remove_pe); int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr) { @@ -621,6 +626,7 @@ int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr) *trigger_addr = addr; return 0; } +EXPORT_SYMBOL_GPL(ocxl_link_irq_alloc); void ocxl_link_free_irq(void *link_handle, int hw_irq) { @@ -629,3 +635,4 @@ void ocxl_link_free_irq(void *link_handle, int hw_irq) pnv_ocxl_free_xive_irq(hw_irq); atomic_inc(&link->irq_available); } +EXPORT_SYMBOL_GPL(ocxl_link_free_irq); diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h index a89b88ac67eb..5d421824afd9 100644 --- a/drivers/misc/ocxl/ocxl_internal.h +++ b/drivers/misc/ocxl/ocxl_internal.h @@ -6,8 +6,8 @@ #include #include #include +#include -#define OCXL_AFU_NAME_SZ (24+1) /* add 1 for NULL termination */ #define MAX_IRQ_PER_LINK 2000 #define MAX_IRQ_PER_CONTEXT MAX_IRQ_PER_LINK @@ -16,38 +16,6 @@ extern struct pci_driver ocxl_pci_driver; -/* - * The following 2 structures are a fairly generic way of representing - * the configuration data for a function and AFU, as read from the - * configuration space. - */ -struct ocxl_afu_config { - u8 idx; - int dvsec_afu_control_pos; - char name[OCXL_AFU_NAME_SZ]; - u8 version_major; - u8 version_minor; - u8 afuc_type; - u8 afum_type; - u8 profile; - u8 global_mmio_bar; - u64 global_mmio_offset; - u32 global_mmio_size; - u8 pp_mmio_bar; - u64 pp_mmio_offset; - u32 pp_mmio_stride; - u8 log_mem_size; - u8 pasid_supported_log; - u16 actag_supported; -}; - -struct ocxl_fn_config { - int dvsec_tl_pos; - int dvsec_function_pos; - int dvsec_afu_info_pos; - s8 max_pasid_log; - s8 max_afu_index; -}; struct ocxl_fn { struct device dev; @@ -135,43 +103,6 @@ extern void ocxl_unregister_afu(struct ocxl_afu *afu); extern int ocxl_file_init(void); extern void ocxl_file_exit(void); -extern int ocxl_config_read_function(struct pci_dev *dev, - struct ocxl_fn_config *fn); - -extern int ocxl_config_check_afu_index(struct pci_dev *dev, - struct ocxl_fn_config *fn, int afu_idx); -extern int ocxl_config_read_afu(struct pci_dev *dev, - struct ocxl_fn_config *fn, - struct ocxl_afu_config *afu, - u8 afu_idx); -extern int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count); -extern void ocxl_config_set_afu_pasid(struct pci_dev *dev, - int afu_control, - int pasid_base, u32 pasid_count_log); -extern int ocxl_config_get_actag_info(struct pci_dev *dev, - u16 *base, u16 *enabled, u16 *supported); -extern void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, - u32 tag_first, u32 tag_count); -extern void ocxl_config_set_afu_actag(struct pci_dev *dev, int afu_control, - int actag_base, int actag_count); -extern void ocxl_config_set_afu_state(struct pci_dev *dev, int afu_control, - int enable); -extern int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec); -extern int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, - int pasid); - -extern int ocxl_link_setup(struct pci_dev *dev, int PE_mask, - void **link_handle); -extern void ocxl_link_release(struct pci_dev *dev, void *link_handle); -extern int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, - u64 amr, struct mm_struct *mm, - void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), - void *xsl_err_data); -extern int ocxl_link_remove_pe(void *link_handle, int pasid); -extern int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, - u64 *addr); -extern void ocxl_link_free_irq(void *link_handle, int hw_irq); - extern int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size); extern void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size); extern int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size); diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h new file mode 100644 index 000000000000..51ccf76db293 --- /dev/null +++ b/include/misc/ocxl.h @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#ifndef _MISC_OCXL_H_ +#define _MISC_OCXL_H_ + +#include + +/* + * Opencapi drivers all need some common facilities, like parsing the + * device configuration space, adding a Process Element to the Shared + * Process Area, etc... + * + * The ocxl module provides a kernel API, to allow other drivers to + * reuse common code. A bit like a in-kernel library. + */ + +#define OCXL_AFU_NAME_SZ (24+1) /* add 1 for NULL termination */ + +/* + * The following 2 structures are a fairly generic way of representing + * the configuration data for a function and AFU, as read from the + * configuration space. + */ +struct ocxl_afu_config { + u8 idx; + int dvsec_afu_control_pos; /* offset of AFU control DVSEC */ + char name[OCXL_AFU_NAME_SZ]; + u8 version_major; + u8 version_minor; + u8 afuc_type; + u8 afum_type; + u8 profile; + u8 global_mmio_bar; /* global MMIO area */ + u64 global_mmio_offset; + u32 global_mmio_size; + u8 pp_mmio_bar; /* per-process MMIO area */ + u64 pp_mmio_offset; + u32 pp_mmio_stride; + u8 log_mem_size; + u8 pasid_supported_log; + u16 actag_supported; +}; + +struct ocxl_fn_config { + int dvsec_tl_pos; /* offset of the Transaction Layer DVSEC */ + int dvsec_function_pos; /* offset of the Function DVSEC */ + int dvsec_afu_info_pos; /* offset of the AFU information DVSEC */ + s8 max_pasid_log; + s8 max_afu_index; +}; + +/* + * Read the configuration space of a function and fill in a + * ocxl_fn_config structure with all the function details + */ +extern int ocxl_config_read_function(struct pci_dev *dev, + struct ocxl_fn_config *fn); + +/* + * Check if an AFU index is valid for the given function. + * + * AFU indexes can be sparse, so a driver should check all indexes up + * to the maximum found in the function description + */ +extern int ocxl_config_check_afu_index(struct pci_dev *dev, + struct ocxl_fn_config *fn, int afu_idx); + +/* + * Read the configuration space of a function for the AFU specified by + * the index 'afu_idx'. Fills in a ocxl_afu_config structure + */ +extern int ocxl_config_read_afu(struct pci_dev *dev, + struct ocxl_fn_config *fn, + struct ocxl_afu_config *afu, + u8 afu_idx); + +/* + * Get the max PASID value that can be used by the function + */ +extern int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count); + +/* + * Tell an AFU, by writing in the configuration space, the PASIDs that + * it can use. Range starts at 'pasid_base' and its size is a multiple + * of 2 + * + * 'afu_control_offset' is the offset of the AFU control DVSEC which + * can be found in the function configuration + */ +extern void ocxl_config_set_afu_pasid(struct pci_dev *dev, + int afu_control_offset, + int pasid_base, u32 pasid_count_log); + +/* + * Get the actag configuration for the function: + * 'base' is the first actag value that can be used. + * 'enabled' it the number of actags available, starting from base. + * 'supported' is the total number of actags desired by all the AFUs + * of the function. + */ +extern int ocxl_config_get_actag_info(struct pci_dev *dev, + u16 *base, u16 *enabled, u16 *supported); + +/* + * Tell a function, by writing in the configuration space, the actags + * it can use. + * + * 'func_offset' is the offset of the Function DVSEC that can found in + * the function configuration + */ +extern void ocxl_config_set_actag(struct pci_dev *dev, int func_offset, + u32 actag_base, u32 actag_count); + +/* + * Tell an AFU, by writing in the configuration space, the actags it + * can use. + * + * 'afu_control_offset' is the offset of the AFU control DVSEC for the + * desired AFU. It can be found in the AFU configuration + */ +extern void ocxl_config_set_afu_actag(struct pci_dev *dev, + int afu_control_offset, + int actag_base, int actag_count); + +/* + * Enable/disable an AFU, by writing in the configuration space. + * + * 'afu_control_offset' is the offset of the AFU control DVSEC for the + * desired AFU. It can be found in the AFU configuration + */ +extern void ocxl_config_set_afu_state(struct pci_dev *dev, + int afu_control_offset, int enable); + +/* + * Set the Transaction Layer configuration in the configuration space. + * Only needed for function 0. + * + * It queries the host TL capabilities, find some common ground + * between the host and device, and set the Transaction Layer on both + * accordingly. + */ +extern int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec); + +/* + * Request an AFU to terminate a PASID. + * Will return once the AFU has acked the request, or an error in case + * of timeout. + * + * The hardware can only terminate one PASID at a time, so caller must + * guarantee some kind of serialization. + * + * 'afu_control_offset' is the offset of the AFU control DVSEC for the + * desired AFU. It can be found in the AFU configuration + */ +extern int ocxl_config_terminate_pasid(struct pci_dev *dev, + int afu_control_offset, int pasid); + +/* + * Set up the opencapi link for the function. + * + * When called for the first time for a link, it sets up the Shared + * Process Area for the link and the interrupt handler to process + * translation faults. + * + * Returns a 'link handle' that should be used for further calls for + * the link + */ +extern int ocxl_link_setup(struct pci_dev *dev, int PE_mask, + void **link_handle); + +/* + * Remove the association between the function and its link. + */ +extern void ocxl_link_release(struct pci_dev *dev, void *link_handle); + +/* + * Add a Process Element to the Shared Process Area for a link. + * The process is defined by its PASID, pid, tid and its mm_struct. + * + * 'xsl_err_cb' is an optional callback if the driver wants to be + * notified when the translation fault interrupt handler detects an + * address error. + * 'xsl_err_data' is an argument passed to the above callback, if + * defined + */ +extern int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, + u64 amr, struct mm_struct *mm, + void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), + void *xsl_err_data); + +/* + * Remove a Process Element from the Shared Process Area for a link + */ +extern int ocxl_link_remove_pe(void *link_handle, int pasid); + +/* + * Allocate an AFU interrupt associated to the link. + * + * 'hw_irq' is the hardware interrupt number + * 'obj_handle' is the 64-bit object handle to be passed to the AFU to + * trigger the interrupt. + * On P9, 'obj_handle' is an address, which, if written, triggers the + * interrupt. It is an MMIO address which needs to be remapped (one + * page). + */ +extern int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, + u64 *obj_handle); + +/* + * Free a previously allocated AFU interrupt + */ +extern void ocxl_link_free_irq(void *link_handle, int hw_irq); + +#endif /* _MISC_OCXL_H_ */ -- cgit v1.2.3 From 856e1eb9bdd4bd703907925be112519ff65d991f Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Fri, 5 Jan 2018 10:45:47 -0600 Subject: PCI/AER: Add uevents in AER and EEH error/resume Devices can go offline when erors reported. This patch adds a change to the kernel object and lets udev know of error. When device resumes, a change is also set reporting device as online. Therefore, EEH and AER events are better propagated to user space for PCI devices in all arches. Signed-off-by: Bryant G. Ly Signed-off-by: Juan J. Alvarez Acked-by: Bjorn Helgaas Acked-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 6 ++++++ drivers/pci/pcie/aer/aerdrv_core.c | 3 +++ include/linux/pci.h | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) (limited to 'include') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 3c0fa99c5533..beea2182d754 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -228,6 +228,7 @@ static void *eeh_report_error(void *data, void *userdata) edev->in_error = true; eeh_pcid_put(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); return NULL; } @@ -381,6 +382,10 @@ static void *eeh_report_resume(void *data, void *userdata) driver->err_handler->resume(dev); eeh_pcid_put(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); +#ifdef CONFIG_PCI_IOV + eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); +#endif return NULL; } @@ -416,6 +421,7 @@ static void *eeh_report_failure(void *data, void *userdata) driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); eeh_pcid_put(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); return NULL; } diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 744805232155..8d7448063fd1 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -278,6 +278,7 @@ static int report_error_detected(struct pci_dev *dev, void *data) } else { err_handler = dev->driver->err_handler; vote = err_handler->error_detected(dev, result_data->state); + pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); } result_data->result = merge_result(result_data->result, vote); @@ -341,6 +342,7 @@ static int report_resume(struct pci_dev *dev, void *data) err_handler = dev->driver->err_handler; err_handler->resume(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); out: device_unlock(&dev->dev); return 0; @@ -541,6 +543,7 @@ static void do_recovery(struct pci_dev *dev, int severity) return; failed: + pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); /* TODO: Should kernel panic here? */ dev_info(&dev->dev, "AER: Device recovery failed\n"); } diff --git a/include/linux/pci.h b/include/linux/pci.h index e3e94467687a..405630441b74 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2277,6 +2277,42 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) return false; } +/** + * pci_uevent_ers - emit a uevent during recovery path of pci device + * @pdev: pci device to check + * @err_type: type of error event + * + */ +static inline void pci_uevent_ers(struct pci_dev *pdev, + enum pci_ers_result err_type) +{ + int idx = 0; + char *envp[3]; + + switch (err_type) { + case PCI_ERS_RESULT_NONE: + case PCI_ERS_RESULT_CAN_RECOVER: + envp[idx++] = "ERROR_EVENT=BEGIN_RECOVERY"; + envp[idx++] = "DEVICE_ONLINE=0"; + break; + case PCI_ERS_RESULT_RECOVERED: + envp[idx++] = "ERROR_EVENT=SUCCESSFUL_RECOVERY"; + envp[idx++] = "DEVICE_ONLINE=1"; + break; + case PCI_ERS_RESULT_DISCONNECT: + envp[idx++] = "ERROR_EVENT=FAILED_RECOVERY"; + envp[idx++] = "DEVICE_ONLINE=0"; + break; + default: + break; + } + + if (idx > 0) { + envp[idx++] = NULL; + kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, envp); + } +} + /* provide the legacy pci_dma_* API */ #include -- cgit v1.2.3