diff options
Diffstat (limited to 'drivers/infiniband/hw')
41 files changed, 1977 insertions, 972 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index e7e8a0f49464..793c97251588 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -677,7 +677,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ibdev->node_guid); - ibdev->num_comp_vectors = 1; + ibdev->num_comp_vectors = rdev->num_msix - 1; ibdev->dev.parent = &rdev->en_dev->pdev->dev; ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index e96bcb16bd2b..74b787a90660 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -160,10 +160,16 @@ struct efa_admin_create_qp_resp { /* Common Admin Queue completion descriptor */ struct efa_admin_acq_common_desc acq_common_desc; - /* Opaque handle to be used for consequent operations on the QP */ + /* + * Opaque handle to be used for consequent admin operations on the + * QP + */ u32 qp_handle; - /* QP number in the given EFA virtual device */ + /* + * QP number in the given EFA virtual device. Least-significant bits + * (as needed according to max_qp) carry unique QP ID + */ u16 qp_num; /* MBZ */ @@ -286,6 +292,7 @@ struct efa_admin_create_ah_cmd { /* PD number */ u16 pd; + /* MBZ */ u16 reserved; }; @@ -296,6 +303,7 @@ struct efa_admin_create_ah_resp { /* Target interface address handle (opaque) */ u16 ah; + /* MBZ */ u16 reserved; }; @@ -372,6 +380,7 @@ struct efa_admin_reg_mr_cmd { */ u8 permissions; + /* MBZ */ u16 reserved16_w5; /* number of pages in PBL (redundant, could be calculated) */ @@ -419,20 +428,20 @@ struct efa_admin_create_cq_cmd { struct efa_admin_aq_common_desc aq_common_desc; /* - * 4:0 : reserved5 + * 4:0 : reserved5 - MBZ * 5 : interrupt_mode_enabled - if set, cq operates * in interrupt mode (i.e. CQ events and MSI-X are * generated), otherwise - polling * 6 : virt - If set, ring base address is virtual * (IOVA returned by MR registration) - * 7 : reserved6 + * 7 : reserved6 - MBZ */ u8 cq_caps_1; /* * 4:0 : cq_entry_size_words - size of CQ entry in * 32-bit words, valid values: 4, 8. - * 7:5 : reserved7 + * 7:5 : reserved7 - MBZ */ u8 cq_caps_2; @@ -478,6 +487,7 @@ struct efa_admin_destroy_cq_cmd { u16 cq_idx; + /* MBZ */ u16 reserved1; }; @@ -530,7 +540,7 @@ struct efa_admin_get_set_feature_common_desc { /* * 1:0 : select - 0x1 - current value; 0x3 - default * value - * 7:3 : reserved3 + * 7:3 : reserved3 - MBZ */ u8 flags; @@ -557,10 +567,10 @@ struct efa_admin_feature_device_attr_desc { /* Bar used for SQ and RQ doorbells */ u16 db_bar; - /* Indicates how many bits are used physical address access */ + /* Indicates how many bits are used on physical address access */ u8 phys_addr_width; - /* Indicates how many bits are used virtual address access */ + /* Indicates how many bits are used on virtual address access */ u8 virt_addr_width; /* @@ -578,27 +588,28 @@ struct efa_admin_feature_queue_attr_desc { /* The maximum number of queue pairs supported */ u32 max_qp; + /* Maximum number of WQEs per Send Queue */ u32 max_sq_depth; - /* max send wr used in inline-buf */ + /* Maximum size of data that can be sent inline in a Send WQE */ u32 inline_buf_size; + /* Maximum number of buffer descriptors per Recv Queue */ u32 max_rq_depth; /* The maximum number of completion queues supported per VF */ u32 max_cq; + /* Maximum number of CQEs per Completion Queue */ u32 max_cq_depth; /* Number of sub-CQs to be created for each CQ */ u16 sub_cqs_per_cq; + /* MBZ */ u16 reserved; - /* - * Maximum number of SGEs (buffs) allowed for a single send work - * queue element (WQE) - */ + /* Maximum number of SGEs (buffers) allowed for a single send WQE */ u16 max_wr_send_sges; /* Maximum number of SGEs allowed for a single recv WQE */ diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 4822f5fa12be..ec5545870554 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -387,8 +387,7 @@ static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle) return efa_com_destroy_qp(&dev->edev, ¶ms); } -static void efa_qp_user_mmap_entries_remove(struct efa_ucontext *uctx, - struct efa_qp *qp) +static void efa_qp_user_mmap_entries_remove(struct efa_qp *qp) { rdma_user_mmap_entry_remove(qp->rq_mmap_entry); rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry); @@ -398,8 +397,6 @@ static void efa_qp_user_mmap_entries_remove(struct efa_ucontext *uctx, int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { - struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata, - struct efa_ucontext, ibucontext); struct efa_dev *dev = to_edev(ibqp->pd->device); struct efa_qp *qp = to_eqp(ibqp); int err; @@ -418,7 +415,7 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) DMA_TO_DEVICE); } - efa_qp_user_mmap_entries_remove(ucontext, qp); + efa_qp_user_mmap_entries_remove(qp); kfree(qp); return 0; } @@ -510,7 +507,7 @@ static int qp_mmap_entries_setup(struct efa_qp *qp, return 0; err_remove_mmap: - efa_qp_user_mmap_entries_remove(ucontext, qp); + efa_qp_user_mmap_entries_remove(qp); return -ENOMEM; } @@ -719,7 +716,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, return &qp->ibqp; err_remove_mmap_entries: - efa_qp_user_mmap_entries_remove(ucontext, qp); + efa_qp_user_mmap_entries_remove(qp); err_destroy_qp: efa_destroy_qp_handle(dev, create_qp_resp.qp_handle); err_free_mapped: @@ -1370,6 +1367,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, IB_ACCESS_LOCAL_WRITE | (is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0); + access_flags &= ~IB_ACCESS_OPTIONAL; if (access_flags & ~supp_access_flags) { ibdev_dbg(&dev->ibdev, "Unsupported access flags[%#x], supported[%#x]\n", @@ -1608,13 +1606,12 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, err = -EINVAL; } - if (err) { + if (err) ibdev_dbg( &dev->ibdev, "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", entry->address, rdma_entry->npages * PAGE_SIZE, entry->mmap_flag, err); - } rdma_user_mmap_entry_put(rdma_entry); return err; diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9b1fb84a3d45..e0b1238d31df 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1685,6 +1685,14 @@ static u64 access_sw_pio_drain(const struct cntr_entry *entry, return dd->verbs_dev.n_piodrain; } +static u64 access_sw_ctx0_seq_drop(const struct cntr_entry *entry, + void *context, int vl, int mode, u64 data) +{ + struct hfi1_devdata *dd = context; + + return dd->ctx0_seq_drop; +} + static u64 access_sw_vtx_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { @@ -4106,6 +4114,7 @@ def_access_ibp_counter(rc_crwaits); static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH), [C_RX_LEN_ERR] = RXE32_DEV_CNTR_ELEM(RxLenErr, RCV_LENGTH_ERR_CNT, CNTR_SYNTH), +[C_RX_SHORT_ERR] = RXE32_DEV_CNTR_ELEM(RxShrErr, RCV_SHORT_ERR_CNT, CNTR_SYNTH), [C_RX_ICRC_ERR] = RXE32_DEV_CNTR_ELEM(RxICrcErr, RCV_ICRC_ERR_CNT, CNTR_SYNTH), [C_RX_EBP] = RXE32_DEV_CNTR_ELEM(RxEbpCnt, RCV_EBP_CNT, CNTR_SYNTH), [C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT, @@ -4249,6 +4258,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { access_sw_cpu_intr), [C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL, access_sw_cpu_rcv_limit), +[C_SW_CTX0_SEQ_DROP] = CNTR_ELEM("SeqDrop0", 0, 0, CNTR_NORMAL, + access_sw_ctx0_seq_drop), [C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL, access_sw_vtx_wait), [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL, @@ -6862,7 +6873,7 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) } rcvmask = HFI1_RCVCTRL_CTXT_ENB; /* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */ - rcvmask |= rcd->rcvhdrtail_kvaddr ? + rcvmask |= hfi1_rcvhdrtail_kvaddr(rcd) ? HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; hfi1_rcvctrl(dd, rcvmask, rcd); hfi1_rcd_put(rcd); @@ -8394,20 +8405,62 @@ void force_recv_intr(struct hfi1_ctxtdata *rcd) static inline int check_packet_present(struct hfi1_ctxtdata *rcd) { u32 tail; - int present; - if (!rcd->rcvhdrtail_kvaddr) - present = (rcd->seq_cnt == - rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd)))); - else /* is RDMA rtail */ - present = (rcd->head != get_rcvhdrtail(rcd)); - - if (present) + if (hfi1_packet_present(rcd)) return 1; /* fall back to a CSR read, correct indpendent of DMA_RTAIL */ tail = (u32)read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL); - return rcd->head != tail; + return hfi1_rcd_head(rcd) != tail; +} + +/** + * Common code for receive contexts interrupt handlers. + * Update traces, increment kernel IRQ counter and + * setup ASPM when needed. + */ +static void receive_interrupt_common(struct hfi1_ctxtdata *rcd) +{ + struct hfi1_devdata *dd = rcd->dd; + + trace_hfi1_receive_interrupt(dd, rcd); + this_cpu_inc(*dd->int_counter); + aspm_ctx_disable(rcd); +} + +/** + * __hfi1_rcd_eoi_intr() - Make HW issue receive interrupt + * when there are packets present in the queue. When calling + * with interrupts enabled please use hfi1_rcd_eoi_intr. + * + * @rcd: valid receive context + */ +static void __hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd) +{ + clear_recv_intr(rcd); + if (check_packet_present(rcd)) + force_recv_intr(rcd); +} + +/** + * hfi1_rcd_eoi_intr() - End of Interrupt processing action + * + * @rcd: Ptr to hfi1_ctxtdata of receive context + * + * Hold IRQs so we can safely clear the interrupt and + * recheck for a packet that may have arrived after the previous + * check and the interrupt clear. If a packet arrived, force another + * interrupt. This routine can be called at the end of receive packet + * processing in interrupt service routines, interrupt service thread + * and softirqs + */ +static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd) +{ + unsigned long flags; + + local_irq_save(flags); + __hfi1_rcd_eoi_intr(rcd); + local_irq_restore(flags); } /* @@ -8421,13 +8474,9 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd) irqreturn_t receive_context_interrupt(int irq, void *data) { struct hfi1_ctxtdata *rcd = data; - struct hfi1_devdata *dd = rcd->dd; int disposition; - int present; - trace_hfi1_receive_interrupt(dd, rcd); - this_cpu_inc(*dd->int_counter); - aspm_ctx_disable(rcd); + receive_interrupt_common(rcd); /* receive interrupt remains blocked while processing packets */ disposition = rcd->do_interrupt(rcd, 0); @@ -8440,17 +8489,7 @@ irqreturn_t receive_context_interrupt(int irq, void *data) if (disposition == RCV_PKT_LIMIT) return IRQ_WAKE_THREAD; - /* - * The packet processor detected no more packets. Clear the receive - * interrupt and recheck for a packet packet that may have arrived - * after the previous check and interrupt clear. If a packet arrived, - * force another interrupt. - */ - clear_recv_intr(rcd); - present = check_packet_present(rcd); - if (present) - force_recv_intr(rcd); - + __hfi1_rcd_eoi_intr(rcd); return IRQ_HANDLED; } @@ -8461,24 +8500,11 @@ irqreturn_t receive_context_interrupt(int irq, void *data) irqreturn_t receive_context_thread(int irq, void *data) { struct hfi1_ctxtdata *rcd = data; - int present; /* receive interrupt is still blocked from the IRQ handler */ (void)rcd->do_interrupt(rcd, 1); - /* - * The packet processor will only return if it detected no more - * packets. Hold IRQs here so we can safely clear the interrupt and - * recheck for a packet that may have arrived after the previous - * check and the interrupt clear. If a packet arrived, force another - * interrupt. - */ - local_irq_disable(); - clear_recv_intr(rcd); - present = check_packet_present(rcd); - if (present) - force_recv_intr(rcd); - local_irq_enable(); + hfi1_rcd_eoi_intr(rcd); return IRQ_HANDLED; } @@ -10049,7 +10075,7 @@ u32 lrh_max_header_bytes(struct hfi1_devdata *dd) * the first kernel context would have been allocated by now so * we are guaranteed a valid value. */ - return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2; + return (get_hdrqentsize(dd->rcd[0]) - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2; } /* @@ -10094,7 +10120,7 @@ static void set_send_length(struct hfi1_pportdata *ppd) thres = min(sc_percent_to_threshold(dd->vld[i].sc, 50), sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu, - dd->rcd[0]->rcvhdrqentsize)); + get_hdrqentsize(dd->rcd[0]))); for (j = 0; j < INIT_SC_PER_VL; j++) sc_set_cr_threshold( pio_select_send_context_vl(dd, j, i), @@ -11821,7 +11847,7 @@ u32 hdrqempty(struct hfi1_ctxtdata *rcd) head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD) & RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT; - if (rcd->rcvhdrtail_kvaddr) + if (hfi1_rcvhdrtail_kvaddr(rcd)) tail = get_rcvhdrtail(rcd); else tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL); @@ -11865,6 +11891,84 @@ static u32 encoded_size(u32 size) return 0x1; /* if invalid, go with the minimum size */ } +/** + * encode_rcv_header_entry_size - return chip specific encoding for size + * @size: size in dwords + * + * Convert a receive header entry size that to the encoding used in the CSR. + * + * Return a zero if the given size is invalid, otherwise the encoding. + */ +u8 encode_rcv_header_entry_size(u8 size) +{ + /* there are only 3 valid receive header entry sizes */ + if (size == 2) + return 1; + if (size == 16) + return 2; + if (size == 32) + return 4; + return 0; /* invalid */ +} + +/** + * hfi1_validate_rcvhdrcnt - validate hdrcnt + * @dd: the device data + * @thecnt: the header count + */ +int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt) +{ + if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) { + dd_dev_err(dd, "Receive header queue count too small\n"); + return -EINVAL; + } + + if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) { + dd_dev_err(dd, + "Receive header queue count cannot be greater than %u\n", + HFI1_MAX_HDRQ_EGRBUF_CNT); + return -EINVAL; + } + + if (thecnt % HDRQ_INCREMENT) { + dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n", + thecnt, HDRQ_INCREMENT); + return -EINVAL; + } + + return 0; +} + +/** + * set_hdrq_regs - set header queue registers for context + * @dd: the device data + * @ctxt: the context + * @entsize: the dword entry size + * @hdrcnt: the number of header entries + */ +void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt) +{ + u64 reg; + + reg = (((u64)hdrcnt >> HDRQ_SIZE_SHIFT) & RCV_HDR_CNT_CNT_MASK) << + RCV_HDR_CNT_CNT_SHIFT; + write_kctxt_csr(dd, ctxt, RCV_HDR_CNT, reg); + reg = ((u64)encode_rcv_header_entry_size(entsize) & + RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) << + RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT; + write_kctxt_csr(dd, ctxt, RCV_HDR_ENT_SIZE, reg); + reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK) << + RCV_HDR_SIZE_HDR_SIZE_SHIFT; + write_kctxt_csr(dd, ctxt, RCV_HDR_SIZE, reg); + + /* + * Program dummy tail address for every receive context + * before enabling any receive context + */ + write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, + dd->rcvhdrtail_dummy_dma); +} + void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, struct hfi1_ctxtdata *rcd) { @@ -11886,13 +11990,13 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, /* reset the tail and hdr addresses, and sequence count */ write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR, rcd->rcvhdrq_dma); - if (rcd->rcvhdrtail_kvaddr) + if (hfi1_rcvhdrtail_kvaddr(rcd)) write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, rcd->rcvhdrqtailaddr_dma); - rcd->seq_cnt = 1; + hfi1_set_seq_cnt(rcd, 1); /* reset the cached receive header queue head value */ - rcd->head = 0; + hfi1_set_rcd_head(rcd, 0); /* * Zero the receive header queue so we don't get false @@ -11972,7 +12076,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, IS_RCVAVAIL_START + rcd->ctxt, false); rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK; } - if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr) + if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && hfi1_rcvhdrtail_kvaddr(rcd)) rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; if (op & HFI1_RCVCTRL_TAILUPD_DIS) { /* See comment on RcvCtxtCtrl.TailUpd above */ diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 4ca5ac8d7e9e..725509261016 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -358,6 +358,8 @@ #define MAX_EAGER_BUFFER (256 * 1024) #define MAX_EAGER_BUFFER_TOTAL (64 * (1 << 20)) /* max per ctxt 64MB */ #define MAX_EXPECTED_BUFFER (2048 * 1024) +#define HFI1_MIN_HDRQ_EGRBUF_CNT 32 +#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352 /* * Receive expected base and count and eager base and count increment - @@ -699,6 +701,10 @@ static inline u32 chip_rcv_array_count(struct hfi1_devdata *dd) return read_csr(dd, RCV_ARRAY_CNT); } +u8 encode_rcv_header_entry_size(u8 size); +int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt); +void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt); + u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, u32 dw_len); @@ -859,6 +865,7 @@ static inline int idx_from_vl(int vl) enum { C_RCV_OVF = 0, C_RX_LEN_ERR, + C_RX_SHORT_ERR, C_RX_ICRC_ERR, C_RX_EBP, C_RX_TID_FULL, @@ -926,6 +933,7 @@ enum { C_DC_PG_STS_TX_MBE_CNT, C_SW_CPU_INTR, C_SW_CPU_RCV_LIM, + C_SW_CTX0_SEQ_DROP, C_SW_VTX_WAIT, C_SW_PIO_WAIT, C_SW_PIO_DRAIN, diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index ab3589d17aee..fb3ec9bff7a2 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -381,6 +381,7 @@ #define DC_LCB_STS_LINK_TRANSFER_ACTIVE (DC_LCB_CSRS + 0x000000000468) #define DC_LCB_STS_ROUND_TRIP_LTP_CNT (DC_LCB_CSRS + 0x0000000004B0) #define RCV_LENGTH_ERR_CNT 0 +#define RCV_SHORT_ERR_CNT 2 #define RCV_ICRC_ERR_CNT 6 #define RCV_EBP_CNT 9 #define RCV_BUF_OVFL_CNT 10 diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index d47da7b0438f..40a1ff0c8a8e 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -323,6 +323,9 @@ struct diag_pkt { /* RHF receive type error - bypass packet errors */ #define RHF_RTE_BYPASS_NO_ERR 0x0 +/* MAX RcvSEQ */ +#define RHF_MAX_SEQ 13 + /* IB - LRH header constants */ #define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */ #define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */ diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index d268bf9c42ee..4633a0ce1a8c 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -379,7 +379,7 @@ static void *_rcds_seq_next(struct seq_file *s, void *v, loff_t *pos) struct hfi1_devdata *dd = dd_from_dev(ibd); ++*pos; - if (!dd->rcd || *pos >= dd->n_krcv_queues) + if (!dd->rcd || *pos >= dd->num_rcv_contexts) return NULL; return pos; } diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 01aa1f132f55..049d15befe58 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -411,14 +411,14 @@ drop: static inline void init_packet(struct hfi1_ctxtdata *rcd, struct hfi1_packet *packet) { - packet->rsize = rcd->rcvhdrqentsize; /* words */ - packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */ + packet->rsize = get_hdrqentsize(rcd); /* words */ + packet->maxcnt = get_hdrq_cnt(rcd) * packet->rsize; /* words */ packet->rcd = rcd; packet->updegr = 0; packet->etail = -1; packet->rhf_addr = get_rhf_addr(rcd); packet->rhf = rhf_to_cpu(packet->rhf_addr); - packet->rhqoff = rcd->head; + packet->rhqoff = hfi1_rcd_head(rcd); packet->numpkt = 0; } @@ -551,22 +551,22 @@ static inline void init_ps_mdata(struct ps_mdata *mdata, mdata->maxcnt = packet->maxcnt; mdata->ps_head = packet->rhqoff; - if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) { + if (get_dma_rtail_setting(rcd)) { mdata->ps_tail = get_rcvhdrtail(rcd); if (rcd->ctxt == HFI1_CTRL_CTXT) - mdata->ps_seq = rcd->seq_cnt; + mdata->ps_seq = hfi1_seq_cnt(rcd); else mdata->ps_seq = 0; /* not used with DMA_RTAIL */ } else { mdata->ps_tail = 0; /* used only with DMA_RTAIL*/ - mdata->ps_seq = rcd->seq_cnt; + mdata->ps_seq = hfi1_seq_cnt(rcd); } } static inline int ps_done(struct ps_mdata *mdata, u64 rhf, struct hfi1_ctxtdata *rcd) { - if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) + if (get_dma_rtail_setting(rcd)) return mdata->ps_head == mdata->ps_tail; return mdata->ps_seq != rhf_rcv_seq(rhf); } @@ -592,11 +592,9 @@ static inline void update_ps_mdata(struct ps_mdata *mdata, mdata->ps_head = 0; /* Control context must do seq counting */ - if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) || - (rcd->ctxt == HFI1_CTRL_CTXT)) { - if (++mdata->ps_seq > 13) - mdata->ps_seq = 1; - } + if (!get_dma_rtail_setting(rcd) || + rcd->ctxt == HFI1_CTRL_CTXT) + mdata->ps_seq = hfi1_seq_incr_wrap(mdata->ps_seq); } /* @@ -734,6 +732,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread) { int ret; + packet->rcd->dd->ctx0_seq_drop++; /* Set up for the next packet */ packet->rhqoff += packet->rsize; if (packet->rhqoff >= packet->maxcnt) @@ -769,7 +768,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) * The +2 is the size of the RHF. */ prefetch_range(packet->ebuf, - packet->tlen - ((packet->rcd->rcvhdrqentsize - + packet->tlen - ((get_hdrqentsize(packet->rcd) - (rhf_hdrq_offset(packet->rhf) + 2)) * 4)); } @@ -823,7 +822,7 @@ static inline void finish_packet(struct hfi1_packet *packet) * The only thing we need to do is a final update and call for an * interrupt */ - update_usrhead(packet->rcd, packet->rcd->head, packet->updegr, + update_usrhead(packet->rcd, hfi1_rcd_head(packet->rcd), packet->updegr, packet->etail, rcv_intr_dynamic, packet->numpkt); } @@ -832,13 +831,11 @@ static inline void finish_packet(struct hfi1_packet *packet) */ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread) { - u32 seq; int last = RCV_PKT_OK; struct hfi1_packet packet; init_packet(rcd, &packet); - seq = rhf_rcv_seq(packet.rhf); - if (seq != rcd->seq_cnt) { + if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) { last = RCV_PKT_DONE; goto bail; } @@ -847,15 +844,12 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread) while (last == RCV_PKT_OK) { last = process_rcv_packet(&packet, thread); - seq = rhf_rcv_seq(packet.rhf); - if (++rcd->seq_cnt > 13) - rcd->seq_cnt = 1; - if (seq != rcd->seq_cnt) + if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf))) last = RCV_PKT_DONE; process_rcv_update(last, &packet); } process_rcv_qp_work(&packet); - rcd->head = packet.rhqoff; + hfi1_set_rcd_head(rcd, packet.rhqoff); bail: finish_packet(&packet); return last; @@ -884,15 +878,14 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread) process_rcv_update(last, &packet); } process_rcv_qp_work(&packet); - rcd->head = packet.rhqoff; + hfi1_set_rcd_head(rcd, packet.rhqoff); bail: finish_packet(&packet); return last; } -static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt) +static void set_all_fastpath(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) { - struct hfi1_ctxtdata *rcd; u16 i; /* @@ -900,50 +893,17 @@ static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt) * interrupt handler only for that context. Otherwise, switch * interrupt handler for all statically allocated kernel contexts. */ - if (ctxt >= dd->first_dyn_alloc_ctxt) { - rcd = hfi1_rcd_get_by_index_safe(dd, ctxt); - if (rcd) { - rcd->do_interrupt = - &handle_receive_interrupt_nodma_rtail; - hfi1_rcd_put(rcd); - } - return; - } - - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) { - rcd = hfi1_rcd_get_by_index(dd, i); - if (rcd) - rcd->do_interrupt = - &handle_receive_interrupt_nodma_rtail; + if (rcd->ctxt >= dd->first_dyn_alloc_ctxt && !rcd->is_vnic) { + hfi1_rcd_get(rcd); + hfi1_set_fast(rcd); hfi1_rcd_put(rcd); - } -} - -static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt) -{ - struct hfi1_ctxtdata *rcd; - u16 i; - - /* - * For dynamically allocated kernel contexts (like vnic) switch - * interrupt handler only for that context. Otherwise, switch - * interrupt handler for all statically allocated kernel contexts. - */ - if (ctxt >= dd->first_dyn_alloc_ctxt) { - rcd = hfi1_rcd_get_by_index_safe(dd, ctxt); - if (rcd) { - rcd->do_interrupt = - &handle_receive_interrupt_dma_rtail; - hfi1_rcd_put(rcd); - } return; } - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) { + for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) { rcd = hfi1_rcd_get_by_index(dd, i); - if (rcd) - rcd->do_interrupt = - &handle_receive_interrupt_dma_rtail; + if (rcd && (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic)) + hfi1_set_fast(rcd); hfi1_rcd_put(rcd); } } @@ -959,17 +919,14 @@ void set_all_slowpath(struct hfi1_devdata *dd) if (!rcd) continue; if (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic) - rcd->do_interrupt = &handle_receive_interrupt; + rcd->do_interrupt = rcd->slow_handler; hfi1_rcd_put(rcd); } } -static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, - struct hfi1_packet *packet, - struct hfi1_devdata *dd) +static bool __set_armed_to_active(struct hfi1_packet *packet) { - struct work_struct *lsaw = &rcd->ppd->linkstate_active_work; u8 etype = rhf_rcv_type(packet->rhf); u8 sc = SC15_PACKET; @@ -984,19 +941,34 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, sc = hfi1_16B_get_sc(hdr); } if (sc != SC15_PACKET) { - int hwstate = driver_lstate(rcd->ppd); + int hwstate = driver_lstate(packet->rcd->ppd); + struct work_struct *lsaw = + &packet->rcd->ppd->linkstate_active_work; if (hwstate != IB_PORT_ACTIVE) { - dd_dev_info(dd, + dd_dev_info(packet->rcd->dd, "Unexpected link state %s\n", opa_lstate_name(hwstate)); - return 0; + return false; } - queue_work(rcd->ppd->link_wq, lsaw); - return 1; + queue_work(packet->rcd->ppd->link_wq, lsaw); + return true; } - return 0; + return false; +} + +/** + * armed to active - the fast path for armed to active + * @packet: the packet structure + * + * Return true if packet processing needs to bail. + */ +static bool set_armed_to_active(struct hfi1_packet *packet) +{ + if (likely(packet->rcd->ppd->host_link_state != HLS_UP_ARMED)) + return false; + return __set_armed_to_active(packet); } /* @@ -1019,10 +991,8 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) init_packet(rcd, &packet); - if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) { - u32 seq = rhf_rcv_seq(packet.rhf); - - if (seq != rcd->seq_cnt) { + if (!get_dma_rtail_setting(rcd)) { + if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) { last = RCV_PKT_DONE; goto bail; } @@ -1039,22 +1009,15 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) * Control context can potentially receive an invalid * rhf. Drop such packets. */ - if (rcd->ctxt == HFI1_CTRL_CTXT) { - u32 seq = rhf_rcv_seq(packet.rhf); - - if (seq != rcd->seq_cnt) + if (rcd->ctxt == HFI1_CTRL_CTXT) + if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) skip_pkt = 1; - } } prescan_rxq(rcd, &packet); while (last == RCV_PKT_OK) { - if (unlikely(dd->do_drop && - atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) == - DROP_PACKET_ON)) { - dd->do_drop = 0; - + if (hfi1_need_drop(dd)) { /* On to the next packet */ packet.rhqoff += packet.rsize; packet.rhf_addr = (__le32 *)rcd->rcvhdrq + @@ -1066,26 +1029,14 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) last = skip_rcv_packet(&packet, thread); skip_pkt = 0; } else { - /* Auto activate link on non-SC15 packet receive */ - if (unlikely(rcd->ppd->host_link_state == - HLS_UP_ARMED) && - set_armed_to_active(rcd, &packet, dd)) + if (set_armed_to_active(&packet)) goto bail; last = process_rcv_packet(&packet, thread); } - if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) { - u32 seq = rhf_rcv_seq(packet.rhf); - - if (++rcd->seq_cnt > 13) - rcd->seq_cnt = 1; - if (seq != rcd->seq_cnt) + if (!get_dma_rtail_setting(rcd)) { + if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf))) last = RCV_PKT_DONE; - if (needset) { - dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n"); - set_nodma_rtail(dd, rcd->ctxt); - needset = 0; - } } else { if (packet.rhqoff == hdrqtail) last = RCV_PKT_DONE; @@ -1094,27 +1045,24 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) * rhf. Drop such packets. */ if (rcd->ctxt == HFI1_CTRL_CTXT) { - u32 seq = rhf_rcv_seq(packet.rhf); + bool lseq; - if (++rcd->seq_cnt > 13) - rcd->seq_cnt = 1; - if (!last && (seq != rcd->seq_cnt)) + lseq = hfi1_seq_incr(rcd, + rhf_rcv_seq(packet.rhf)); + if (!last && lseq) skip_pkt = 1; } - - if (needset) { - dd_dev_info(dd, - "Switching to DMA_RTAIL\n"); - set_dma_rtail(dd, rcd->ctxt); - needset = 0; - } } + if (needset) { + needset = false; + set_all_fastpath(dd, rcd); + } process_rcv_update(last, &packet); } process_rcv_qp_work(&packet); - rcd->head = packet.rhqoff; + hfi1_set_rcd_head(rcd, packet.rhqoff); bail: /* @@ -1606,23 +1554,22 @@ void handle_eflags(struct hfi1_packet *packet) * The following functions are called by the interrupt handler. They are type * specific handlers for each packet type. */ -static int process_receive_ib(struct hfi1_packet *packet) +static void process_receive_ib(struct hfi1_packet *packet) { if (hfi1_setup_9B_packet(packet)) - return RHF_RCV_CONTINUE; + return; if (unlikely(hfi1_dbg_should_fault_rx(packet))) - return RHF_RCV_CONTINUE; + return; trace_hfi1_rcvhdr(packet); if (unlikely(rhf_err_flags(packet->rhf))) { handle_eflags(packet); - return RHF_RCV_CONTINUE; + return; } hfi1_ib_rcv(packet); - return RHF_RCV_CONTINUE; } static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet) @@ -1638,23 +1585,23 @@ static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet) return false; } -static int process_receive_bypass(struct hfi1_packet *packet) +static void process_receive_bypass(struct hfi1_packet *packet) { struct hfi1_devdata *dd = packet->rcd->dd; if (hfi1_is_vnic_packet(packet)) { hfi1_vnic_bypass_rcv(packet); - return RHF_RCV_CONTINUE; + return; } if (hfi1_setup_bypass_packet(packet)) - return RHF_RCV_CONTINUE; + return; trace_hfi1_rcvhdr(packet); if (unlikely(rhf_err_flags(packet->rhf))) { handle_eflags(packet); - return RHF_RCV_CONTINUE; + return; } if (hfi1_16B_get_l2(packet->hdr) == 0x2) { @@ -1677,17 +1624,16 @@ static int process_receive_bypass(struct hfi1_packet *packet) (OPA_EI_STATUS_SMASK | BAD_L2_ERR); } } - return RHF_RCV_CONTINUE; } -static int process_receive_error(struct hfi1_packet *packet) +static void process_receive_error(struct hfi1_packet *packet) { /* KHdrHCRCErr -- KDETH packet with a bad HCRC */ if (unlikely( hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) && (rhf_rcv_type_err(packet->rhf) == RHF_RCV_TYPE_ERROR || packet->rhf & RHF_DC_ERR))) - return RHF_RCV_CONTINUE; + return; hfi1_setup_ib_header(packet); handle_eflags(packet); @@ -1695,32 +1641,29 @@ static int process_receive_error(struct hfi1_packet *packet) if (unlikely(rhf_err_flags(packet->rhf))) dd_dev_err(packet->rcd->dd, "Unhandled error packet received. Dropping.\n"); - - return RHF_RCV_CONTINUE; } -static int kdeth_process_expected(struct hfi1_packet *packet) +static void kdeth_process_expected(struct hfi1_packet *packet) { hfi1_setup_9B_packet(packet); if (unlikely(hfi1_dbg_should_fault_rx(packet))) - return RHF_RCV_CONTINUE; + return; if (unlikely(rhf_err_flags(packet->rhf))) { struct hfi1_ctxtdata *rcd = packet->rcd; if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet)) - return RHF_RCV_CONTINUE; + return; } hfi1_kdeth_expected_rcv(packet); - return RHF_RCV_CONTINUE; } -static int kdeth_process_eager(struct hfi1_packet *packet) +static void kdeth_process_eager(struct hfi1_packet *packet) { hfi1_setup_9B_packet(packet); if (unlikely(hfi1_dbg_should_fault_rx(packet))) - return RHF_RCV_CONTINUE; + return; trace_hfi1_rcvhdr(packet); if (unlikely(rhf_err_flags(packet->rhf))) { @@ -1728,37 +1671,41 @@ static int kdeth_process_eager(struct hfi1_packet *packet) show_eflags_errs(packet); if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet)) - return RHF_RCV_CONTINUE; + return; } hfi1_kdeth_eager_rcv(packet); - return RHF_RCV_CONTINUE; } -static int process_receive_invalid(struct hfi1_packet *packet) +static void process_receive_invalid(struct hfi1_packet *packet) { dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n", rhf_rcv_type(packet->rhf)); - return RHF_RCV_CONTINUE; } +#define HFI1_RCVHDR_DUMP_MAX 5 + void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd) { struct hfi1_packet packet; struct ps_mdata mdata; + int i; - seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s head %llu tail %llu\n", - rcd->ctxt, rcd->rcvhdrq_cnt, rcd->rcvhdrqentsize, - HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ? + seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s ctrl 0x%08llx status 0x%08llx, head %llu tail %llu sw head %u\n", + rcd->ctxt, get_hdrq_cnt(rcd), get_hdrqentsize(rcd), + get_dma_rtail_setting(rcd) ? "dma_rtail" : "nodma_rtail", + read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_CTRL), + read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_STATUS), read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD) & RCV_HDR_HEAD_HEAD_MASK, - read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL)); + read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL), + rcd->head); init_packet(rcd, &packet); init_ps_mdata(&mdata, &packet); - while (1) { + for (i = 0; i < HFI1_RCVHDR_DUMP_MAX; i++) { __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + rcd->rhf_offset; struct ib_header *hdr; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 7c5e3fb22413..bef6946861b2 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -505,12 +505,12 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) ret = -EINVAL; goto done; } - if ((flags & VM_WRITE) || !uctxt->rcvhdrtail_kvaddr) { + if ((flags & VM_WRITE) || !hfi1_rcvhdrtail_kvaddr(uctxt)) { ret = -EPERM; goto done; } memlen = PAGE_SIZE; - memvirt = (void *)uctxt->rcvhdrtail_kvaddr; + memvirt = (void *)hfi1_rcvhdrtail_kvaddr(uctxt); flags &= ~VM_MAYWRITE; break; case SUBCTXT_UREGS: @@ -1090,7 +1090,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt) * don't have to wait to be sure the DMA update has happened * (chip resets head/tail to 0 on transition to enable). */ - if (uctxt->rcvhdrtail_kvaddr) + if (hfi1_rcvhdrtail_kvaddr(uctxt)) clear_rcvhdrtail(uctxt); /* Setup J_KEY before enabling the context */ @@ -1154,8 +1154,8 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) cinfo.send_ctxt = uctxt->sc->hw_context; cinfo.egrtids = uctxt->egrbufs.alloced; - cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt; - cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2; + cinfo.rcvhdrq_cnt = get_hdrq_cnt(uctxt); + cinfo.rcvhdrq_entsize = get_hdrqentsize(uctxt) << 2; cinfo.sdma_ring_size = fd->cq->nentries; cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size; @@ -1543,7 +1543,7 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, * always resets it's tail register back to 0 on a * transition from disabled to enabled. */ - if (uctxt->rcvhdrtail_kvaddr) + if (hfi1_rcvhdrtail_kvaddr(uctxt)) clear_rcvhdrtail(uctxt); rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB; } else { diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index fc10d65fc3e1..6365e8ffed9d 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -197,7 +197,9 @@ struct exp_tid_set { u32 count; }; -typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); +struct hfi1_ctxtdata; +typedef int (*intr_handler)(struct hfi1_ctxtdata *rcd, int data); +typedef void (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); struct tid_queue { struct list_head queue_head; @@ -226,7 +228,11 @@ struct hfi1_ctxtdata { * be valid. Worst case is we process an extra interrupt and up to 64 * packets with the wrong interrupt handler. */ - int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded); + intr_handler do_interrupt; + /** fast handler after autoactive */ + intr_handler fast_handler; + /** slow handler */ + intr_handler slow_handler; /* verbs rx_stats per rcd */ struct hfi1_opcode_stats_perctx *opstats; /* clear interrupt mask */ @@ -1153,6 +1159,8 @@ struct hfi1_devdata { char *boardname; /* human readable board info */ + u64 ctx0_seq_drop; + /* reset value */ u64 z_int_counter; u64 z_rcv_limit; @@ -1310,7 +1318,7 @@ struct hfi1_devdata { struct err_info_constraint err_info_xmit_constraint; atomic_t drop_packet; - u8 do_drop; + bool do_drop; u8 err_info_uncorrectable; u8 err_info_fmconfig; @@ -1507,12 +1515,148 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, #define RCV_PKT_LIMIT 0x1 /* stop, hit limit, start thread */ #define RCV_PKT_DONE 0x2 /* stop, no more packets detected */ +/** + * hfi1_rcd_head - add accessor for rcd head + * @rcd: the context + */ +static inline u32 hfi1_rcd_head(struct hfi1_ctxtdata *rcd) +{ + return rcd->head; +} + +/** + * hfi1_set_rcd_head - add accessor for rcd head + * @rcd: the context + * @head: the new head + */ +static inline void hfi1_set_rcd_head(struct hfi1_ctxtdata *rcd, u32 head) +{ + rcd->head = head; +} + /* calculate the current RHF address */ static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd) { return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset; } +/* return DMA_RTAIL configuration */ +static inline bool get_dma_rtail_setting(struct hfi1_ctxtdata *rcd) +{ + return !!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL); +} + +/** + * hfi1_seq_incr_wrap - wrapping increment for sequence + * @seq: the current sequence number + * + * Returns: the incremented seq + */ +static inline u8 hfi1_seq_incr_wrap(u8 seq) +{ + if (++seq > RHF_MAX_SEQ) + seq = 1; + return seq; +} + +/** + * hfi1_seq_cnt - return seq_cnt member + * @rcd: the receive context + * + * Return seq_cnt member + */ +static inline u8 hfi1_seq_cnt(struct hfi1_ctxtdata *rcd) +{ + return rcd->seq_cnt; +} + +/** + * hfi1_set_seq_cnt - return seq_cnt member + * @rcd: the receive context + * + * Return seq_cnt member + */ +static inline void hfi1_set_seq_cnt(struct hfi1_ctxtdata *rcd, u8 cnt) +{ + rcd->seq_cnt = cnt; +} + +/** + * last_rcv_seq - is last + * @rcd: the receive context + * @seq: sequence + * + * return true if last packet + */ +static inline bool last_rcv_seq(struct hfi1_ctxtdata *rcd, u32 seq) +{ + return seq != rcd->seq_cnt; +} + +/** + * rcd_seq_incr - increment context sequence number + * @rcd: the receive context + * @seq: the current sequence number + * + * Returns: true if the this was the last packet + */ +static inline bool hfi1_seq_incr(struct hfi1_ctxtdata *rcd, u32 seq) +{ + rcd->seq_cnt = hfi1_seq_incr_wrap(rcd->seq_cnt); + return last_rcv_seq(rcd, seq); +} + +/** + * get_hdrqentsize - return hdrq entry size + * @rcd: the receive context + */ +static inline u8 get_hdrqentsize(struct hfi1_ctxtdata *rcd) +{ + return rcd->rcvhdrqentsize; +} + +/** + * get_hdrq_cnt - return hdrq count + * @rcd: the receive context + */ +static inline u16 get_hdrq_cnt(struct hfi1_ctxtdata *rcd) +{ + return rcd->rcvhdrq_cnt; +} + +/** + * hfi1_is_slowpath - check if this context is slow path + * @rcd: the receive context + */ +static inline bool hfi1_is_slowpath(struct hfi1_ctxtdata *rcd) +{ + return rcd->do_interrupt == rcd->slow_handler; +} + +/** + * hfi1_is_fastpath - check if this context is fast path + * @rcd: the receive context + */ +static inline bool hfi1_is_fastpath(struct hfi1_ctxtdata *rcd) +{ + if (rcd->ctxt == HFI1_CTRL_CTXT) + return false; + + return rcd->do_interrupt == rcd->fast_handler; +} + +/** + * hfi1_set_fast - change to the fast handler + * @rcd: the receive context + */ +static inline void hfi1_set_fast(struct hfi1_ctxtdata *rcd) +{ + if (unlikely(!rcd)) + return; + if (unlikely(!hfi1_is_fastpath(rcd))) + rcd->do_interrupt = rcd->fast_handler; +} + int hfi1_reset_device(int); void receive_interrupt_work(struct work_struct *work); @@ -2015,9 +2159,21 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, void hfi1_release_user_pages(struct mm_struct *mm, struct page **p, size_t npages, bool dirty); +/** + * hfi1_rcvhdrtail_kvaddr - return tail kvaddr + * @rcd - the receive context + */ +static inline __le64 *hfi1_rcvhdrtail_kvaddr(const struct hfi1_ctxtdata *rcd) +{ + return (__le64 *)rcd->rcvhdrtail_kvaddr; +} + static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd) { - *((u64 *)rcd->rcvhdrtail_kvaddr) = 0ULL; + u64 *kv = (u64 *)hfi1_rcvhdrtail_kvaddr(rcd); + + if (kv) + *kv = 0ULL; } static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd) @@ -2026,7 +2182,17 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd) * volatile because it's a DMA target from the chip, routine is * inlined, and don't want register caching or reordering. */ - return (u32)le64_to_cpu(*rcd->rcvhdrtail_kvaddr); + return (u32)le64_to_cpu(*hfi1_rcvhdrtail_kvaddr(rcd)); +} + +static inline bool hfi1_packet_present(struct hfi1_ctxtdata *rcd) +{ + if (likely(!rcd->rcvhdrtail_kvaddr)) { + u32 seq = rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))); + + return !last_rcv_seq(rcd, seq); + } + return hfi1_rcd_head(rcd) != get_rcvhdrtail(rcd); } /* @@ -2298,6 +2464,25 @@ static inline bool is_integrated(struct hfi1_devdata *dd) return dd->pcidev->device == PCI_DEVICE_ID_INTEL1; } +/** + * hfi1_need_drop - detect need for drop + * @dd: - the device + * + * In some cases, the first packet needs to be dropped. + * + * Return true is the current packet needs to be dropped and false otherwise. + */ +static inline bool hfi1_need_drop(struct hfi1_devdata *dd) +{ + if (unlikely(dd->do_drop && + atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) == + DROP_PACKET_ON)) { + dd->do_drop = false; + return true; + } + return false; +} + int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 26b792bb1027..e3acda7a0800 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -78,8 +78,6 @@ */ #define HFI1_MIN_USER_CTXT_BUFCNT 7 -#define HFI1_MIN_HDRQ_EGRBUF_CNT 2 -#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352 #define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */ #define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */ @@ -122,8 +120,6 @@ unsigned int user_credit_return_threshold = 33; /* default is 33% */ module_param(user_credit_return_threshold, uint, S_IRUGO); MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)"); -static inline u64 encode_rcv_header_entry_size(u16 size); - DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); static int hfi1_create_kctxt(struct hfi1_devdata *dd, @@ -154,7 +150,12 @@ static int hfi1_create_kctxt(struct hfi1_devdata *dd, /* Control context must use DMA_RTAIL */ if (rcd->ctxt == HFI1_CTRL_CTXT) rcd->flags |= HFI1_CAP_DMA_RTAIL; - rcd->seq_cnt = 1; + rcd->fast_handler = get_dma_rtail_setting(rcd) ? + handle_receive_interrupt_dma_rtail : + handle_receive_interrupt_nodma_rtail; + rcd->slow_handler = handle_receive_interrupt; + + hfi1_set_seq_cnt(rcd, 1); rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node); if (!rcd->sc) { @@ -511,23 +512,6 @@ void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd) } /* - * Convert a receive header entry size that to the encoding used in the CSR. - * - * Return a zero if the given size is invalid. - */ -static inline u64 encode_rcv_header_entry_size(u16 size) -{ - /* there are only 3 valid receive header entry sizes */ - if (size == 2) - return 1; - if (size == 16) - return 2; - else if (size == 32) - return 4; - return 0; /* invalid */ -} - -/* * Select the largest ccti value over all SLs to determine the intra- * packet gap for the link. * @@ -892,10 +876,10 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) if (is_ax(dd)) { atomic_set(&dd->drop_packet, DROP_PACKET_ON); - dd->do_drop = 1; + dd->do_drop = true; } else { atomic_set(&dd->drop_packet, DROP_PACKET_OFF); - dd->do_drop = 0; + dd->do_drop = false; } /* make sure the link is not "up" */ @@ -1149,9 +1133,9 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd), rcd->rcvhdrq, rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; - if (rcd->rcvhdrtail_kvaddr) { + if (hfi1_rcvhdrtail_kvaddr(rcd)) { dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, - (void *)rcd->rcvhdrtail_kvaddr, + (void *)hfi1_rcvhdrtail_kvaddr(rcd), rcd->rcvhdrqtailaddr_dma); rcd->rcvhdrtail_kvaddr = NULL; } @@ -1611,29 +1595,6 @@ static void postinit_cleanup(struct hfi1_devdata *dd) hfi1_free_devdata(dd); } -static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt) -{ - if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) { - dd_dev_err(dd, "Receive header queue count too small\n"); - return -EINVAL; - } - - if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) { - dd_dev_err(dd, - "Receive header queue count cannot be greater than %u\n", - HFI1_MAX_HDRQ_EGRBUF_CNT); - return -EINVAL; - } - - if (thecnt % HDRQ_INCREMENT) { - dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n", - thecnt, HDRQ_INCREMENT); - return -EINVAL; - } - - return 0; -} - static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int ret = 0, j, pidx, initfail; @@ -1661,7 +1622,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Validate some global module parameters */ - ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt); + ret = hfi1_validate_rcvhdrcnt(dd, rcvhdrcnt); if (ret) goto bail; @@ -1842,7 +1803,6 @@ static void shutdown_one(struct pci_dev *pdev) int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) { unsigned amt; - u64 reg; if (!rcd->rcvhdrq) { gfp_t gfp_flags; @@ -1874,30 +1834,9 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) goto bail_free; } } - /* - * These values are per-context: - * RcvHdrCnt - * RcvHdrEntSize - * RcvHdrSize - */ - reg = ((u64)(rcd->rcvhdrq_cnt >> HDRQ_SIZE_SHIFT) - & RCV_HDR_CNT_CNT_MASK) - << RCV_HDR_CNT_CNT_SHIFT; - write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_CNT, reg); - reg = (encode_rcv_header_entry_size(rcd->rcvhdrqentsize) - & RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) - << RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT; - write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg); - reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK) - << RCV_HDR_SIZE_HDR_SIZE_SHIFT; - write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg); - /* - * Program dummy tail address for every receive context - * before enabling any receive context - */ - write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR, - dd->rcvhdrtail_dummy_dma); + set_hdrq_regs(rcd->dd, rcd->ctxt, rcd->rcvhdrqentsize, + rcd->rcvhdrq_cnt); return 0; diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c index d920b165d696..db82db497b2c 100644 --- a/drivers/infiniband/hw/hfi1/msix.c +++ b/drivers/infiniband/hw/hfi1/msix.c @@ -115,13 +115,11 @@ int msix_initialize(struct hfi1_devdata *dd) */ static int msix_request_irq(struct hfi1_devdata *dd, void *arg, irq_handler_t handler, irq_handler_t thread, - u32 idx, enum irq_type type) + enum irq_type type, const char *name) { unsigned long nr; int irq; int ret; - const char *err_info; - char name[MAX_NAME_SIZE]; struct hfi1_msix_entry *me; /* Allocate an MSIx vector */ @@ -135,43 +133,15 @@ static int msix_request_irq(struct hfi1_devdata *dd, void *arg, if (nr == dd->msix_info.max_requested) return -ENOSPC; - /* Specific verification and determine the name */ - switch (type) { - case IRQ_GENERAL: - /* general interrupt must be MSIx vector 0 */ - if (nr) { - spin_lock(&dd->msix_info.msix_lock); - __clear_bit(nr, dd->msix_info.in_use_msix); - spin_unlock(&dd->msix_info.msix_lock); - dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n", - nr); - return -EINVAL; - } - snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit); - err_info = "general"; - break; - case IRQ_SDMA: - snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d", - dd->unit, idx); - err_info = "sdma"; - break; - case IRQ_RCVCTXT: - snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d", - dd->unit, idx); - err_info = "receive context"; - break; - case IRQ_OTHER: - default: + if (type < IRQ_SDMA || type >= IRQ_OTHER) return -EINVAL; - } - name[sizeof(name) - 1] = 0; irq = pci_irq_vector(dd->pcidev, nr); ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name); if (ret) { dd_dev_err(dd, - "%s: request for IRQ %d failed, MSIx %d, err %d\n", - err_info, irq, idx, ret); + "%s: request for IRQ %d failed, MSIx %lu, err %d\n", + name, irq, nr, ret); spin_lock(&dd->msix_info.msix_lock); __clear_bit(nr, dd->msix_info.in_use_msix); spin_unlock(&dd->msix_info.msix_lock); @@ -195,17 +165,13 @@ static int msix_request_irq(struct hfi1_devdata *dd, void *arg, return nr; } -/** - * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs - * @rcd: valid rcd context - * - */ -int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd) +static int msix_request_rcd_irq_common(struct hfi1_ctxtdata *rcd, + irq_handler_t handler, + irq_handler_t thread, + const char *name) { - int nr; - - nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt, - receive_context_thread, rcd->ctxt, IRQ_RCVCTXT); + int nr = msix_request_irq(rcd->dd, rcd, handler, thread, + IRQ_RCVCTXT, name); if (nr < 0) return nr; @@ -222,6 +188,22 @@ int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd) } /** + * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs + * @rcd: valid rcd context + * + */ +int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd) +{ + char name[MAX_NAME_SIZE]; + + snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d", + rcd->dd->unit, rcd->ctxt); + + return msix_request_rcd_irq_common(rcd, receive_context_interrupt, + receive_context_thread, name); +} + +/** * msix_request_smda_ira() - Helper for getting SDMA IRQ resources * @sde: valid sdma engine * @@ -229,9 +211,12 @@ int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd) int msix_request_sdma_irq(struct sdma_engine *sde) { int nr; + char name[MAX_NAME_SIZE]; + snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d", + sde->dd->unit, sde->this_idx); nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL, - sde->this_idx, IRQ_SDMA); + IRQ_SDMA, name); if (nr < 0) return nr; sde->msix_intr = nr; @@ -241,6 +226,32 @@ int msix_request_sdma_irq(struct sdma_engine *sde) } /** + * msix_request_general_irq(void) - Helper for getting general IRQ + * resources + * @dd: valid device data + */ +int msix_request_general_irq(struct hfi1_devdata *dd) +{ + int nr; + char name[MAX_NAME_SIZE]; + + snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit); + nr = msix_request_irq(dd, dd, general_interrupt, NULL, IRQ_GENERAL, + name); + if (nr < 0) + return nr; + + /* general interrupt must be MSIx vector 0 */ + if (nr) { + msix_free_irq(dd, (u8)nr); + dd_dev_err(dd, "Invalid index %d for GENERAL IRQ\n", nr); + return -EINVAL; + } + + return 0; +} + +/** * enable_sdma_src() - Helper to enable SDMA IRQ srcs * @dd: valid devdata structure * @i: index of SDMA engine @@ -265,10 +276,9 @@ static void enable_sdma_srcs(struct hfi1_devdata *dd, int i) int msix_request_irqs(struct hfi1_devdata *dd) { int i; - int ret; + int ret = msix_request_general_irq(dd); - ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL); - if (ret < 0) + if (ret) return ret; for (i = 0; i < dd->num_sdma; i++) { diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h index a514881632a4..1a02ab7971c8 100644 --- a/drivers/infiniband/hw/hfi1/msix.h +++ b/drivers/infiniband/hw/hfi1/msix.h @@ -54,6 +54,7 @@ int msix_initialize(struct hfi1_devdata *dd); int msix_request_irqs(struct hfi1_devdata *dd); void msix_clean_up_interrupts(struct hfi1_devdata *dd); +int msix_request_general_irq(struct hfi1_devdata *dd); int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd); int msix_request_sdma_irq(struct sdma_engine *sde); void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr); diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 1a3c647675a7..f1734e5e9ac4 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -2599,7 +2599,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data, * to be sent before sending this one. */ e = NULL; - old_req = 1; + old_req = true; ibp->rvp.n_rc_dupreq++; spin_lock_irqsave(&qp->s_lock, flags); diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h index e00c8a7d559c..b5fc5c6cd52f 100644 --- a/drivers/infiniband/hw/hfi1/trace_ctxts.h +++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h @@ -80,7 +80,7 @@ TRACE_EVENT(hfi1_uctxtdata, __entry->credits = uctxt->sc->credits; __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free); __entry->piobase = uctxt->sc->base_addr; - __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt; + __entry->rcvhdrq_cnt = get_hdrq_cnt(uctxt); __entry->rcvhdrq_dma = uctxt->rcvhdrq_dma; __entry->eager_cnt = uctxt->egrbufs.alloced; __entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma; diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 3cec960e9674..168079ed122c 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -106,19 +106,8 @@ TRACE_EVENT(hfi1_receive_interrupt, ), TP_fast_assign(DD_DEV_ASSIGN(dd); __entry->ctxt = rcd->ctxt; - if (rcd->do_interrupt == - &handle_receive_interrupt) { - __entry->slow_path = 1; - __entry->dma_rtail = 0xFF; - } else if (rcd->do_interrupt == - &handle_receive_interrupt_dma_rtail){ - __entry->dma_rtail = 1; - __entry->slow_path = 0; - } else if (rcd->do_interrupt == - &handle_receive_interrupt_nodma_rtail) { - __entry->dma_rtail = 0; - __entry->slow_path = 0; - } + __entry->slow_path = hfi1_is_slowpath(rcd); + __entry->dma_rtail = get_dma_rtail_setting(rcd); ), TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d", __get_str(dev), diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index b49e60e8397d..6b14581b9965 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -78,7 +78,7 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) if (ret) goto done; - if (uctxt->rcvhdrtail_kvaddr) + if (hfi1_rcvhdrtail_kvaddr(uctxt)) clear_rcvhdrtail(uctxt); rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index a2d1e5331bf1..5ffe4c996ed3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -370,6 +370,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, hr_cq->buf.size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; hr_cq->buf.page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; spin_lock_init(&hr_cq->lock); + INIT_LIST_HEAD(&hr_cq->sq_list); + INIT_LIST_HEAD(&hr_cq->rq_list); if (udata) { ret = create_user_cq(hr_dev, hr_cq, udata, &resp); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 5617434cbfb4..a7c4ff975c28 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -45,8 +45,6 @@ #define HNS_ROCE_MAX_MSG_LEN 0x80000000 -#define HNS_ROCE_ALIGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b)) - #define HNS_ROCE_IB_MIN_SQ_STRIDE 6 #define HNS_ROCE_BA_SIZE (32 * 4096) @@ -107,11 +105,6 @@ #define NODE_DESC_SIZE 64 #define DB_REG_OFFSET 0x1000 -#define SERV_TYPE_RC 0 -#define SERV_TYPE_RD 1 -#define SERV_TYPE_UC 2 -#define SERV_TYPE_UD 3 - /* Configure to HW for PAGE_SIZE larger than 4KB */ #define PG_SHIFT_OFFSET (PAGE_SHIFT - 12) @@ -131,6 +124,13 @@ #define EQ_DEPTH_COEFF 2 enum { + SERV_TYPE_RC, + SERV_TYPE_UC, + SERV_TYPE_RD, + SERV_TYPE_UD, +}; + +enum { HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0, HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1, }; @@ -423,7 +423,7 @@ struct hns_roce_mr_table { struct hns_roce_wq { u64 *wrid; /* Work request ID */ spinlock_t lock; - int wqe_cnt; /* WQE num */ + u32 wqe_cnt; /* WQE num */ int max_gs; int offset; int wqe_shift; /* WQE size */ @@ -498,6 +498,10 @@ struct hns_roce_cq { u32 vector; atomic_t refcount; struct completion free; + struct list_head sq_list; /* all qps on this send cq */ + struct list_head rq_list; /* all qps on this recv cq */ + int is_armed; /* cq is armed */ + struct list_head node; /* all armed cqs are on a list */ }; struct hns_roce_idx_que { @@ -647,7 +651,6 @@ struct hns_roce_qp { u8 sdb_en; u32 doorbell_qpn; u32 sq_signal_bits; - u32 sq_next_wqe; struct hns_roce_wq sq; struct ib_umem *umem; @@ -682,6 +685,9 @@ struct hns_roce_qp { u32 next_sge; struct hns_roce_rinl_buf rq_inl_buf; + struct list_head node; /* all qps are on a list */ + struct list_head rq_node; /* all recv qps are on a list */ + struct list_head sq_node; /* all send qps are on a list */ }; struct hns_roce_ib_iboe { @@ -794,10 +800,8 @@ struct hns_roce_caps { int reserved_qps; int num_qpc_timer; int num_cqc_timer; - u32 max_srq_sg; int num_srqs; u32 max_wqes; - u32 max_srqs; u32 max_srq_wrs; u32 max_srq_sges; u32 max_sq_desc_sz; @@ -811,7 +815,6 @@ struct hns_roce_caps { u32 min_wqes; int reserved_cqs; int reserved_srqs; - u32 max_srqwqes; int num_aeq_vectors; int num_comp_vectors; int num_other_vectors; @@ -895,6 +898,12 @@ struct hns_roce_caps { u32 tpq_buf_pg_sz; u32 chunk_sz; /* chunk size in non multihop mode */ u64 flags; + u16 default_ceq_max_cnt; + u16 default_ceq_period; + u16 default_aeq_max_cnt; + u16 default_aeq_period; + u16 default_aeq_arm_st; + u16 default_ceq_arm_st; }; struct hns_roce_work { @@ -911,6 +920,12 @@ struct hns_roce_dfx_hw { int *buffer); }; +enum hns_roce_device_state { + HNS_ROCE_DEVICE_STATE_INITED, + HNS_ROCE_DEVICE_STATE_RST_DOWN, + HNS_ROCE_DEVICE_STATE_UNINIT, +}; + struct hns_roce_hw { int (*reset)(struct hns_roce_dev *hr_dev, bool enable); int (*cmq_init)(struct hns_roce_dev *hr_dev); @@ -993,6 +1008,9 @@ struct hns_roce_dev { bool dis_db; unsigned long reset_cnt; struct hns_roce_ib_iboe iboe; + enum hns_roce_device_state state; + struct list_head qp_list; /* list of all qps on this dev */ + spinlock_t qp_list_lock; /* protect qp_list */ struct list_head pgdir_list; struct mutex pgdir_mutex; @@ -1133,7 +1151,6 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); -int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev); int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev); int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev); int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev); @@ -1257,6 +1274,7 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); +void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 2a2b2112f886..c6e66586e533 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -74,8 +74,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, unsigned long flags = 0; void *wqe = NULL; __le32 doorbell[2]; + u32 wqe_idx = 0; int nreq = 0; - u32 ind = 0; int ret = 0; u8 *smac; int loopback; @@ -88,7 +88,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, } spin_lock_irqsave(&qp->sq.lock, flags); - ind = qp->sq_next_wqe; + for (nreq = 0; wr; ++nreq, wr = wr->next) { if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { ret = -ENOMEM; @@ -96,6 +96,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, goto out; } + wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); + if (unlikely(wr->num_sge > qp->sq.max_gs)) { dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n", wr->num_sge, qp->sq.max_gs); @@ -104,9 +106,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, goto out; } - wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); - qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = - wr->wr_id; + wqe = get_send_wqe(qp, wqe_idx); + qp->sq.wrid[wqe_idx] = wr->wr_id; /* Corresponding to the RC and RD type wqe process separately */ if (ibqp->qp_type == IB_QPT_GSI) { @@ -210,7 +211,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, cpu_to_le32((wr->sg_list[1].addr) >> 32); ud_sq_wqe->l_key1 = cpu_to_le32(wr->sg_list[1].lkey); - ind++; } else if (ibqp->qp_type == IB_QPT_RC) { u32 tmp_len = 0; @@ -308,7 +308,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, ctrl->flag |= cpu_to_le32(wr->num_sge << HNS_ROCE_WQE_SGE_NUM_BIT); } - ind++; } } @@ -336,7 +335,6 @@ out: doorbell[1] = sq_db.u32_8; hns_roce_write64_k(doorbell, qp->sq.db_reg_l); - qp->sq_next_wqe = ind; } spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -348,12 +346,6 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { - int ret = 0; - int nreq = 0; - int ind = 0; - int i = 0; - u32 reg_val; - unsigned long flags = 0; struct hns_roce_rq_wqe_ctrl *ctrl = NULL; struct hns_roce_wqe_data_seg *scat = NULL; struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); @@ -361,9 +353,14 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct device *dev = &hr_dev->pdev->dev; struct hns_roce_rq_db rq_db; __le32 doorbell[2] = {0}; + unsigned long flags = 0; + unsigned int wqe_idx; + int ret = 0; + int nreq = 0; + int i = 0; + u32 reg_val; spin_lock_irqsave(&hr_qp->rq.lock, flags); - ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1); for (nreq = 0; wr; ++nreq, wr = wr->next) { if (hns_roce_wq_overflow(&hr_qp->rq, nreq, @@ -373,6 +370,8 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, goto out; } + wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); + if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n", wr->num_sge, hr_qp->rq.max_gs); @@ -381,7 +380,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, goto out; } - ctrl = get_recv_wqe(hr_qp, ind); + ctrl = get_recv_wqe(hr_qp, wqe_idx); roce_set_field(ctrl->rwqe_byte_12, RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M, @@ -393,9 +392,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, for (i = 0; i < wr->num_sge; i++) set_data_seg(scat + i, wr->sg_list + i); - hr_qp->rq.wrid[ind] = wr->wr_id; - - ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1); + hr_qp->rq.wrid[wqe_idx] = wr->wr_id; } out: @@ -2701,7 +2698,6 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, hr_qp->rq.tail = 0; hr_qp->sq.head = 0; hr_qp->sq.tail = 0; - hr_qp->sq_next_wqe = 0; } kfree(context); @@ -3315,7 +3311,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, hr_qp->rq.tail = 0; hr_qp->sq.head = 0; hr_qp->sq.tail = 0; - hr_qp->sq_next_wqe = 0; } out: kfree(context); @@ -3614,14 +3609,18 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) if (ret) return ret; - send_cq = to_hr_cq(hr_qp->ibqp.send_cq); - recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq); + send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL; + recv_cq = hr_qp->ibqp.recv_cq ? to_hr_cq(hr_qp->ibqp.recv_cq) : NULL; hns_roce_lock_cqs(send_cq, recv_cq); if (!udata) { - __hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ? - to_hr_srq(hr_qp->ibqp.srq) : NULL); - if (send_cq != recv_cq) + if (recv_cq) + __hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, + (hr_qp->ibqp.srq ? + to_hr_srq(hr_qp->ibqp.srq) : + NULL)); + + if (send_cq && send_cq != recv_cq) __hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL); } hns_roce_unlock_cqs(send_cq, recv_cq); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index cb8071a3e0d5..12c4cd8e9378 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -63,20 +63,15 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, struct hns_roce_mr *mr = to_hr_mr(wr->mr); /* use ib_access_flags */ - roce_set_bit(rc_sq_wqe->byte_4, - V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S, + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S, wr->access & IB_ACCESS_MW_BIND ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, - V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S, + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S, wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, - V2_RC_FRMR_WQE_BYTE_4_RR_S, + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RR_S, wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, - V2_RC_FRMR_WQE_BYTE_4_RW_S, + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RW_S, wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, - V2_RC_FRMR_WQE_BYTE_4_LW_S, + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_LW_S, wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0); /* Data structure reuse may lead to confusion */ @@ -110,7 +105,7 @@ static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg, } static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, - unsigned int *sge_ind) + unsigned int *sge_ind, int valid_num_sge) { struct hns_roce_v2_wqe_data_seg *dseg; struct ib_sge *sg; @@ -123,7 +118,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; - extend_sge_num = wr->num_sge - num_in_wqe; + extend_sge_num = valid_num_sge - num_in_wqe; sg = wr->sg_list + num_in_wqe; shift = qp->hr_buf.page_shift; @@ -159,14 +154,16 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, void *wqe, unsigned int *sge_ind, + int valid_num_sge, const struct ib_send_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_v2_wqe_data_seg *dseg = wqe; struct hns_roce_qp *qp = to_hr_qp(ibqp); + int j = 0; int i; - if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) { + if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) { if (le32_to_cpu(rc_sq_wqe->msg_len) > hr_dev->caps.max_sq_inline) { *bad_wr = wr; @@ -190,7 +187,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, 1); } else { - if (wr->num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) { + if (valid_num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) { for (i = 0; i < wr->num_sge; i++) { if (likely(wr->sg_list[i].length)) { set_data_seg_v2(dseg, wr->sg_list + i); @@ -203,19 +200,21 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, (*sge_ind) & (qp->sge.sge_cnt - 1)); - for (i = 0; i < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) { + for (i = 0; i < wr->num_sge && + j < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) { if (likely(wr->sg_list[i].length)) { set_data_seg_v2(dseg, wr->sg_list + i); dseg++; + j++; } } - set_extend_sge(qp, wr, sge_ind); + set_extend_sge(qp, wr, sge_ind, valid_num_sge); } roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, wr->num_sge); + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); } return 0; @@ -226,6 +225,30 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state); +static int check_send_valid(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + struct ib_qp *ibqp = &hr_qp->ibqp; + struct device *dev = hr_dev->dev; + + if (unlikely(ibqp->qp_type != IB_QPT_RC && + ibqp->qp_type != IB_QPT_GSI && + ibqp->qp_type != IB_QPT_UD)) { + dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type); + return -EOPNOTSUPP; + } else if (unlikely(hr_qp->state == IB_QPS_RESET || + hr_qp->state == IB_QPS_INIT || + hr_qp->state == IB_QPS_RTR)) { + dev_err(dev, "Post WQE fail, QP state %d!\n", hr_qp->state); + return -EINVAL; + } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) { + dev_err(dev, "Post WQE fail, dev state %d!\n", hr_dev->state); + return -EIO; + } + + return 0; +} + static int hns_roce_v2_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) @@ -239,38 +262,31 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct device *dev = hr_dev->dev; struct hns_roce_v2_db sq_db; struct ib_qp_attr attr; - unsigned int sge_ind; unsigned int owner_bit; + unsigned int sge_idx; + unsigned int wqe_idx; unsigned long flags; - unsigned int ind; + int valid_num_sge; void *wqe = NULL; bool loopback; int attr_mask; u32 tmp_len; - int ret = 0; u32 hr_op; u8 *smac; int nreq; + int ret; int i; - if (unlikely(ibqp->qp_type != IB_QPT_RC && - ibqp->qp_type != IB_QPT_GSI && - ibqp->qp_type != IB_QPT_UD)) { - dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type); - *bad_wr = wr; - return -EOPNOTSUPP; - } + spin_lock_irqsave(&qp->sq.lock, flags); - if (unlikely(qp->state == IB_QPS_RESET || qp->state == IB_QPS_INIT || - qp->state == IB_QPS_RTR)) { - dev_err(dev, "Post WQE fail, QP state %d err!\n", qp->state); + ret = check_send_valid(hr_dev, qp); + if (ret) { *bad_wr = wr; - return -EINVAL; + nreq = 0; + goto out; } - spin_lock_irqsave(&qp->sq.lock, flags); - ind = qp->sq_next_wqe; - sge_ind = qp->next_sge; + sge_idx = qp->next_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { @@ -279,6 +295,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, goto out; } + wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); + if (unlikely(wr->num_sge > qp->sq.max_gs)) { dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n", wr->num_sge, qp->sq.max_gs); @@ -287,14 +305,20 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, goto out; } - wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); - qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = - wr->wr_id; - + wqe = get_send_wqe(qp, wqe_idx); + qp->sq.wrid[wqe_idx] = wr->wr_id; owner_bit = ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); + valid_num_sge = 0; tmp_len = 0; + for (i = 0; i < wr->num_sge; i++) { + if (likely(wr->sg_list[i].length)) { + tmp_len += wr->sg_list[i].length; + valid_num_sge++; + } + } + /* Corresponding to the QP type, wqe process separately */ if (ibqp->qp_type == IB_QPT_GSI) { ud_sq_wqe = wqe; @@ -330,9 +354,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, V2_UD_SEND_WQE_BYTE_4_OPCODE_S, HNS_ROCE_V2_WQE_OP_SEND); - for (i = 0; i < wr->num_sge; i++) - tmp_len += wr->sg_list[i].length; - ud_sq_wqe->msg_len = cpu_to_le32(le32_to_cpu(ud_sq_wqe->msg_len) + tmp_len); @@ -368,12 +389,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, - wr->num_sge); + valid_num_sge); roce_set_field(ud_sq_wqe->byte_20, V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, - sge_ind & (qp->sge.sge_cnt - 1)); + sge_idx & (qp->sge.sge_cnt - 1)); roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, @@ -423,13 +444,10 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2); - set_extend_sge(qp, wr, &sge_ind); - ind++; + set_extend_sge(qp, wr, &sge_idx, valid_num_sge); } else if (ibqp->qp_type == IB_QPT_RC) { rc_sq_wqe = wqe; memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); - for (i = 0; i < wr->num_sge; i++) - tmp_len += wr->sg_list[i].length; rc_sq_wqe->msg_len = cpu_to_le32(le32_to_cpu(rc_sq_wqe->msg_len) + tmp_len); @@ -550,15 +568,14 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, - wr->num_sge); + valid_num_sge); } else if (wr->opcode != IB_WR_REG_MR) { ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, - wqe, &sge_ind, bad_wr); + wqe, &sge_idx, + valid_num_sge, bad_wr); if (ret) goto out; } - - ind++; } else { dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type); spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -588,8 +605,7 @@ out: hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg_l); - qp->sq_next_wqe = ind; - qp->next_sge = sge_ind; + qp->next_sge = sge_idx; if (qp->state == IB_QPS_ERR) { attr_mask = IB_QP_STATE; @@ -610,6 +626,17 @@ out: return ret; } +static int check_recv_valid(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) + return -EIO; + else if (hr_qp->state == IB_QPS_RESET) + return -EINVAL; + + return 0; +} + static int hns_roce_v2_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) @@ -623,18 +650,18 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, unsigned long flags; void *wqe = NULL; int attr_mask; - int ret = 0; + u32 wqe_idx; int nreq; - int ind; + int ret; int i; spin_lock_irqsave(&hr_qp->rq.lock, flags); - ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1); - if (hr_qp->state == IB_QPS_RESET) { - spin_unlock_irqrestore(&hr_qp->rq.lock, flags); + ret = check_recv_valid(hr_dev, hr_qp); + if (ret) { *bad_wr = wr; - return -EINVAL; + nreq = 0; + goto out; } for (nreq = 0; wr; ++nreq, wr = wr->next) { @@ -645,6 +672,8 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, goto out; } + wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); + if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n", wr->num_sge, hr_qp->rq.max_gs); @@ -653,7 +682,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, goto out; } - wqe = get_recv_wqe(hr_qp, ind); + wqe = get_recv_wqe(hr_qp, wqe_idx); dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; for (i = 0; i < wr->num_sge; i++) { if (!wr->sg_list[i].length) @@ -669,8 +698,8 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, /* rq support inline data */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { - sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list; - hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = + sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list; + hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = (u32)wr->num_sge; for (i = 0; i < wr->num_sge; i++) { sge_list[i].addr = @@ -679,9 +708,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, } } - hr_qp->rq.wrid[ind] = wr->wr_id; - - ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1); + hr_qp->rq.wrid[wqe_idx] = wr->wr_id; } out: @@ -1254,7 +1281,6 @@ static void hns_roce_function_clear(struct hns_roce_dev *hr_dev) } out: - dev_err(hr_dev->dev, "Func clear fail.\n"); hns_roce_func_clr_rst_prc(hr_dev, ret, fclr_write_fail_flag); } @@ -1378,8 +1404,7 @@ static int hns_roce_query_pf_timer_resource(struct hns_roce_dev *hr_dev) return 0; } -static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, - int vf_id) +static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, int vf_id) { struct hns_roce_cmq_desc desc; struct hns_roce_vf_switch *swt; @@ -1388,13 +1413,12 @@ static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, swt = (struct hns_roce_vf_switch *)desc.data; hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true); swt->rocee_sel |= cpu_to_le32(HNS_ICL_SWITCH_CMD_ROCEE_SEL); - roce_set_field(swt->fun_id, - VF_SWITCH_DATA_FUN_ID_VF_ID_M, - VF_SWITCH_DATA_FUN_ID_VF_ID_S, - vf_id); + roce_set_field(swt->fun_id, VF_SWITCH_DATA_FUN_ID_VF_ID_M, + VF_SWITCH_DATA_FUN_ID_VF_ID_S, vf_id); ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) return ret; + desc.flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN); desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR); @@ -1574,69 +1598,9 @@ static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev) return hns_roce_cmq_send(hr_dev, &desc, 1); } -static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) +static void set_default_caps(struct hns_roce_dev *hr_dev) { struct hns_roce_caps *caps = &hr_dev->caps; - int ret; - - ret = hns_roce_cmq_query_hw_info(hr_dev); - if (ret) { - dev_err(hr_dev->dev, "Query hardware version fail, ret = %d.\n", - ret); - return ret; - } - - ret = hns_roce_query_fw_ver(hr_dev); - if (ret) { - dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n", - ret); - return ret; - } - - ret = hns_roce_config_global_param(hr_dev); - if (ret) { - dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n", - ret); - return ret; - } - - /* Get pf resource owned by every pf */ - ret = hns_roce_query_pf_resource(hr_dev); - if (ret) { - dev_err(hr_dev->dev, "Query pf resource fail, ret = %d.\n", - ret); - return ret; - } - - if (hr_dev->pci_dev->revision == 0x21) { - ret = hns_roce_query_pf_timer_resource(hr_dev); - if (ret) { - dev_err(hr_dev->dev, - "Query pf timer resource fail, ret = %d.\n", - ret); - return ret; - } - } - - ret = hns_roce_alloc_vf_resource(hr_dev); - if (ret) { - dev_err(hr_dev->dev, "Allocate vf resource fail, ret = %d.\n", - ret); - return ret; - } - - if (hr_dev->pci_dev->revision == 0x21) { - ret = hns_roce_set_vf_switch_param(hr_dev, 0); - if (ret) { - dev_err(hr_dev->dev, - "Set function switch param fail, ret = %d.\n", - ret); - return ret; - } - } - - hr_dev->vendor_part_id = hr_dev->pci_dev->device; - hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid); caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM; caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM; @@ -1644,17 +1608,15 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->num_srqs = HNS_ROCE_V2_MAX_SRQ_NUM; caps->min_cqes = HNS_ROCE_MIN_CQE_NUM; caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM; - caps->max_srqwqes = HNS_ROCE_V2_MAX_SRQWQE_NUM; caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM; caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM; caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM; caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; - caps->max_srq_sg = HNS_ROCE_V2_MAX_SRQ_SGE_NUM; caps->num_uars = HNS_ROCE_V2_UAR_NUM; caps->phy_num_uars = HNS_ROCE_V2_PHY_UAR_NUM; caps->num_aeq_vectors = HNS_ROCE_V2_AEQE_VEC_NUM; caps->num_comp_vectors = HNS_ROCE_V2_COMP_VEC_NUM; - caps->num_other_vectors = HNS_ROCE_V2_ABNORMAL_VEC_NUM; + caps->num_other_vectors = HNS_ROCE_V2_ABNORMAL_VEC_NUM; caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM; caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS; @@ -1668,12 +1630,12 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ; caps->qpc_entry_sz = HNS_ROCE_V2_QPC_ENTRY_SZ; caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ; - caps->trrl_entry_sz = HNS_ROCE_V2_TRRL_ENTRY_SZ; + caps->trrl_entry_sz = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ; caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ; caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ; caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; - caps->idx_entry_sz = 4; + caps->idx_entry_sz = HNS_ROCE_V2_IDX_ENTRY_SZ; caps->cq_entry_sz = HNS_ROCE_V2_CQE_ENTRY_SIZE; caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED; caps->reserved_lkey = 0; @@ -1696,16 +1658,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->mpt_ba_pg_sz = 0; caps->mpt_buf_pg_sz = 0; caps->mpt_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; - caps->pbl_ba_pg_sz = 2; - caps->pbl_buf_pg_sz = 0; - caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM; caps->mtt_ba_pg_sz = 0; caps->mtt_buf_pg_sz = 0; caps->mtt_hop_num = HNS_ROCE_MTT_HOP_NUM; - caps->wqe_sq_hop_num = 2; - caps->wqe_sge_hop_num = 1; - caps->wqe_rq_hop_num = 2; - caps->cqe_ba_pg_sz = 6; + caps->wqe_sq_hop_num = HNS_ROCE_SQWQE_HOP_NUM; + caps->wqe_sge_hop_num = HNS_ROCE_EXT_SGE_HOP_NUM; + caps->wqe_rq_hop_num = HNS_ROCE_RQWQE_HOP_NUM; + caps->cqe_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_256K; caps->cqe_buf_pg_sz = 0; caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM; caps->srqwqe_ba_pg_sz = 0; @@ -1714,10 +1673,6 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->idx_ba_pg_sz = 0; caps->idx_buf_pg_sz = 0; caps->idx_hop_num = HNS_ROCE_IDX_HOP_NUM; - caps->eqe_ba_pg_sz = 0; - caps->eqe_buf_pg_sz = 0; - caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM; - caps->tsq_buf_pg_sz = 0; caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE; caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | @@ -1726,24 +1681,19 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) HNS_ROCE_CAP_FLAG_RECORD_DB | HNS_ROCE_CAP_FLAG_SQ_RECORD_DB; - if (hr_dev->pci_dev->revision == 0x21) - caps->flags |= HNS_ROCE_CAP_FLAG_MW | - HNS_ROCE_CAP_FLAG_FRMR; - caps->pkey_table_len[0] = 1; - caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; + caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM; caps->aeqe_depth = HNS_ROCE_V2_ASYNC_EQE_NUM; caps->local_ca_ack_delay = 0; caps->max_mtu = IB_MTU_4096; - caps->max_srqs = HNS_ROCE_V2_MAX_SRQ; caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR; caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE; - if (hr_dev->pci_dev->revision == 0x21) { - caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | - HNS_ROCE_CAP_FLAG_SRQ | + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_B) { + caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW | + HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR | HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL; caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; @@ -1757,12 +1707,345 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->cqc_timer_buf_pg_sz = 0; caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ; - caps->sccc_ba_pg_sz = 0; - caps->sccc_buf_pg_sz = 0; - caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; + caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ; + caps->sccc_ba_pg_sz = 0; + caps->sccc_buf_pg_sz = 0; + caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; + } +} + +static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num, + int *buf_page_size, int *bt_page_size, u32 hem_type) +{ + u64 obj_per_chunk; + int bt_chunk_size = 1 << PAGE_SHIFT; + int buf_chunk_size = 1 << PAGE_SHIFT; + int obj_per_chunk_default = buf_chunk_size / obj_size; + + *buf_page_size = 0; + *bt_page_size = 0; + + switch (hop_num) { + case 3: + obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) * + (bt_chunk_size / BA_BYTE_LEN) * + (bt_chunk_size / BA_BYTE_LEN) * + obj_per_chunk_default; + break; + case 2: + obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) * + (bt_chunk_size / BA_BYTE_LEN) * + obj_per_chunk_default; + break; + case 1: + obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) * + obj_per_chunk_default; + break; + case HNS_ROCE_HOP_NUM_0: + obj_per_chunk = ctx_bt_num * obj_per_chunk_default; + break; + default: + pr_err("Table %d not support hop_num = %d!\n", hem_type, + hop_num); + return; + } + + if (hem_type >= HEM_TYPE_MTT) + *bt_page_size = ilog2(DIV_ROUND_UP(obj_num, obj_per_chunk)); + else + *buf_page_size = ilog2(DIV_ROUND_UP(obj_num, obj_per_chunk)); +} + +static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmq_desc desc[HNS_ROCE_QUERY_PF_CAPS_CMD_NUM]; + struct hns_roce_caps *caps = &hr_dev->caps; + struct hns_roce_query_pf_caps_a *resp_a; + struct hns_roce_query_pf_caps_b *resp_b; + struct hns_roce_query_pf_caps_c *resp_c; + struct hns_roce_query_pf_caps_d *resp_d; + struct hns_roce_query_pf_caps_e *resp_e; + int ctx_hop_num; + int pbl_hop_num; + int ret; + int i; + + for (i = 0; i < HNS_ROCE_QUERY_PF_CAPS_CMD_NUM; i++) { + hns_roce_cmq_setup_basic_desc(&desc[i], + HNS_ROCE_OPC_QUERY_PF_CAPS_NUM, + true); + if (i < (HNS_ROCE_QUERY_PF_CAPS_CMD_NUM - 1)) + desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + else + desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + } + + ret = hns_roce_cmq_send(hr_dev, desc, HNS_ROCE_QUERY_PF_CAPS_CMD_NUM); + if (ret) + return ret; + + resp_a = (struct hns_roce_query_pf_caps_a *)desc[0].data; + resp_b = (struct hns_roce_query_pf_caps_b *)desc[1].data; + resp_c = (struct hns_roce_query_pf_caps_c *)desc[2].data; + resp_d = (struct hns_roce_query_pf_caps_d *)desc[3].data; + resp_e = (struct hns_roce_query_pf_caps_e *)desc[4].data; + + caps->local_ca_ack_delay = resp_a->local_ca_ack_delay; + caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg); + caps->max_sq_inline = le16_to_cpu(resp_a->max_sq_inline); + caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg); + caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg); + caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer); + caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer); + caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges); + caps->num_aeq_vectors = resp_a->num_aeq_vectors; + caps->num_other_vectors = resp_a->num_other_vectors; + caps->max_sq_desc_sz = resp_a->max_sq_desc_sz; + caps->max_rq_desc_sz = resp_a->max_rq_desc_sz; + caps->max_srq_desc_sz = resp_a->max_srq_desc_sz; + caps->cq_entry_sz = resp_a->cq_entry_sz; + + caps->mtpt_entry_sz = resp_b->mtpt_entry_sz; + caps->irrl_entry_sz = resp_b->irrl_entry_sz; + caps->trrl_entry_sz = resp_b->trrl_entry_sz; + caps->cqc_entry_sz = resp_b->cqc_entry_sz; + caps->srqc_entry_sz = resp_b->srqc_entry_sz; + caps->idx_entry_sz = resp_b->idx_entry_sz; + caps->sccc_entry_sz = resp_b->scc_ctx_entry_sz; + caps->max_mtu = resp_b->max_mtu; + caps->qpc_entry_sz = le16_to_cpu(resp_b->qpc_entry_sz); + caps->min_cqes = resp_b->min_cqes; + caps->min_wqes = resp_b->min_wqes; + caps->page_size_cap = le32_to_cpu(resp_b->page_size_cap); + caps->pkey_table_len[0] = resp_b->pkey_table_len; + caps->phy_num_uars = resp_b->phy_num_uars; + ctx_hop_num = resp_b->ctx_hop_num; + pbl_hop_num = resp_b->pbl_hop_num; + + caps->num_pds = 1 << roce_get_field(resp_c->cap_flags_num_pds, + V2_QUERY_PF_CAPS_C_NUM_PDS_M, + V2_QUERY_PF_CAPS_C_NUM_PDS_S); + caps->flags = roce_get_field(resp_c->cap_flags_num_pds, + V2_QUERY_PF_CAPS_C_CAP_FLAGS_M, + V2_QUERY_PF_CAPS_C_CAP_FLAGS_S); + caps->num_cqs = 1 << roce_get_field(resp_c->max_gid_num_cqs, + V2_QUERY_PF_CAPS_C_NUM_CQS_M, + V2_QUERY_PF_CAPS_C_NUM_CQS_S); + caps->gid_table_len[0] = roce_get_field(resp_c->max_gid_num_cqs, + V2_QUERY_PF_CAPS_C_MAX_GID_M, + V2_QUERY_PF_CAPS_C_MAX_GID_S); + caps->max_cqes = 1 << roce_get_field(resp_c->cq_depth, + V2_QUERY_PF_CAPS_C_CQ_DEPTH_M, + V2_QUERY_PF_CAPS_C_CQ_DEPTH_S); + caps->num_mtpts = 1 << roce_get_field(resp_c->num_mrws, + V2_QUERY_PF_CAPS_C_NUM_MRWS_M, + V2_QUERY_PF_CAPS_C_NUM_MRWS_S); + caps->num_qps = 1 << roce_get_field(resp_c->ord_num_qps, + V2_QUERY_PF_CAPS_C_NUM_QPS_M, + V2_QUERY_PF_CAPS_C_NUM_QPS_S); + caps->max_qp_init_rdma = roce_get_field(resp_c->ord_num_qps, + V2_QUERY_PF_CAPS_C_MAX_ORD_M, + V2_QUERY_PF_CAPS_C_MAX_ORD_S); + caps->max_qp_dest_rdma = caps->max_qp_init_rdma; + caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth); + caps->num_srqs = 1 << roce_get_field(resp_d->wq_hop_num_max_srqs, + V2_QUERY_PF_CAPS_D_NUM_SRQS_M, + V2_QUERY_PF_CAPS_D_NUM_SRQS_S); + caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth); + caps->ceqe_depth = 1 << roce_get_field(resp_d->num_ceqs_ceq_depth, + V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M, + V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S); + caps->num_comp_vectors = roce_get_field(resp_d->num_ceqs_ceq_depth, + V2_QUERY_PF_CAPS_D_NUM_CEQS_M, + V2_QUERY_PF_CAPS_D_NUM_CEQS_S); + caps->aeqe_depth = 1 << roce_get_field(resp_d->arm_st_aeq_depth, + V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M, + V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S); + caps->default_aeq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth, + V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M, + V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S); + caps->default_ceq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth, + V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M, + V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S); + caps->reserved_pds = roce_get_field(resp_d->num_uars_rsv_pds, + V2_QUERY_PF_CAPS_D_RSV_PDS_M, + V2_QUERY_PF_CAPS_D_RSV_PDS_S); + caps->num_uars = 1 << roce_get_field(resp_d->num_uars_rsv_pds, + V2_QUERY_PF_CAPS_D_NUM_UARS_M, + V2_QUERY_PF_CAPS_D_NUM_UARS_S); + caps->reserved_qps = roce_get_field(resp_d->rsv_uars_rsv_qps, + V2_QUERY_PF_CAPS_D_RSV_QPS_M, + V2_QUERY_PF_CAPS_D_RSV_QPS_S); + caps->reserved_uars = roce_get_field(resp_d->rsv_uars_rsv_qps, + V2_QUERY_PF_CAPS_D_RSV_UARS_M, + V2_QUERY_PF_CAPS_D_RSV_UARS_S); + caps->reserved_mrws = roce_get_field(resp_e->chunk_size_shift_rsv_mrws, + V2_QUERY_PF_CAPS_E_RSV_MRWS_M, + V2_QUERY_PF_CAPS_E_RSV_MRWS_S); + caps->chunk_sz = 1 << roce_get_field(resp_e->chunk_size_shift_rsv_mrws, + V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M, + V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S); + caps->reserved_cqs = roce_get_field(resp_e->rsv_cqs, + V2_QUERY_PF_CAPS_E_RSV_CQS_M, + V2_QUERY_PF_CAPS_E_RSV_CQS_S); + caps->reserved_srqs = roce_get_field(resp_e->rsv_srqs, + V2_QUERY_PF_CAPS_E_RSV_SRQS_M, + V2_QUERY_PF_CAPS_E_RSV_SRQS_S); + caps->reserved_lkey = roce_get_field(resp_e->rsv_lkey, + V2_QUERY_PF_CAPS_E_RSV_LKEYS_M, + V2_QUERY_PF_CAPS_E_RSV_LKEYS_S); + caps->default_ceq_max_cnt = le16_to_cpu(resp_e->ceq_max_cnt); + caps->default_ceq_period = le16_to_cpu(resp_e->ceq_period); + caps->default_aeq_max_cnt = le16_to_cpu(resp_e->aeq_max_cnt); + caps->default_aeq_period = le16_to_cpu(resp_e->aeq_period); + + caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ; + caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; + caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; + caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; + caps->mtt_ba_pg_sz = 0; + caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS; + caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; + caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS; + + caps->qpc_hop_num = ctx_hop_num; + caps->srqc_hop_num = ctx_hop_num; + caps->cqc_hop_num = ctx_hop_num; + caps->mpt_hop_num = ctx_hop_num; + caps->mtt_hop_num = pbl_hop_num; + caps->cqe_hop_num = pbl_hop_num; + caps->srqwqe_hop_num = pbl_hop_num; + caps->idx_hop_num = pbl_hop_num; + caps->wqe_sq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs, + V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M, + V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S); + caps->wqe_sge_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs, + V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M, + V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S); + caps->wqe_rq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs, + V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M, + V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S); + + calc_pg_sz(caps->num_qps, caps->qpc_entry_sz, caps->qpc_hop_num, + caps->qpc_bt_num, &caps->qpc_buf_pg_sz, &caps->qpc_ba_pg_sz, + HEM_TYPE_QPC); + calc_pg_sz(caps->num_mtpts, caps->mtpt_entry_sz, caps->mpt_hop_num, + caps->mpt_bt_num, &caps->mpt_buf_pg_sz, &caps->mpt_ba_pg_sz, + HEM_TYPE_MTPT); + calc_pg_sz(caps->num_cqs, caps->cqc_entry_sz, caps->cqc_hop_num, + caps->cqc_bt_num, &caps->cqc_buf_pg_sz, &caps->cqc_ba_pg_sz, + HEM_TYPE_CQC); + calc_pg_sz(caps->num_srqs, caps->srqc_entry_sz, caps->srqc_hop_num, + caps->srqc_bt_num, &caps->srqc_buf_pg_sz, + &caps->srqc_ba_pg_sz, HEM_TYPE_SRQC); + + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_B) { + caps->sccc_hop_num = ctx_hop_num; + caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + + calc_pg_sz(caps->num_qps, caps->sccc_entry_sz, + caps->sccc_hop_num, caps->sccc_bt_num, + &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz, + HEM_TYPE_SCCC); + calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz, + caps->cqc_timer_hop_num, caps->cqc_timer_bt_num, + &caps->cqc_timer_buf_pg_sz, + &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER); + } + + calc_pg_sz(caps->num_cqe_segs, caps->mtt_entry_sz, caps->cqe_hop_num, + 1, &caps->cqe_buf_pg_sz, &caps->cqe_ba_pg_sz, HEM_TYPE_CQE); + calc_pg_sz(caps->num_srqwqe_segs, caps->mtt_entry_sz, + caps->srqwqe_hop_num, 1, &caps->srqwqe_buf_pg_sz, + &caps->srqwqe_ba_pg_sz, HEM_TYPE_SRQWQE); + calc_pg_sz(caps->num_idx_segs, caps->idx_entry_sz, caps->idx_hop_num, + 1, &caps->idx_buf_pg_sz, &caps->idx_ba_pg_sz, HEM_TYPE_IDX); + + return 0; +} + +static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_caps *caps = &hr_dev->caps; + int ret; + + ret = hns_roce_cmq_query_hw_info(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "Query hardware version fail, ret = %d.\n", + ret); + return ret; + } + + ret = hns_roce_query_fw_ver(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n", + ret); + return ret; + } + + ret = hns_roce_config_global_param(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n", + ret); + return ret; + } + + /* Get pf resource owned by every pf */ + ret = hns_roce_query_pf_resource(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "Query pf resource fail, ret = %d.\n", + ret); + return ret; + } + + if (hr_dev->pci_dev->revision == 0x21) { + ret = hns_roce_query_pf_timer_resource(hr_dev); + if (ret) { + dev_err(hr_dev->dev, + "Query pf timer resource fail, ret = %d.\n", + ret); + return ret; + } + } + + ret = hns_roce_alloc_vf_resource(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "Allocate vf resource fail, ret = %d.\n", + ret); + return ret; + } + + if (hr_dev->pci_dev->revision == 0x21) { + ret = hns_roce_set_vf_switch_param(hr_dev, 0); + if (ret) { + dev_err(hr_dev->dev, + "Set function switch param fail, ret = %d.\n", + ret); + return ret; + } } + hr_dev->vendor_part_id = hr_dev->pci_dev->device; + hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid); + + caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; + caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS; + caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; + caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS; + + caps->pbl_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_16K; + caps->pbl_buf_pg_sz = 0; + caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM; + caps->eqe_ba_pg_sz = 0; + caps->eqe_buf_pg_sz = 0; + caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM; + caps->tsq_buf_pg_sz = 0; + + ret = hns_roce_query_pf_caps(hr_dev); + if (ret) + set_default_caps(hr_dev); + ret = hns_roce_v2_set_bt(hr_dev); if (ret) dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n", @@ -1818,37 +2101,32 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, req_a->base_addr_h = cpu_to_le32(link_tbl->table.map >> 32); roce_set_field(req_a->depth_pgsz_init_en, - CFG_LLM_QUE_DEPTH_M, - CFG_LLM_QUE_DEPTH_S, + CFG_LLM_QUE_DEPTH_M, CFG_LLM_QUE_DEPTH_S, link_tbl->npages); roce_set_field(req_a->depth_pgsz_init_en, - CFG_LLM_QUE_PGSZ_M, - CFG_LLM_QUE_PGSZ_S, + CFG_LLM_QUE_PGSZ_M, CFG_LLM_QUE_PGSZ_S, link_tbl->pg_sz); req_a->head_ba_l = cpu_to_le32(entry[0].blk_ba0); req_a->head_ba_h_nxtptr = cpu_to_le32(entry[0].blk_ba1_nxt_ptr); - roce_set_field(req_a->head_ptr, - CFG_LLM_HEAD_PTR_M, + roce_set_field(req_a->head_ptr, CFG_LLM_HEAD_PTR_M, CFG_LLM_HEAD_PTR_S, 0); } else { req_b->tail_ba_l = cpu_to_le32(entry[page_num - 1].blk_ba0); - roce_set_field(req_b->tail_ba_h, - CFG_LLM_TAIL_BA_H_M, + roce_set_field(req_b->tail_ba_h, CFG_LLM_TAIL_BA_H_M, CFG_LLM_TAIL_BA_H_S, entry[page_num - 1].blk_ba1_nxt_ptr & - HNS_ROCE_LINK_TABLE_BA1_M); - roce_set_field(req_b->tail_ptr, - CFG_LLM_TAIL_PTR_M, + HNS_ROCE_LINK_TABLE_BA1_M); + roce_set_field(req_b->tail_ptr, CFG_LLM_TAIL_PTR_M, CFG_LLM_TAIL_PTR_S, (entry[page_num - 2].blk_ba1_nxt_ptr & - HNS_ROCE_LINK_TABLE_NXT_PTR_M) >> - HNS_ROCE_LINK_TABLE_NXT_PTR_S); + HNS_ROCE_LINK_TABLE_NXT_PTR_M) >> + HNS_ROCE_LINK_TABLE_NXT_PTR_S); } } - roce_set_field(req_a->depth_pgsz_init_en, - CFG_LLM_INIT_EN_M, CFG_LLM_INIT_EN_S, 1); + roce_set_field(req_a->depth_pgsz_init_en, CFG_LLM_INIT_EN_M, + CFG_LLM_INIT_EN_S, 1); return hns_roce_cmq_send(hr_dev, desc, 2); } @@ -2141,11 +2419,9 @@ static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev, hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false); - roce_set_field(sgid_tb->table_idx_rsv, - CFG_SGID_TB_TABLE_IDX_M, + roce_set_field(sgid_tb->table_idx_rsv, CFG_SGID_TB_TABLE_IDX_M, CFG_SGID_TB_TABLE_IDX_S, gid_index); - roce_set_field(sgid_tb->vf_sgid_type_rsv, - CFG_SGID_TB_VF_SGID_TYPE_M, + roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M, CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type); p = (u32 *)&gid->raw[0]; @@ -2416,11 +2692,10 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE); roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, V2_MPT_BYTE_4_PD_S, mw->pdn); - roce_set_field(mpt_entry->byte_4_pd_hop_st, - V2_MPT_BYTE_4_PBL_HOP_NUM_M, + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, V2_MPT_BYTE_4_PBL_HOP_NUM_S, - mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? - 0 : mw->pbl_hop_num); + mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : + mw->pbl_hop_num); roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, @@ -2561,8 +2836,7 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_ARM_ST_M, V2_CQC_BYTE_4_ARM_ST_S, REG_NXT_CEQE); roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_SHIFT_M, - V2_CQC_BYTE_4_SHIFT_S, - ilog2(hr_cq->cq_depth)); + V2_CQC_BYTE_4_SHIFT_S, ilog2(hr_cq->cq_depth)); roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CEQN_M, V2_CQC_BYTE_4_CEQN_S, hr_cq->vector); @@ -2687,6 +2961,55 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, return 0; } +static int sw_comp(struct hns_roce_qp *hr_qp, struct hns_roce_wq *wq, + int num_entries, struct ib_wc *wc) +{ + unsigned int left; + int npolled = 0; + + left = wq->head - wq->tail; + if (left == 0) + return 0; + + left = min_t(unsigned int, (unsigned int)num_entries, left); + while (npolled < left) { + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = 0; + wc->qp = &hr_qp->ibqp; + + wq->tail++; + wc++; + npolled++; + } + + return npolled; +} + +static int hns_roce_v2_sw_poll_cq(struct hns_roce_cq *hr_cq, int num_entries, + struct ib_wc *wc) +{ + struct hns_roce_qp *hr_qp; + int npolled = 0; + + list_for_each_entry(hr_qp, &hr_cq->sq_list, sq_node) { + npolled += sw_comp(hr_qp, &hr_qp->sq, + num_entries - npolled, wc + npolled); + if (npolled >= num_entries) + goto out; + } + + list_for_each_entry(hr_qp, &hr_cq->rq_list, rq_node) { + npolled += sw_comp(hr_qp, &hr_qp->rq, + num_entries - npolled, wc + npolled); + if (npolled >= num_entries) + goto out; + } + +out: + return npolled; +} + static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, struct hns_roce_qp **cur_qp, struct ib_wc *wc) { @@ -2967,6 +3290,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) { + struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); struct hns_roce_qp *cur_qp = NULL; unsigned long flags; @@ -2974,6 +3298,18 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries, spin_lock_irqsave(&hr_cq->lock, flags); + /* + * When the device starts to reset, the state is RST_DOWN. At this time, + * there may still be some valid CQEs in the hardware that are not + * polled. Therefore, it is not allowed to switch to the software mode + * immediately. When the state changes to UNINIT, CQE no longer exists + * in the hardware, and then switch to software mode. + */ + if (hr_dev->state == HNS_ROCE_DEVICE_STATE_UNINIT) { + npolled = hns_roce_v2_sw_poll_cq(hr_cq, num_entries, wc); + goto out; + } + for (npolled = 0; npolled < num_entries; ++npolled) { if (hns_roce_v2_poll_one(hr_cq, &cur_qp, wc + npolled)) break; @@ -2985,6 +3321,7 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries, hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index); } +out: spin_unlock_irqrestore(&hr_cq->lock, flags); return npolled; @@ -3159,8 +3496,6 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, } static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev, - enum ib_qp_state cur_state, - enum ib_qp_state new_state, struct hns_roce_v2_qp_context *context, struct hns_roce_qp *hr_qp) { @@ -3210,6 +3545,9 @@ static void set_access_flags(struct hns_roce_qp *hr_qp, roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0); + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, + !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, 0); } static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, @@ -3577,6 +3915,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, IB_ACCESS_REMOTE_ATOMIC)); roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0); + roce_set_bit(context->byte_76_srqn_op_en, + V2_QPC_BYTE_76_EXT_ATE_S, + !!(attr->qp_access_flags & + IB_ACCESS_REMOTE_ATOMIC)); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, + V2_QPC_BYTE_76_EXT_ATE_S, 0); } else { roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ)); @@ -3592,6 +3936,11 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0); + roce_set_bit(context->byte_76_srqn_op_en, + V2_QPC_BYTE_76_EXT_ATE_S, + !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, + V2_QPC_BYTE_76_EXT_ATE_S, 0); } roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, @@ -3838,13 +4187,11 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, /* Configure GID index */ port_num = rdma_ah_get_port_num(&attr->ah_attr); roce_set_field(context->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SGID_IDX_M, - V2_QPC_BYTE_20_SGID_IDX_S, + V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, hns_get_gid_index(hr_dev, port_num - 1, grh->sgid_index)); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SGID_IDX_M, - V2_QPC_BYTE_20_SGID_IDX_S, 0); + V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0); memcpy(&(context->dmac), dmac, sizeof(u32)); roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M, V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4]))); @@ -4234,8 +4581,7 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, roce_set_field(context->byte_212_lsn, V2_QPC_BYTE_212_RETRY_CNT_M, - V2_QPC_BYTE_212_RETRY_CNT_S, - attr->retry_cnt); + V2_QPC_BYTE_212_RETRY_CNT_S, attr->retry_cnt); roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_RETRY_CNT_M, V2_QPC_BYTE_212_RETRY_CNT_S, 0); @@ -4443,7 +4789,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, V2_QPC_BYTE_60_QP_ST_S, 0); /* SW pass context to HW */ - ret = hns_roce_v2_qp_modify(hr_dev, cur_state, new_state, ctx, hr_qp); + ret = hns_roce_v2_qp_modify(hr_dev, ctx, hr_qp); if (ret) { dev_err(dev, "hns_roce_qp_modify failed(%d)\n", ret); goto out; @@ -4464,7 +4810,6 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, hr_qp->rq.tail = 0; hr_qp->sq.head = 0; hr_qp->sq.tail = 0; - hr_qp->sq_next_wqe = 0; hr_qp->next_sge = 0; if (hr_qp->rq.wqe_cnt) *hr_qp->rdb.db_record = 0; @@ -4649,6 +4994,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, { struct hns_roce_cq *send_cq, *recv_cq; struct ib_device *ibdev = &hr_dev->ib_dev; + unsigned long flags; int ret = 0; if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) { @@ -4659,21 +5005,32 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, ibdev_err(ibdev, "modify QP to Reset failed.\n"); } - send_cq = to_hr_cq(hr_qp->ibqp.send_cq); - recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq); + send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL; + recv_cq = hr_qp->ibqp.recv_cq ? to_hr_cq(hr_qp->ibqp.recv_cq) : NULL; + spin_lock_irqsave(&hr_dev->qp_list_lock, flags); hns_roce_lock_cqs(send_cq, recv_cq); + list_del(&hr_qp->node); + list_del(&hr_qp->sq_node); + list_del(&hr_qp->rq_node); + if (!udata) { - __hns_roce_v2_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ? - to_hr_srq(hr_qp->ibqp.srq) : NULL); - if (send_cq != recv_cq) + if (recv_cq) + __hns_roce_v2_cq_clean(recv_cq, hr_qp->qpn, + (hr_qp->ibqp.srq ? + to_hr_srq(hr_qp->ibqp.srq) : + NULL)); + + if (send_cq && send_cq != recv_cq) __hns_roce_v2_cq_clean(send_cq, hr_qp->qpn, NULL); + } hns_roce_qp_remove(hr_dev, hr_qp); hns_roce_unlock_cqs(send_cq, recv_cq); + spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags); hns_roce_qp_free(hr_dev, hr_qp); @@ -5155,8 +5512,7 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, */ dma_rmb(); - cqn = roce_get_field(ceqe->comp, - HNS_ROCE_V2_CEQE_COMP_CQN_M, + cqn = roce_get_field(ceqe->comp, HNS_ROCE_V2_CEQE_COMP_CQN_M, HNS_ROCE_V2_CEQE_COMP_CQN_S); hns_roce_cq_completion(hr_dev, cqn); @@ -5409,126 +5765,98 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, eq->eqe_ba = eq->l0_dma; /* set eqc state */ - roce_set_field(eqc->byte_4, - HNS_ROCE_EQC_EQ_ST_M, - HNS_ROCE_EQC_EQ_ST_S, + roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQ_ST_M, HNS_ROCE_EQC_EQ_ST_S, HNS_ROCE_V2_EQ_STATE_VALID); /* set eqe hop num */ - roce_set_field(eqc->byte_4, - HNS_ROCE_EQC_HOP_NUM_M, + roce_set_field(eqc->byte_4, HNS_ROCE_EQC_HOP_NUM_M, HNS_ROCE_EQC_HOP_NUM_S, eq->hop_num); /* set eqc over_ignore */ - roce_set_field(eqc->byte_4, - HNS_ROCE_EQC_OVER_IGNORE_M, + roce_set_field(eqc->byte_4, HNS_ROCE_EQC_OVER_IGNORE_M, HNS_ROCE_EQC_OVER_IGNORE_S, eq->over_ignore); /* set eqc coalesce */ - roce_set_field(eqc->byte_4, - HNS_ROCE_EQC_COALESCE_M, + roce_set_field(eqc->byte_4, HNS_ROCE_EQC_COALESCE_M, HNS_ROCE_EQC_COALESCE_S, eq->coalesce); /* set eqc arm_state */ - roce_set_field(eqc->byte_4, - HNS_ROCE_EQC_ARM_ST_M, + roce_set_field(eqc->byte_4, HNS_ROCE_EQC_ARM_ST_M, HNS_ROCE_EQC_ARM_ST_S, eq->arm_st); /* set eqn */ - roce_set_field(eqc->byte_4, - HNS_ROCE_EQC_EQN_M, - HNS_ROCE_EQC_EQN_S, eq->eqn); + roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQN_M, HNS_ROCE_EQC_EQN_S, + eq->eqn); /* set eqe_cnt */ - roce_set_field(eqc->byte_4, - HNS_ROCE_EQC_EQE_CNT_M, - HNS_ROCE_EQC_EQE_CNT_S, - HNS_ROCE_EQ_INIT_EQE_CNT); + roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQE_CNT_M, + HNS_ROCE_EQC_EQE_CNT_S, HNS_ROCE_EQ_INIT_EQE_CNT); /* set eqe_ba_pg_sz */ - roce_set_field(eqc->byte_8, - HNS_ROCE_EQC_BA_PG_SZ_M, + roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BA_PG_SZ_M, HNS_ROCE_EQC_BA_PG_SZ_S, eq->eqe_ba_pg_sz + PG_SHIFT_OFFSET); /* set eqe_buf_pg_sz */ - roce_set_field(eqc->byte_8, - HNS_ROCE_EQC_BUF_PG_SZ_M, + roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BUF_PG_SZ_M, HNS_ROCE_EQC_BUF_PG_SZ_S, eq->eqe_buf_pg_sz + PG_SHIFT_OFFSET); /* set eq_producer_idx */ - roce_set_field(eqc->byte_8, - HNS_ROCE_EQC_PROD_INDX_M, - HNS_ROCE_EQC_PROD_INDX_S, - HNS_ROCE_EQ_INIT_PROD_IDX); + roce_set_field(eqc->byte_8, HNS_ROCE_EQC_PROD_INDX_M, + HNS_ROCE_EQC_PROD_INDX_S, HNS_ROCE_EQ_INIT_PROD_IDX); /* set eq_max_cnt */ - roce_set_field(eqc->byte_12, - HNS_ROCE_EQC_MAX_CNT_M, + roce_set_field(eqc->byte_12, HNS_ROCE_EQC_MAX_CNT_M, HNS_ROCE_EQC_MAX_CNT_S, eq->eq_max_cnt); /* set eq_period */ - roce_set_field(eqc->byte_12, - HNS_ROCE_EQC_PERIOD_M, + roce_set_field(eqc->byte_12, HNS_ROCE_EQC_PERIOD_M, HNS_ROCE_EQC_PERIOD_S, eq->eq_period); /* set eqe_report_timer */ - roce_set_field(eqc->eqe_report_timer, - HNS_ROCE_EQC_REPORT_TIMER_M, + roce_set_field(eqc->eqe_report_timer, HNS_ROCE_EQC_REPORT_TIMER_M, HNS_ROCE_EQC_REPORT_TIMER_S, HNS_ROCE_EQ_INIT_REPORT_TIMER); /* set eqe_ba [34:3] */ - roce_set_field(eqc->eqe_ba0, - HNS_ROCE_EQC_EQE_BA_L_M, + roce_set_field(eqc->eqe_ba0, HNS_ROCE_EQC_EQE_BA_L_M, HNS_ROCE_EQC_EQE_BA_L_S, eq->eqe_ba >> 3); /* set eqe_ba [64:35] */ - roce_set_field(eqc->eqe_ba1, - HNS_ROCE_EQC_EQE_BA_H_M, + roce_set_field(eqc->eqe_ba1, HNS_ROCE_EQC_EQE_BA_H_M, HNS_ROCE_EQC_EQE_BA_H_S, eq->eqe_ba >> 35); /* set eq shift */ - roce_set_field(eqc->byte_28, - HNS_ROCE_EQC_SHIFT_M, - HNS_ROCE_EQC_SHIFT_S, eq->shift); + roce_set_field(eqc->byte_28, HNS_ROCE_EQC_SHIFT_M, HNS_ROCE_EQC_SHIFT_S, + eq->shift); /* set eq MSI_IDX */ - roce_set_field(eqc->byte_28, - HNS_ROCE_EQC_MSI_INDX_M, - HNS_ROCE_EQC_MSI_INDX_S, - HNS_ROCE_EQ_INIT_MSI_IDX); + roce_set_field(eqc->byte_28, HNS_ROCE_EQC_MSI_INDX_M, + HNS_ROCE_EQC_MSI_INDX_S, HNS_ROCE_EQ_INIT_MSI_IDX); /* set cur_eqe_ba [27:12] */ - roce_set_field(eqc->byte_28, - HNS_ROCE_EQC_CUR_EQE_BA_L_M, + roce_set_field(eqc->byte_28, HNS_ROCE_EQC_CUR_EQE_BA_L_M, HNS_ROCE_EQC_CUR_EQE_BA_L_S, eq->cur_eqe_ba >> 12); /* set cur_eqe_ba [59:28] */ - roce_set_field(eqc->byte_32, - HNS_ROCE_EQC_CUR_EQE_BA_M_M, + roce_set_field(eqc->byte_32, HNS_ROCE_EQC_CUR_EQE_BA_M_M, HNS_ROCE_EQC_CUR_EQE_BA_M_S, eq->cur_eqe_ba >> 28); /* set cur_eqe_ba [63:60] */ - roce_set_field(eqc->byte_36, - HNS_ROCE_EQC_CUR_EQE_BA_H_M, + roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CUR_EQE_BA_H_M, HNS_ROCE_EQC_CUR_EQE_BA_H_S, eq->cur_eqe_ba >> 60); /* set eq consumer idx */ - roce_set_field(eqc->byte_36, - HNS_ROCE_EQC_CONS_INDX_M, - HNS_ROCE_EQC_CONS_INDX_S, - HNS_ROCE_EQ_INIT_CONS_IDX); + roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CONS_INDX_M, + HNS_ROCE_EQC_CONS_INDX_S, HNS_ROCE_EQ_INIT_CONS_IDX); /* set nex_eqe_ba[43:12] */ - roce_set_field(eqc->nxt_eqe_ba0, - HNS_ROCE_EQC_NXT_EQE_BA_L_M, + roce_set_field(eqc->nxt_eqe_ba0, HNS_ROCE_EQC_NXT_EQE_BA_L_M, HNS_ROCE_EQC_NXT_EQE_BA_L_S, eq->nxt_eqe_ba >> 12); /* set nex_eqe_ba[63:44] */ - roce_set_field(eqc->nxt_eqe_ba1, - HNS_ROCE_EQC_NXT_EQE_BA_H_M, + roce_set_field(eqc->nxt_eqe_ba1, HNS_ROCE_EQC_NXT_EQE_BA_H_M, HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44); } @@ -5828,18 +6156,16 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, /* irq contains: abnormal + AEQ + CEQ */ for (j = 0; j < other_num; j++) - snprintf((char *)hr_dev->irq_names[j], - HNS_ROCE_INT_NAME_LEN, "hns-abn-%d", j); + snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN, + "hns-abn-%d", j); for (j = other_num; j < (other_num + aeq_num); j++) - snprintf((char *)hr_dev->irq_names[j], - HNS_ROCE_INT_NAME_LEN, "hns-aeq-%d", - j - other_num); + snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN, + "hns-aeq-%d", j - other_num); for (j = (other_num + aeq_num); j < irq_num; j++) - snprintf((char *)hr_dev->irq_names[j], - HNS_ROCE_INT_NAME_LEN, "hns-ceq-%d", - j - other_num - aeq_num); + snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN, + "hns-ceq-%d", j - other_num - aeq_num); for (j = 0; j < irq_num; j++) { if (j < other_num) @@ -6448,6 +6774,10 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, return; handle->priv = NULL; + + hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT; + hns_roce_handle_device_err(hr_dev); + hns_roce_exit(hr_dev); kfree(hr_dev->priv); ib_dealloc_device(&hr_dev->ib_dev); @@ -6509,7 +6839,6 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) { struct hns_roce_dev *hr_dev; - struct ib_event event; if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) { set_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state); @@ -6527,10 +6856,7 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) hr_dev->active = false; hr_dev->dis_db = true; - event.event = IB_EVENT_DEVICE_FATAL; - event.device = &hr_dev->ib_dev; - event.element.port_num = 1; - ib_dispatch_event(&event); + hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN; return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 76a14db7028d..2a117ff6a6be 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -81,10 +81,12 @@ #define HNS_ROCE_V2_QPC_ENTRY_SZ 256 #define HNS_ROCE_V2_IRRL_ENTRY_SZ 64 #define HNS_ROCE_V2_TRRL_ENTRY_SZ 48 +#define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ 100 #define HNS_ROCE_V2_CQC_ENTRY_SZ 64 #define HNS_ROCE_V2_SRQC_ENTRY_SZ 64 #define HNS_ROCE_V2_MTPT_ENTRY_SZ 64 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 +#define HNS_ROCE_V2_IDX_ENTRY_SZ 4 #define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 #define HNS_ROCE_V2_SCCC_ENTRY_SZ 32 #define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE @@ -109,7 +111,12 @@ #define HNS_ROCE_PBL_HOP_NUM 2 #define HNS_ROCE_EQE_HOP_NUM 2 #define HNS_ROCE_IDX_HOP_NUM 1 +#define HNS_ROCE_SQWQE_HOP_NUM 2 +#define HNS_ROCE_EXT_SGE_HOP_NUM 1 +#define HNS_ROCE_RQWQE_HOP_NUM 2 +#define HNS_ROCE_BA_PG_SZ_SUPPORTED_256K 6 +#define HNS_ROCE_BA_PG_SZ_SUPPORTED_16K 2 #define HNS_ROCE_V2_GID_INDEX_NUM 256 #define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18) @@ -237,6 +244,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403, HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404, HNS_ROCE_OPC_QUERY_PF_TIMER_RES = 0x8406, + HNS_ROCE_OPC_QUERY_PF_CAPS_NUM = 0x8408, HNS_ROCE_OPC_CFG_SGID_TB = 0x8500, HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501, HNS_ROCE_OPC_POST_MB = 0x8504, @@ -643,7 +651,7 @@ struct hns_roce_v2_qp_context { #define V2_QPC_BYTE_76_ATE_S 27 #define V2_QPC_BYTE_76_RQIE_S 28 - +#define V2_QPC_BYTE_76_EXT_ATE_S 29 #define V2_QPC_BYTE_76_RQ_VLAN_EN_S 30 #define V2_QPC_BYTE_80_RX_CQN_S 0 #define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0) @@ -1569,6 +1577,155 @@ struct hns_roce_cfg_smac_tb { #define CFG_SMAC_TB_VF_SMAC_H_S 0 #define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0) +#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5 +struct hns_roce_query_pf_caps_a { + u8 number_ports; + u8 local_ca_ack_delay; + __le16 max_sq_sg; + __le16 max_sq_inline; + __le16 max_rq_sg; + __le32 max_extend_sg; + __le16 num_qpc_timer; + __le16 num_cqc_timer; + __le16 max_srq_sges; + u8 num_aeq_vectors; + u8 num_other_vectors; + u8 max_sq_desc_sz; + u8 max_rq_desc_sz; + u8 max_srq_desc_sz; + u8 cq_entry_sz; +}; + +struct hns_roce_query_pf_caps_b { + u8 mtpt_entry_sz; + u8 irrl_entry_sz; + u8 trrl_entry_sz; + u8 cqc_entry_sz; + u8 srqc_entry_sz; + u8 idx_entry_sz; + u8 scc_ctx_entry_sz; + u8 max_mtu; + __le16 qpc_entry_sz; + __le16 qpc_timer_entry_sz; + __le16 cqc_timer_entry_sz; + u8 min_cqes; + u8 min_wqes; + __le32 page_size_cap; + u8 pkey_table_len; + u8 phy_num_uars; + u8 ctx_hop_num; + u8 pbl_hop_num; +}; + +struct hns_roce_query_pf_caps_c { + __le32 cap_flags_num_pds; + __le32 max_gid_num_cqs; + __le32 cq_depth; + __le32 num_mrws; + __le32 ord_num_qps; + __le16 sq_depth; + __le16 rq_depth; +}; + +#define V2_QUERY_PF_CAPS_C_NUM_PDS_S 0 +#define V2_QUERY_PF_CAPS_C_NUM_PDS_M GENMASK(19, 0) + +#define V2_QUERY_PF_CAPS_C_CAP_FLAGS_S 20 +#define V2_QUERY_PF_CAPS_C_CAP_FLAGS_M GENMASK(31, 20) + +#define V2_QUERY_PF_CAPS_C_NUM_CQS_S 0 +#define V2_QUERY_PF_CAPS_C_NUM_CQS_M GENMASK(19, 0) + +#define V2_QUERY_PF_CAPS_C_MAX_GID_S 20 +#define V2_QUERY_PF_CAPS_C_MAX_GID_M GENMASK(28, 20) + +#define V2_QUERY_PF_CAPS_C_CQ_DEPTH_S 0 +#define V2_QUERY_PF_CAPS_C_CQ_DEPTH_M GENMASK(22, 0) + +#define V2_QUERY_PF_CAPS_C_NUM_MRWS_S 0 +#define V2_QUERY_PF_CAPS_C_NUM_MRWS_M GENMASK(19, 0) + +#define V2_QUERY_PF_CAPS_C_NUM_QPS_S 0 +#define V2_QUERY_PF_CAPS_C_NUM_QPS_M GENMASK(19, 0) + +#define V2_QUERY_PF_CAPS_C_MAX_ORD_S 20 +#define V2_QUERY_PF_CAPS_C_MAX_ORD_M GENMASK(27, 20) + +struct hns_roce_query_pf_caps_d { + __le32 wq_hop_num_max_srqs; + __le16 srq_depth; + __le16 rsv; + __le32 num_ceqs_ceq_depth; + __le32 arm_st_aeq_depth; + __le32 num_uars_rsv_pds; + __le32 rsv_uars_rsv_qps; +}; +#define V2_QUERY_PF_CAPS_D_NUM_SRQS_S 0 +#define V2_QUERY_PF_CAPS_D_NUM_SRQS_M GENMASK(20, 0) + +#define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S 20 +#define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M GENMASK(21, 20) + +#define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S 22 +#define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M GENMASK(23, 22) + +#define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S 24 +#define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M GENMASK(25, 24) + + +#define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S 0 +#define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M GENMASK(21, 0) + +#define V2_QUERY_PF_CAPS_D_NUM_CEQS_S 22 +#define V2_QUERY_PF_CAPS_D_NUM_CEQS_M GENMASK(31, 22) + +#define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S 0 +#define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M GENMASK(21, 0) + +#define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S 22 +#define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M GENMASK(23, 22) + +#define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S 24 +#define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M GENMASK(25, 24) + +#define V2_QUERY_PF_CAPS_D_RSV_PDS_S 0 +#define V2_QUERY_PF_CAPS_D_RSV_PDS_M GENMASK(19, 0) + +#define V2_QUERY_PF_CAPS_D_NUM_UARS_S 20 +#define V2_QUERY_PF_CAPS_D_NUM_UARS_M GENMASK(27, 20) + +#define V2_QUERY_PF_CAPS_D_RSV_QPS_S 0 +#define V2_QUERY_PF_CAPS_D_RSV_QPS_M GENMASK(19, 0) + +#define V2_QUERY_PF_CAPS_D_RSV_UARS_S 20 +#define V2_QUERY_PF_CAPS_D_RSV_UARS_M GENMASK(27, 20) + +struct hns_roce_query_pf_caps_e { + __le32 chunk_size_shift_rsv_mrws; + __le32 rsv_cqs; + __le32 rsv_srqs; + __le32 rsv_lkey; + __le16 ceq_max_cnt; + __le16 ceq_period; + __le16 aeq_max_cnt; + __le16 aeq_period; +}; + +#define V2_QUERY_PF_CAPS_E_RSV_MRWS_S 0 +#define V2_QUERY_PF_CAPS_E_RSV_MRWS_M GENMASK(19, 0) + +#define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S 20 +#define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M GENMASK(31, 20) + +#define V2_QUERY_PF_CAPS_E_RSV_CQS_S 0 +#define V2_QUERY_PF_CAPS_E_RSV_CQS_M GENMASK(19, 0) + +#define V2_QUERY_PF_CAPS_E_RSV_SRQS_S 0 +#define V2_QUERY_PF_CAPS_E_RSV_SRQS_M GENMASK(19, 0) + +#define V2_QUERY_PF_CAPS_E_RSV_LKEYS_S 0 +#define V2_QUERY_PF_CAPS_E_RSV_LKEYS_M GENMASK(19, 0) + struct hns_roce_cmq_desc { __le16 opcode; __le16 flag; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 854ef6e74788..d0031d559213 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -90,7 +90,7 @@ static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context) static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context) { struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); - struct ib_gid_attr zattr = { }; + struct ib_gid_attr zattr = {}; u8 port = attr->port_num - 1; int ret; @@ -210,7 +210,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev, props->max_pkeys = 1; props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay; if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { - props->max_srq = hr_dev->caps.max_srqs; + props->max_srq = hr_dev->caps.num_srqs; props->max_srq_wr = hr_dev->caps.max_srq_wrs; props->max_srq_sge = hr_dev->caps.max_srq_sges; } @@ -259,11 +259,12 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, mtu = iboe_get_mtu(net_dev->mtu); props->active_mtu = mtu ? min(props->max_mtu, mtu) : IB_MTU_256; - props->state = (netif_running(net_dev) && netif_carrier_ok(net_dev)) ? - IB_PORT_ACTIVE : IB_PORT_DOWN; - props->phys_state = (props->state == IB_PORT_ACTIVE) ? - IB_PORT_PHYS_STATE_LINK_UP : - IB_PORT_PHYS_STATE_DISABLED; + props->state = netif_running(net_dev) && netif_carrier_ok(net_dev) ? + IB_PORT_ACTIVE : + IB_PORT_DOWN; + props->phys_state = props->state == IB_PORT_ACTIVE ? + IB_PORT_PHYS_STATE_LINK_UP : + IB_PORT_PHYS_STATE_DISABLED; spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); @@ -481,13 +482,13 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev = &hr_dev->ib_dev; - ib_dev->node_type = RDMA_NODE_IB_CA; - ib_dev->dev.parent = dev; + ib_dev->node_type = RDMA_NODE_IB_CA; + ib_dev->dev.parent = dev; - ib_dev->phys_port_cnt = hr_dev->caps.num_ports; - ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey; - ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors; - ib_dev->uverbs_cmd_mask = + ib_dev->phys_port_cnt = hr_dev->caps.num_ports; + ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey; + ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors; + ib_dev->uverbs_cmd_mask = (1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) | (1ULL << IB_USER_VERBS_CMD_QUERY_PORT) | @@ -503,8 +504,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) (1ULL << IB_USER_VERBS_CMD_QUERY_QP) | (1ULL << IB_USER_VERBS_CMD_DESTROY_QP); - ib_dev->uverbs_ex_cmd_mask |= - (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ); + ib_dev->uverbs_ex_cmd_mask |= (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) { ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); @@ -589,11 +589,13 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) { ret = hns_roce_init_hem_table(hr_dev, - &hr_dev->mr_table.mtt_cqe_table, - HEM_TYPE_CQE, hr_dev->caps.mtt_entry_sz, - hr_dev->caps.num_cqe_segs, 1); + &hr_dev->mr_table.mtt_cqe_table, + HEM_TYPE_CQE, + hr_dev->caps.mtt_entry_sz, + hr_dev->caps.num_cqe_segs, 1); if (ret) { - dev_err(dev, "Failed to init MTT CQE context memory, aborting.\n"); + dev_err(dev, + "Failed to init CQE context memory, aborting.\n"); goto err_unmap_cqe; } } @@ -633,7 +635,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) hr_dev->caps.num_qps, 1); if (ret) { dev_err(dev, - "Failed to init trrl_table memory, aborting.\n"); + "Failed to init trrl_table memory, aborting.\n"); goto err_unmap_irrl; } } @@ -653,7 +655,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) hr_dev->caps.num_srqs, 1); if (ret) { dev_err(dev, - "Failed to init SRQ context memory, aborting.\n"); + "Failed to init SRQ context memory, aborting.\n"); goto err_unmap_cq; } } @@ -692,33 +694,31 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) hr_dev->caps.num_qps, 1); if (ret) { dev_err(dev, - "Failed to init SCC context memory, aborting.\n"); + "Failed to init SCC context memory, aborting.\n"); goto err_unmap_idx; } } if (hr_dev->caps.qpc_timer_entry_sz) { - ret = hns_roce_init_hem_table(hr_dev, - &hr_dev->qpc_timer_table, + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qpc_timer_table, HEM_TYPE_QPC_TIMER, hr_dev->caps.qpc_timer_entry_sz, hr_dev->caps.num_qpc_timer, 1); if (ret) { dev_err(dev, - "Failed to init QPC timer memory, aborting.\n"); + "Failed to init QPC timer memory, aborting.\n"); goto err_unmap_ctx; } } if (hr_dev->caps.cqc_timer_entry_sz) { - ret = hns_roce_init_hem_table(hr_dev, - &hr_dev->cqc_timer_table, + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cqc_timer_table, HEM_TYPE_CQC_TIMER, hr_dev->caps.cqc_timer_entry_sz, hr_dev->caps.num_cqc_timer, 1); if (ret) { dev_err(dev, - "Failed to init CQC timer memory, aborting.\n"); + "Failed to init CQC timer memory, aborting.\n"); goto err_unmap_qpc_timer; } } @@ -727,8 +727,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) err_unmap_qpc_timer: if (hr_dev->caps.qpc_timer_entry_sz) - hns_roce_cleanup_hem_table(hr_dev, - &hr_dev->qpc_timer_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table); err_unmap_ctx: if (hr_dev->caps.sccc_entry_sz) @@ -863,6 +862,50 @@ err_uar_table_free: return ret; } +static void check_and_get_armed_cq(struct list_head *cq_list, struct ib_cq *cq) +{ + struct hns_roce_cq *hr_cq = to_hr_cq(cq); + unsigned long flags; + + spin_lock_irqsave(&hr_cq->lock, flags); + if (cq->comp_handler) { + if (!hr_cq->is_armed) { + hr_cq->is_armed = 1; + list_add_tail(&hr_cq->node, cq_list); + } + } + spin_unlock_irqrestore(&hr_cq->lock, flags); +} + +void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_qp *hr_qp; + struct hns_roce_cq *hr_cq; + struct list_head cq_list; + unsigned long flags_qp; + unsigned long flags; + + INIT_LIST_HEAD(&cq_list); + + spin_lock_irqsave(&hr_dev->qp_list_lock, flags); + list_for_each_entry(hr_qp, &hr_dev->qp_list, node) { + spin_lock_irqsave(&hr_qp->sq.lock, flags_qp); + if (hr_qp->sq.tail != hr_qp->sq.head) + check_and_get_armed_cq(&cq_list, hr_qp->ibqp.send_cq); + spin_unlock_irqrestore(&hr_qp->sq.lock, flags_qp); + + spin_lock_irqsave(&hr_qp->rq.lock, flags_qp); + if ((!hr_qp->ibqp.srq) && (hr_qp->rq.tail != hr_qp->rq.head)) + check_and_get_armed_cq(&cq_list, hr_qp->ibqp.recv_cq); + spin_unlock_irqrestore(&hr_qp->rq.lock, flags_qp); + } + + list_for_each_entry(hr_cq, &cq_list, node) + hns_roce_cq_completion(hr_dev, hr_cq->cqn); + + spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags); +} + int hns_roce_init(struct hns_roce_dev *hr_dev) { int ret; @@ -933,6 +976,9 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) } } + INIT_LIST_HEAD(&hr_dev->qp_list); + spin_lock_init(&hr_dev->qp_list_lock); + ret = hns_roce_register_device(hr_dev); if (ret) goto error_failed_register_device; diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 3ff610549c74..b9898e71655a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1064,8 +1064,8 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) { if (page_addr & ((1 << mtt->page_shift) - 1)) { dev_err(dev, - "page_addr 0x%llx is not page_shift %d alignment!\n", - page_addr, mtt->page_shift); + "page_addr is not page_shift %d alignment!\n", + mtt->page_shift); ret = -EINVAL; goto out; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index eb2ee6a581aa..3257ad11be48 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -393,40 +393,38 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, /* Get buf size, SQ and RQ are aligned to page_szie */ if (hr_dev->caps.max_sq_sg <= 2) { - hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), PAGE_SIZE) + - HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << + round_up((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), PAGE_SIZE); hr_qp->sq.offset = 0; - hr_qp->rq.offset = HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << + hr_qp->rq.offset = round_up((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), PAGE_SIZE); } else { page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); hr_qp->sge.sge_cnt = ex_sge_num ? max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0; - hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size) + - HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt << + round_up((hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift), page_size) + - HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << + round_up((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), page_size); hr_qp->sq.offset = 0; if (ex_sge_num) { - hr_qp->sge.offset = HNS_ROCE_ALIGN_UP( - (hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), - page_size); + hr_qp->sge.offset = round_up((hr_qp->sq.wqe_cnt << + hr_qp->sq.wqe_shift), + page_size); hr_qp->rq.offset = hr_qp->sge.offset + - HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt << - hr_qp->sge.sge_shift), - page_size); + round_up((hr_qp->sge.sge_cnt << + hr_qp->sge.sge_shift), + page_size); } else { - hr_qp->rq.offset = HNS_ROCE_ALIGN_UP( - (hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), - page_size); + hr_qp->rq.offset = round_up((hr_qp->sq.wqe_cnt << + hr_qp->sq.wqe_shift), + page_size); } } @@ -593,20 +591,18 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); hr_qp->sq.offset = 0; - size = HNS_ROCE_ALIGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, - page_size); + size = round_up(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size); if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) { hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift), - (u32)hr_qp->sge.sge_cnt); + (u32)hr_qp->sge.sge_cnt); hr_qp->sge.offset = size; - size += HNS_ROCE_ALIGN_UP(hr_qp->sge.sge_cnt << - hr_qp->sge.sge_shift, page_size); + size += round_up(hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift, + page_size); } hr_qp->rq.offset = size; - size += HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), - page_size); + size += round_up((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size); hr_qp->buff_size = size; /* Get wr and sge number which send */ @@ -681,6 +677,29 @@ static void free_rq_inline_buf(struct hns_roce_qp *hr_qp) kfree(hr_qp->rq_inl_buf.wqe_list); } +static void add_qp_to_list(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct ib_cq *send_cq, struct ib_cq *recv_cq) +{ + struct hns_roce_cq *hr_send_cq, *hr_recv_cq; + unsigned long flags; + + hr_send_cq = send_cq ? to_hr_cq(send_cq) : NULL; + hr_recv_cq = recv_cq ? to_hr_cq(recv_cq) : NULL; + + spin_lock_irqsave(&hr_dev->qp_list_lock, flags); + hns_roce_lock_cqs(hr_send_cq, hr_recv_cq); + + list_add_tail(&hr_qp->node, &hr_dev->qp_list); + if (hr_send_cq) + list_add_tail(&hr_qp->sq_node, &hr_send_cq->sq_list); + if (hr_recv_cq) + list_add_tail(&hr_qp->rq_node, &hr_recv_cq->rq_list); + + hns_roce_unlock_cqs(hr_send_cq, hr_recv_cq); + spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags); +} + static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, @@ -950,6 +969,9 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, } hr_qp->event = hns_roce_ib_qp_event; + + add_qp_to_list(hr_dev, hr_qp, init_attr->send_cq, init_attr->recv_cq); + hns_roce_free_buf_list(buf_list, hr_qp->region_cnt); return 0; @@ -1232,7 +1254,16 @@ out: void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) __acquires(&send_cq->lock) __acquires(&recv_cq->lock) { - if (send_cq == recv_cq) { + if (unlikely(send_cq == NULL && recv_cq == NULL)) { + __acquire(&send_cq->lock); + __acquire(&recv_cq->lock); + } else if (unlikely(send_cq != NULL && recv_cq == NULL)) { + spin_lock_irq(&send_cq->lock); + __acquire(&recv_cq->lock); + } else if (unlikely(send_cq == NULL && recv_cq != NULL)) { + spin_lock_irq(&recv_cq->lock); + __acquire(&send_cq->lock); + } else if (send_cq == recv_cq) { spin_lock_irq(&send_cq->lock); __acquire(&recv_cq->lock); } else if (send_cq->cqn < recv_cq->cqn) { @@ -1248,7 +1279,16 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) __releases(&send_cq->lock) __releases(&recv_cq->lock) { - if (send_cq == recv_cq) { + if (unlikely(send_cq == NULL && recv_cq == NULL)) { + __release(&recv_cq->lock); + __release(&send_cq->lock); + } else if (unlikely(send_cq != NULL && recv_cq == NULL)) { + __release(&recv_cq->lock); + spin_unlock(&send_cq->lock); + } else if (unlikely(send_cq == NULL && recv_cq != NULL)) { + __release(&send_cq->lock); + spin_unlock(&recv_cq->lock); + } else if (send_cq == recv_cq) { __release(&recv_cq->lock); spin_unlock_irq(&send_cq->lock); } else if (send_cq->cqn < recv_cq->cqn) { diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index d44cf33df81a..238614370927 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1225,6 +1225,8 @@ static void i40iw_add_ipv4_addr(struct i40iw_device *iwdev) const struct in_ifaddr *ifa; idev = in_dev_get(dev); + if (!idev) + continue; in_dev_for_each_ifa_rtnl(ifa, idev) { i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "IP=%pI4, vlan_id=%d, MAC=%pM\n", &ifa->ifa_address, diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index ecd6cadd529a..b591861934b3 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -186,23 +186,6 @@ out: kfree(ent); } -static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id) -{ - struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; - struct rb_root *sl_id_map = &sriov->sl_id_map; - struct id_map_entry *ent, *found_ent; - - spin_lock(&sriov->id_map_lock); - ent = xa_erase(&sriov->pv_id_table, pv_cm_id); - if (!ent) - goto out; - found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id); - if (found_ent && found_ent == ent) - rb_erase(&found_ent->node, sl_id_map); -out: - spin_unlock(&sriov->id_map_lock); -} - static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new) { struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map; @@ -294,7 +277,7 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id) spin_lock(&sriov->id_map_lock); spin_lock_irqsave(&sriov->going_down_lock, flags); /*make sure that there is no schedule inside the scheduled work.*/ - if (!sriov->is_going_down) { + if (!sriov->is_going_down && !id->scheduled_delete) { id->scheduled_delete = 1; schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT); } @@ -341,9 +324,6 @@ cont: if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID) schedule_delayed(ibdev, id); - else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) - id_map_find_del(ibdev, pv_cm_id); - return 0; } @@ -382,12 +362,9 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, *slave = id->slave_id; set_remote_comm_id(mad, id->sl_cm_id); - if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID) + if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID || + mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) schedule_delayed(ibdev, id); - else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID || - mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) { - id_map_find_del(ibdev, (int) pv_cm_id); - } return 0; } diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index a57033d4b0e5..f8b936b76dcd 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -568,18 +568,13 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe, wc->vendor_err = cqe->vendor_err_syndrome; } -static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) +static int mlx4_ib_ipoib_csum_ok(__be16 status, u8 badfcs_enc, __be16 checksum) { - return ((status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | - MLX4_CQE_STATUS_IPV4F | - MLX4_CQE_STATUS_IPV4OPT | - MLX4_CQE_STATUS_IPV6 | - MLX4_CQE_STATUS_IPOK)) == - cpu_to_be16(MLX4_CQE_STATUS_IPV4 | - MLX4_CQE_STATUS_IPOK)) && - (status & cpu_to_be16(MLX4_CQE_STATUS_UDP | - MLX4_CQE_STATUS_TCP)) && - checksum == cpu_to_be16(0xffff); + return ((badfcs_enc & MLX4_CQE_STATUS_L4_CSUM) || + ((status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && + (status & cpu_to_be16(MLX4_CQE_STATUS_TCP | + MLX4_CQE_STATUS_UDP)) && + (checksum == cpu_to_be16(0xffff)))); } static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, @@ -855,6 +850,7 @@ repoll: wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0; wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status, + cqe->badfcs_enc, cqe->checksum) ? IB_WC_IP_CSUM_OK : 0; if (is_eth) { wc->slid = 0; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 34055cbab38c..2f5d9b181848 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -246,6 +246,13 @@ static int mlx4_ib_update_gids(struct gid_entry *gids, return mlx4_ib_update_gids_v1(gids, ibdev, port_num); } +static void free_gid_entry(struct gid_entry *entry) +{ + memset(&entry->gid, 0, sizeof(entry->gid)); + kfree(entry->ctx); + entry->ctx = NULL; +} + static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) { struct mlx4_ib_dev *ibdev = to_mdev(attr->device); @@ -313,6 +320,8 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) GFP_ATOMIC); if (!gids) { ret = -ENOMEM; + *context = NULL; + free_gid_entry(&port_gid_table->gids[free]); } else { for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) { memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid)); @@ -324,6 +333,12 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) if (!ret && hw_update) { ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num); + if (ret) { + spin_lock_bh(&iboe->lock); + *context = NULL; + free_gid_entry(&port_gid_table->gids[free]); + spin_unlock_bh(&iboe->lock); + } kfree(gids); } @@ -353,10 +368,7 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context) if (!ctx->refcount) { unsigned int real_index = ctx->real_index; - memset(&port_gid_table->gids[real_index].gid, 0, - sizeof(port_gid_table->gids[real_index].gid)); - kfree(port_gid_table->gids[real_index].ctx); - port_gid_table->gids[real_index].ctx = NULL; + free_gid_entry(&port_gid_table->gids[real_index]); hw_update = 1; } } diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index cb23cdb9389a..26425dd2d960 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -849,7 +849,7 @@ static void mlx4_ib_release_wqn(struct mlx4_ib_ucontext *context, * reused for further WQN allocations. * The next created WQ will allocate a new range. */ - range->dirty = 1; + range->dirty = true; } mutex_unlock(&context->wqn_ranges_mutex); @@ -3085,7 +3085,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, } if (ah->av.eth.vlan != cpu_to_be16(0xffff)) { vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff; - is_vlan = 1; + is_vlan = true; } } err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 685b8ed96b4e..d7efc9f6daf0 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -30,7 +30,7 @@ enum devx_obj_flags { struct devx_async_data { struct mlx5_ib_dev *mdev; struct list_head list; - struct ib_uobject *fd_uobj; + struct devx_async_cmd_event_file *ev_file; struct mlx5_async_work cb_work; u16 cmd_out_len; /* must be last field in this structure */ @@ -72,7 +72,6 @@ struct devx_event_subscription { struct rcu_head rcu; u64 cookie; struct devx_async_event_file *ev_file; - struct file *filp; /* Upon hot unplug we need a direct access to */ struct eventfd_ctx *eventfd; }; @@ -1674,21 +1673,20 @@ static void devx_query_callback(int status, struct mlx5_async_work *context) { struct devx_async_data *async_data = container_of(context, struct devx_async_data, cb_work); - struct ib_uobject *fd_uobj = async_data->fd_uobj; - struct devx_async_cmd_event_file *ev_file; - struct devx_async_event_queue *ev_queue; + struct devx_async_cmd_event_file *ev_file = async_data->ev_file; + struct devx_async_event_queue *ev_queue = &ev_file->ev_queue; unsigned long flags; - ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file, - uobj); - ev_queue = &ev_file->ev_queue; - + /* + * Note that if the struct devx_async_cmd_event_file uobj begins to be + * destroyed it will block at mlx5_cmd_cleanup_async_ctx() until this + * routine returns, ensuring that it always remains valid here. + */ spin_lock_irqsave(&ev_queue->lock, flags); list_add_tail(&async_data->list, &ev_queue->event_list); spin_unlock_irqrestore(&ev_queue->lock, flags); wake_up_interruptible(&ev_queue->poll_wait); - fput(fd_uobj->object); } #define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */ @@ -1757,9 +1755,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)( async_data->cmd_out_len = cmd_out_len; async_data->mdev = mdev; - async_data->fd_uobj = fd_uobj; + async_data->ev_file = ev_file; - get_file(fd_uobj->object); MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in, uverbs_attr_get_len(attrs, @@ -1769,12 +1766,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)( devx_query_callback, &async_data->cb_work); if (err) - goto cb_err; + goto free_async; return 0; -cb_err: - fput(fd_uobj->object); free_async: kvfree(async_data); sub_bytes: @@ -2032,6 +2027,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( goto err; list_add_tail(&event_sub->event_list, &sub_list); + uverbs_uobject_get(&ev_file->uobj); if (use_eventfd) { event_sub->eventfd = eventfd_ctx_fdget(redirect_fd); @@ -2045,7 +2041,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( event_sub->cookie = cookie; event_sub->ev_file = ev_file; - event_sub->filp = fd_uobj->object; /* May be needed upon cleanup the devx object/subscription */ event_sub->xa_key_level1 = key_level1; event_sub->xa_key_level2 = obj_id; @@ -2099,7 +2094,7 @@ err: if (event_sub->eventfd) eventfd_ctx_put(event_sub->eventfd); - + uverbs_uobject_put(&event_sub->ev_file->uobj); kfree(event_sub); } @@ -2329,6 +2324,9 @@ static int deliver_event(struct devx_event_subscription *event_sub, return 0; } + /* is_destroyed is ignored here because we don't have any memory + * allocation to clean up for the omit_data case + */ list_add_tail(&event_sub->event_list, &ev_file->event_list); spin_unlock_irqrestore(&ev_file->lock, flags); wake_up_interruptible(&ev_file->poll_wait); @@ -2348,7 +2346,10 @@ static int deliver_event(struct devx_event_subscription *event_sub, memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe)); spin_lock_irqsave(&ev_file->lock, flags); - list_add_tail(&event_data->list, &ev_file->event_list); + if (!ev_file->is_destroyed) + list_add_tail(&event_data->list, &ev_file->event_list); + else + kfree(event_data); spin_unlock_irqrestore(&ev_file->lock, flags); wake_up_interruptible(&ev_file->poll_wait); @@ -2361,17 +2362,10 @@ static void dispatch_event_fd(struct list_head *fd_list, struct devx_event_subscription *item; list_for_each_entry_rcu(item, fd_list, xa_list) { - if (!get_file_rcu(item->filp)) - continue; - - if (item->eventfd) { + if (item->eventfd) eventfd_signal(item->eventfd, 1); - fput(item->filp); - continue; - } - - deliver_event(item, data); - fput(item->filp); + else + deliver_event(item, data); } } @@ -2509,23 +2503,6 @@ static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf, return ret; } -static int devx_async_cmd_event_close(struct inode *inode, struct file *filp) -{ - struct ib_uobject *uobj = filp->private_data; - struct devx_async_cmd_event_file *comp_ev_file = container_of( - uobj, struct devx_async_cmd_event_file, uobj); - struct devx_async_data *entry, *tmp; - - spin_lock_irq(&comp_ev_file->ev_queue.lock); - list_for_each_entry_safe(entry, tmp, - &comp_ev_file->ev_queue.event_list, list) - kvfree(entry); - spin_unlock_irq(&comp_ev_file->ev_queue.lock); - - uverbs_close_fd(filp); - return 0; -} - static __poll_t devx_async_cmd_event_poll(struct file *filp, struct poll_table_struct *wait) { @@ -2549,7 +2526,7 @@ static const struct file_operations devx_async_cmd_event_fops = { .owner = THIS_MODULE, .read = devx_async_cmd_event_read, .poll = devx_async_cmd_event_poll, - .release = devx_async_cmd_event_close, + .release = uverbs_uobject_fd_release, .llseek = no_llseek, }; @@ -2653,81 +2630,85 @@ static __poll_t devx_async_event_poll(struct file *filp, return pollflags; } -static int devx_async_event_close(struct inode *inode, struct file *filp) +static void devx_free_subscription(struct rcu_head *rcu) { - struct devx_async_event_file *ev_file = filp->private_data; - struct devx_event_subscription *event_sub, *event_sub_tmp; - struct devx_async_event_data *entry, *tmp; - struct mlx5_ib_dev *dev = ev_file->dev; - - mutex_lock(&dev->devx_event_table.event_xa_lock); - /* delete the subscriptions which are related to this FD */ - list_for_each_entry_safe(event_sub, event_sub_tmp, - &ev_file->subscribed_events_list, file_list) { - devx_cleanup_subscription(dev, event_sub); - if (event_sub->eventfd) - eventfd_ctx_put(event_sub->eventfd); - - list_del_rcu(&event_sub->file_list); - /* subscription may not be used by the read API any more */ - kfree_rcu(event_sub, rcu); - } - - mutex_unlock(&dev->devx_event_table.event_xa_lock); + struct devx_event_subscription *event_sub = + container_of(rcu, struct devx_event_subscription, rcu); - /* free the pending events allocation */ - if (!ev_file->omit_data) { - spin_lock_irq(&ev_file->lock); - list_for_each_entry_safe(entry, tmp, - &ev_file->event_list, list) - kfree(entry); /* read can't come any more */ - spin_unlock_irq(&ev_file->lock); - } - - uverbs_close_fd(filp); - put_device(&dev->ib_dev.dev); - return 0; + if (event_sub->eventfd) + eventfd_ctx_put(event_sub->eventfd); + uverbs_uobject_put(&event_sub->ev_file->uobj); + kfree(event_sub); } static const struct file_operations devx_async_event_fops = { .owner = THIS_MODULE, .read = devx_async_event_read, .poll = devx_async_event_poll, - .release = devx_async_event_close, + .release = uverbs_uobject_fd_release, .llseek = no_llseek, }; -static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_cmd_event_file *comp_ev_file = container_of(uobj, struct devx_async_cmd_event_file, uobj); struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue; + struct devx_async_data *entry, *tmp; spin_lock_irq(&ev_queue->lock); ev_queue->is_destroyed = 1; spin_unlock_irq(&ev_queue->lock); - - if (why == RDMA_REMOVE_DRIVER_REMOVE) - wake_up_interruptible(&ev_queue->poll_wait); + wake_up_interruptible(&ev_queue->poll_wait); mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx); + + spin_lock_irq(&comp_ev_file->ev_queue.lock); + list_for_each_entry_safe(entry, tmp, + &comp_ev_file->ev_queue.event_list, list) + kvfree(entry); + spin_unlock_irq(&comp_ev_file->ev_queue.lock); return 0; }; -static int devx_hot_unplug_async_event_file(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_event_file *ev_file = container_of(uobj, struct devx_async_event_file, uobj); + struct devx_event_subscription *event_sub, *event_sub_tmp; + struct devx_async_event_data *entry, *tmp; + struct mlx5_ib_dev *dev = ev_file->dev; spin_lock_irq(&ev_file->lock); ev_file->is_destroyed = 1; spin_unlock_irq(&ev_file->lock); - wake_up_interruptible(&ev_file->poll_wait); + + mutex_lock(&dev->devx_event_table.event_xa_lock); + /* delete the subscriptions which are related to this FD */ + list_for_each_entry_safe(event_sub, event_sub_tmp, + &ev_file->subscribed_events_list, file_list) { + devx_cleanup_subscription(dev, event_sub); + list_del_rcu(&event_sub->file_list); + /* subscription may not be used by the read API any more */ + call_rcu(&event_sub->rcu, devx_free_subscription); + } + mutex_unlock(&dev->devx_event_table.event_xa_lock); + + /* free the pending events allocation */ + if (!ev_file->omit_data) { + spin_lock_irq(&ev_file->lock); + list_for_each_entry_safe(entry, tmp, + &ev_file->event_list, list) + kfree(entry); /* read can't come any more */ + spin_unlock_irq(&ev_file->lock); + } + + put_device(&dev->ib_dev.dev); return 0; }; @@ -2913,7 +2894,7 @@ DECLARE_UVERBS_NAMED_METHOD( DECLARE_UVERBS_NAMED_OBJECT( MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file), - devx_hot_unplug_async_cmd_event_file, + devx_async_cmd_event_destroy_uobj, &devx_async_cmd_event_fops, "[devx_async_cmd]", O_RDONLY), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)); @@ -2931,7 +2912,7 @@ DECLARE_UVERBS_NAMED_METHOD( DECLARE_UVERBS_NAMED_OBJECT( MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD, UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file), - devx_hot_unplug_async_event_file, + devx_async_event_destroy_uobj, &devx_async_event_fops, "[devx_async_event]", O_RDONLY), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)); diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index ac4d8d1b9a07..1ae6fd95acaa 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -507,8 +507,7 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr); if (ret) { /* Undo the effect of adding the outstanding wr */ - gsi->outstanding_pi = (gsi->outstanding_pi - 1) % - gsi->cap.max_send_wr; + gsi->outstanding_pi--; goto err; } spin_unlock_irqrestore(&gsi->lock, flags); diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c index 4f0edd4832bd..b61165359954 100644 --- a/drivers/infiniband/hw/mlx5/ib_virt.c +++ b/drivers/infiniband/hw/mlx5/ib_virt.c @@ -164,8 +164,10 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid) in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID; in->node_guid = guid; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); - if (!err) + if (!err) { vfs_ctx[vf].node_guid = guid; + vfs_ctx[vf].node_guid_valid = 1; + } kfree(in); return err; } @@ -185,8 +187,10 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid) in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID; in->port_guid = guid; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); - if (!err) + if (!err) { vfs_ctx[vf].port_guid = guid; + vfs_ctx[vf].port_guid_valid = 1; + } kfree(in); return err; } @@ -208,20 +212,12 @@ int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port, { struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_hca_vport_context *rep; - int err; - - rep = kzalloc(sizeof(*rep), GFP_KERNEL); - if (!rep) - return -ENOMEM; + struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx; - err = mlx5_query_hca_vport_context(mdev, 1, 1, vf+1, rep); - if (err) - goto ex; + node_guid->guid = + vfs_ctx[vf].node_guid_valid ? vfs_ctx[vf].node_guid : 0; + port_guid->guid = + vfs_ctx[vf].port_guid_valid ? vfs_ctx[vf].port_guid : 0; - port_guid->guid = rep->port_guid; - node_guid->guid = rep->node_guid; -ex: - kfree(rep); - return err; + return 0; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ab4ec33409b8..e874d688d040 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2094,6 +2094,7 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry) { struct mlx5_user_mmap_entry *mentry = to_mmmap(entry); struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device); + struct mlx5_var_table *var_table = &dev->var_table; struct mlx5_ib_dm *mdm; switch (mentry->mmap_flag) { @@ -2103,6 +2104,12 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry) mdm->size); kfree(mdm); break; + case MLX5_IB_MMAP_TYPE_VAR: + mutex_lock(&var_table->bitmap_lock); + clear_bit(mentry->page_idx, var_table->bitmap); + mutex_unlock(&var_table->bitmap_lock); + kfree(mentry); + break; default: WARN_ON(true); } @@ -2262,7 +2269,10 @@ static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev, mentry = to_mmmap(entry); pfn = (mentry->address >> PAGE_SHIFT); - prot = pgprot_writecombine(vma->vm_page_prot); + if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR) + prot = pgprot_noncached(vma->vm_page_prot); + else + prot = pgprot_writecombine(vma->vm_page_prot); ret = rdma_user_mmap_io(ucontext, vma, pfn, entry->npages * PAGE_SIZE, prot, @@ -2271,6 +2281,15 @@ static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev, return ret; } +static u64 mlx5_entry_to_mmap_offset(struct mlx5_user_mmap_entry *entry) +{ + u16 cmd = entry->rdma_entry.start_pgoff >> 16; + u16 index = entry->rdma_entry.start_pgoff & 0xFFFF; + + return (((index >> 8) << 16) | (cmd << MLX5_IB_MMAP_CMD_SHIFT) | + (index & 0xFF)) << PAGE_SHIFT; +} + static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); @@ -5368,6 +5387,14 @@ static const struct mlx5_ib_counter extended_err_cnts[] = { INIT_Q_COUNTER(req_cqe_flush_error), }; +static const struct mlx5_ib_counter roce_accl_cnts[] = { + INIT_Q_COUNTER(roce_adp_retrans), + INIT_Q_COUNTER(roce_adp_retrans_to), + INIT_Q_COUNTER(roce_slow_restart), + INIT_Q_COUNTER(roce_slow_restart_cnps), + INIT_Q_COUNTER(roce_slow_restart_trans), +}; + #define INIT_EXT_PPCNT_COUNTER(_name) \ { .name = #_name, .offset = \ MLX5_BYTE_OFF(ppcnt_reg, \ @@ -5416,6 +5443,9 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) num_counters += ARRAY_SIZE(extended_err_cnts); + if (MLX5_CAP_GEN(dev->mdev, roce_accl)) + num_counters += ARRAY_SIZE(roce_accl_cnts); + cnts->num_q_counters = num_counters; if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { @@ -5476,6 +5506,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, } } + if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { + for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { + names[j] = roce_accl_cnts[i].name; + offsets[j] = roce_accl_cnts[i].offset; + } + } + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { names[j] = cong_cnts[i].name; @@ -6051,6 +6088,145 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) mlx5_nic_vport_disable_roce(dev->mdev); } +static int var_obj_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_user_mmap_entry *obj = uobject->object; + + rdma_user_mmap_entry_remove(&obj->rdma_entry); + return 0; +} + +static struct mlx5_user_mmap_entry * +alloc_var_entry(struct mlx5_ib_ucontext *c) +{ + struct mlx5_user_mmap_entry *entry; + struct mlx5_var_table *var_table; + u32 page_idx; + int err; + + var_table = &to_mdev(c->ibucontext.device)->var_table; + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + + mutex_lock(&var_table->bitmap_lock); + page_idx = find_first_zero_bit(var_table->bitmap, + var_table->num_var_hw_entries); + if (page_idx >= var_table->num_var_hw_entries) { + err = -ENOSPC; + mutex_unlock(&var_table->bitmap_lock); + goto end; + } + + set_bit(page_idx, var_table->bitmap); + mutex_unlock(&var_table->bitmap_lock); + + entry->address = var_table->hw_start_addr + + (page_idx * var_table->stride_size); + entry->page_idx = page_idx; + entry->mmap_flag = MLX5_IB_MMAP_TYPE_VAR; + + err = rdma_user_mmap_entry_insert_range( + &c->ibucontext, &entry->rdma_entry, var_table->stride_size, + MLX5_IB_MMAP_OFFSET_START << 16, + (MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1); + if (err) + goto err_insert; + + return entry; + +err_insert: + mutex_lock(&var_table->bitmap_lock); + clear_bit(page_idx, var_table->bitmap); + mutex_unlock(&var_table->bitmap_lock); +end: + kfree(entry); + return ERR_PTR(err); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_VAR_OBJ_ALLOC)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE); + struct mlx5_ib_ucontext *c; + struct mlx5_user_mmap_entry *entry; + u64 mmap_offset; + u32 length; + int err; + + c = to_mucontext(ib_uverbs_get_ucontext(attrs)); + if (IS_ERR(c)) + return PTR_ERR(c); + + entry = alloc_var_entry(c); + if (IS_ERR(entry)) + return PTR_ERR(entry); + + mmap_offset = mlx5_entry_to_mmap_offset(entry); + length = entry->rdma_entry.npages * PAGE_SIZE; + uobj->object = entry; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET, + &mmap_offset, sizeof(mmap_offset)); + if (err) + goto err; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID, + &entry->page_idx, sizeof(entry->page_idx)); + if (err) + goto err; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH, + &length, sizeof(length)); + if (err) + goto err; + + return 0; + +err: + rdma_user_mmap_entry_remove(&entry->rdma_entry); + return err; +} + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_VAR_OBJ_ALLOC, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE, + MLX5_IB_OBJECT_VAR, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_VAR_OBJ_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_DESTROY_HANDLE, + MLX5_IB_OBJECT_VAR, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_VAR, + UVERBS_TYPE_ALLOC_IDR(var_obj_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_ALLOC), + &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_DESTROY)); + +static bool var_is_supported(struct ib_device *device) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + + return (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q); +} + ADD_UVERBS_ATTRIBUTES_SIMPLE( mlx5_ib_dm, UVERBS_OBJECT_DM, @@ -6073,14 +6249,14 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( enum mlx5_ib_uapi_flow_action_flags)); static const struct uapi_definition mlx5_ib_defs[] = { -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) UAPI_DEF_CHAIN(mlx5_ib_devx_defs), UAPI_DEF_CHAIN(mlx5_ib_flow_defs), -#endif UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, &mlx5_ib_flow_action), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR, + UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)), {} }; @@ -6352,6 +6528,35 @@ static const struct ib_device_ops mlx5_ib_dev_dm_ops = { .reg_dm_mr = mlx5_ib_reg_dm_mr, }; +static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_var_table *var_table = &dev->var_table; + u8 log_doorbell_bar_size; + u8 log_doorbell_stride; + u64 bar_size; + + log_doorbell_bar_size = MLX5_CAP_DEV_VDPA_EMULATION(mdev, + log_doorbell_bar_size); + log_doorbell_stride = MLX5_CAP_DEV_VDPA_EMULATION(mdev, + log_doorbell_stride); + var_table->hw_start_addr = dev->mdev->bar_addr + + MLX5_CAP64_DEV_VDPA_EMULATION(mdev, + doorbell_bar_offset); + bar_size = (1ULL << log_doorbell_bar_size) * 4096; + var_table->stride_size = 1ULL << log_doorbell_stride; + var_table->num_var_hw_entries = bar_size / var_table->stride_size; + mutex_init(&var_table->bitmap_lock); + var_table->bitmap = bitmap_zalloc(var_table->num_var_hw_entries, + GFP_KERNEL); + return (var_table->bitmap) ? 0 : -ENOMEM; +} + +static void mlx5_ib_stage_caps_cleanup(struct mlx5_ib_dev *dev) +{ + bitmap_free(dev->var_table.bitmap); +} + static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; @@ -6439,6 +6644,13 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) mutex_init(&dev->lb.mutex); + if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { + err = mlx5_ib_init_var_table(dev); + if (err) + return err; + } + dev->ib_dev.use_cq_dim = true; return 0; @@ -6742,6 +6954,8 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage) { + dev->ib_active = false; + /* Number of stages to cleanup */ while (stage) { stage--; @@ -6787,7 +7001,7 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_flow_db_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CAPS, mlx5_ib_stage_caps_init, - NULL), + mlx5_ib_stage_caps_cleanup), STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, mlx5_ib_stage_non_default_cb, NULL), @@ -6844,7 +7058,7 @@ const struct mlx5_ib_profile raw_eth_profile = { mlx5_ib_stage_flow_db_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CAPS, mlx5_ib_stage_caps_init, - NULL), + mlx5_ib_stage_caps_cleanup), STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, mlx5_ib_stage_raw_eth_non_default_cb, NULL), diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 048f4e974a61..b90a3649e7d1 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -101,18 +101,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, *count = i; } -static u64 umem_dma_to_mtt(dma_addr_t umem_dma) -{ - u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK; - - if (umem_dma & ODP_READ_ALLOWED_BIT) - mtt_entry |= MLX5_IB_MTT_READ; - if (umem_dma & ODP_WRITE_ALLOWED_BIT) - mtt_entry |= MLX5_IB_MTT_WRITE; - - return mtt_entry; -} - /* * Populate the given array with bus addresses from the umem. * @@ -139,19 +127,6 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, struct scatterlist *sg; int entry; - if (umem->is_odp) { - WARN_ON(shift != 0); - WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)); - - for (i = 0; i < num_pages; ++i) { - dma_addr_t pa = - to_ib_umem_odp(umem)->dma_list[offset + i]; - - pas[i] = cpu_to_be64(umem_dma_to_mtt(pa)); - } - return; - } - i = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { len = sg_dma_len(sg) >> PAGE_SHIFT; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 77d495b2032d..d9bffcc93587 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -72,6 +72,11 @@ #define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size) enum { + MLX5_IB_MMAP_OFFSET_START = 9, + MLX5_IB_MMAP_OFFSET_END = 255, +}; + +enum { MLX5_IB_MMAP_CMD_SHIFT = 8, MLX5_IB_MMAP_CMD_MASK = 0xff, }; @@ -120,6 +125,7 @@ enum { enum mlx5_ib_mmap_type { MLX5_IB_MMAP_TYPE_MEMIC = 1, + MLX5_IB_MMAP_TYPE_VAR = 2, }; #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) \ @@ -563,6 +569,7 @@ struct mlx5_user_mmap_entry { struct rdma_user_mmap_entry rdma_entry; u8 mmap_flag; u64 address; + u32 page_idx; }; struct mlx5_ib_dm { @@ -959,6 +966,15 @@ struct mlx5_devx_event_table { struct xarray event_xa; }; +struct mlx5_var_table { + /* serialize updating the bitmap */ + struct mutex bitmap_lock; + unsigned long *bitmap; + u64 hw_start_addr; + u32 stride_size; + u64 num_var_hw_entries; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -1013,6 +1029,7 @@ struct mlx5_ib_dev { struct mlx5_srq_table srq_table; struct mlx5_async_ctx async_ctx; struct mlx5_devx_event_table devx_event_table; + struct mlx5_var_table var_table; struct xarray sig_mrs; }; @@ -1276,8 +1293,8 @@ void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent); -void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, - size_t nentries, struct mlx5_ib_mr *mr, int flags); +void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, + struct mlx5_ib_mr *mr, int flags); int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, @@ -1293,9 +1310,8 @@ static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {} static inline int mlx5_ib_odp_init(void) { return 0; } static inline void mlx5_ib_odp_cleanup(void) {} static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {} -static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, - size_t nentries, struct mlx5_ib_mr *mr, - int flags) {} +static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, + struct mlx5_ib_mr *mr, int flags) {} static inline int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, @@ -1363,14 +1379,14 @@ int mlx5_ib_fill_res_entry(struct sk_buff *msg, int mlx5_ib_fill_stat_entry(struct sk_buff *msg, struct rdma_restrack_entry *res); +extern const struct uapi_definition mlx5_ib_devx_defs[]; +extern const struct uapi_definition mlx5_ib_flow_defs[]; + #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev); void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev); -const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); -extern const struct uapi_definition mlx5_ib_devx_defs[]; -extern const struct uapi_definition mlx5_ib_flow_defs[]; struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_flow_context *flow_context, @@ -1378,7 +1394,6 @@ struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( void *cmd_in, int inlen, int dest_id, int dest_type); bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id); -int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); #else static inline int @@ -1507,7 +1522,7 @@ int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num); static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, - bool do_modify_atomic) + bool do_modify_atomic, int access_flags) { if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) return false; @@ -1517,6 +1532,9 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) return false; + if (access_flags & IB_ACCESS_RELAXED_ORDERING) + return false; + return true; } diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 44a0ee6bd9f1..6fa0a83c19de 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -147,7 +147,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) break; } mr->order = ent->order; - mr->allocated_from_cache = 1; + mr->allocated_from_cache = true; mr->dev = dev; MLX5_SET(mkc, mkc, free, 1); @@ -661,12 +661,21 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, struct ib_pd *pd) { + struct mlx5_ib_dev *dev = to_mdev(pd->device); + MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, mkc, lr, 1); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) + MLX5_SET(mkc, mkc, relaxed_ordering_write, + !!(acc & IB_ACCESS_RELAXED_ORDERING)); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) + MLX5_SET(mkc, mkc, relaxed_ordering_read, + !!(acc & IB_ACCESS_RELAXED_ORDERING)); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET64(mkc, mkc, start_addr, start_addr); @@ -867,36 +876,6 @@ static struct mlx5_ib_mr *alloc_mr_from_cache( return mr; } -static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages, - void *xlt, int page_shift, size_t size, - int flags) -{ - struct mlx5_ib_dev *dev = mr->dev; - struct ib_umem *umem = mr->umem; - - if (flags & MLX5_IB_UPD_XLT_INDIRECT) { - if (!umr_can_use_indirect_mkey(dev)) - return -EPERM; - mlx5_odp_populate_klm(xlt, idx, npages, mr, flags); - return npages; - } - - npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx); - - if (!(flags & MLX5_IB_UPD_XLT_ZAP)) { - __mlx5_ib_populate_pas(dev, umem, page_shift, - idx, npages, xlt, - MLX5_IB_MTT_PRESENT); - /* Clear padding after the pages - * brought from the umem. - */ - memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0, - size - npages * sizeof(struct mlx5_mtt)); - } - - return npages; -} - #define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \ MLX5_UMR_MTT_ALIGNMENT) #define MLX5_SPARE_UMR_CHUNK 0x10000 @@ -920,6 +899,7 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, size_t pages_mapped = 0; size_t pages_to_map = 0; size_t pages_iter = 0; + size_t size_to_map = 0; gfp_t gfp; bool use_emergency_page = false; @@ -966,6 +946,15 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, goto free_xlt; } + if (mr->umem->is_odp) { + if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + size_t max_pages = ib_umem_odp_num_pages(odp) - idx; + + pages_to_map = min_t(size_t, pages_to_map, max_pages); + } + } + sg.addr = dma; sg.lkey = dev->umrc.pd->local_dma_lkey; @@ -988,14 +977,22 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, pages_mapped < pages_to_map && !err; pages_mapped += pages_iter, idx += pages_iter) { npages = min_t(int, pages_iter, pages_to_map - pages_mapped); + size_to_map = npages * desc_size; dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE); - npages = populate_xlt(mr, idx, npages, xlt, - page_shift, size, flags); - + if (mr->umem->is_odp) { + mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); + } else { + __mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx, + npages, xlt, + MLX5_IB_MTT_PRESENT); + /* Clear padding after the pages + * brought from the umem. + */ + memset(xlt + size_to_map, 0, size - size_to_map); + } dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); - sg.length = ALIGN(npages * desc_size, - MLX5_UMR_MTT_ALIGNMENT); + sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); if (pages_mapped + pages_iter >= pages_to_map) { if (flags & MLX5_IB_UPD_XLT_ENABLE) @@ -1074,6 +1071,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, free, !populate); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) + MLX5_SET(mkc, mkc, relaxed_ordering_write, + !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) + MLX5_SET(mkc, mkc, relaxed_ordering_read, + !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); @@ -1264,7 +1267,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (err < 0) return ERR_PTR(err); - use_umr = mlx5_ib_can_use_umr(dev, true); + use_umr = mlx5_ib_can_use_umr(dev, true, access_flags); if (order <= mr_cache_max_order(dev) && use_umr) { mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, @@ -1431,7 +1434,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - if (!mlx5_ib_can_use_umr(dev, true) || + if (!mlx5_ib_can_use_umr(dev, true, access_flags) || (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) { /* * UMR can't be used - MKey needs to be replaced. @@ -1452,7 +1455,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - mr->allocated_from_cache = 0; + mr->allocated_from_cache = false; } else { /* * Send a UMR WQE diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 0afb0042bd53..4216814ba871 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -93,8 +93,8 @@ struct mlx5_pagefault { static u64 mlx5_imr_ksm_entries; -void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, - struct mlx5_ib_mr *imr, int flags) +static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, + struct mlx5_ib_mr *imr, int flags) { struct mlx5_klm *end = pklm + nentries; @@ -144,6 +144,44 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, } } +static u64 umem_dma_to_mtt(dma_addr_t umem_dma) +{ + u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK; + + if (umem_dma & ODP_READ_ALLOWED_BIT) + mtt_entry |= MLX5_IB_MTT_READ; + if (umem_dma & ODP_WRITE_ALLOWED_BIT) + mtt_entry |= MLX5_IB_MTT_WRITE; + + return mtt_entry; +} + +static void populate_mtt(__be64 *pas, size_t idx, size_t nentries, + struct mlx5_ib_mr *mr, int flags) +{ + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + dma_addr_t pa; + size_t i; + + if (flags & MLX5_IB_UPD_XLT_ZAP) + return; + + for (i = 0; i < nentries; i++) { + pa = odp->dma_list[idx + i]; + pas[i] = cpu_to_be64(umem_dma_to_mtt(pa)); + } +} + +void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, + struct mlx5_ib_mr *mr, int flags) +{ + if (flags & MLX5_IB_UPD_XLT_INDIRECT) { + populate_klm(xlt, idx, nentries, mr, flags); + } else { + populate_mtt(xlt, idx, nentries, mr, flags); + } +} + static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); @@ -342,7 +380,7 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) memset(caps, 0, sizeof(*caps)); if (!MLX5_CAP_GEN(dev->mdev, pg) || - !mlx5_ib_can_use_umr(dev, true)) + !mlx5_ib_can_use_umr(dev, true, 0)) return; caps->general_caps = IB_ODP_SUPPORT; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index ae7cbd9c9bca..a4f8e7030787 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1918,7 +1918,7 @@ static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev, { enum ib_qp_type qpt = init_attr->qp_type; int scqe_sz; - bool allow_scat_cqe = 0; + bool allow_scat_cqe = false; if (qpt == IB_QPT_UC || qpt == IB_QPT_UD) return; @@ -4870,7 +4870,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC; u8 flags = 0; - if (!mlx5_ib_can_use_umr(dev, atomic)) { + if (!mlx5_ib_can_use_umr(dev, atomic, wr->access)) { mlx5_ib_warn(to_mdev(qp->ibqp.device), "Fast update of %s for MR is disabled\n", (MLX5_CAP_GEN(dev->mdev, diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 920f35e28cfc..484b555150e0 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -312,7 +312,18 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) } ctx->db_mmap_entry = &entry->rdma_entry; - uresp.dpm_enabled = dev->user_dpm_enabled; + if (!dev->user_dpm_enabled) + uresp.dpm_flags = 0; + else if (rdma_protocol_iwarp(&dev->ibdev, 1)) + uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY; + else + uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED | + QEDR_DPM_TYPE_ROCE_LEGACY; + + uresp.dpm_flags |= QEDR_DPM_SIZES_SET; + uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE; + uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE; + uresp.wids_enabled = 1; uresp.wid_count = oparams.wid_count; uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry); |