summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h3
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.c11
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.h3
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c4
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c125
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c134
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.c88
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.h4
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.c29
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c77
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.h10
-rw-r--r--drivers/infiniband/hw/bnxt_re/roce_hsi.h5
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c55
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c50
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c10
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile42
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c4
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c486
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h71
-rw-r--r--drivers/infiniband/hw/hfi1/chip_registers.h4
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c4
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h48
-rw-r--r--drivers/infiniband/hw/hfi1/init.c113
-rw-r--r--drivers/infiniband/hw/hfi1/iowait.c94
-rw-r--r--drivers/infiniband/hw/hfi1/iowait.h192
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c4
-rw-r--r--drivers/infiniband/hw/hfi1/msix.c363
-rw-r--r--drivers/infiniband/hw/hfi1/msix.h64
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c74
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c8
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c100
-rw-r--r--drivers/infiniband/hw/hfi1/qp.h31
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c24
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c382
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c56
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h21
-rw-r--r--drivers/infiniband/hw/hfi1/sysfs.c69
-rw-r--r--drivers/infiniband/hw/hfi1/trace.h3
-rw-r--r--drivers/infiniband/hw/hfi1/trace_iowait.h54
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c14
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c22
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c137
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.h20
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c251
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h35
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.h11
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_main.c12
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_sdma.c21
-rw-r--r--drivers/infiniband/hw/hns/Kconfig1
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_ah.c6
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h45
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c4
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c629
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h96
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c123
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c212
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c41
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c73
-rw-r--r--drivers/infiniband/hw/mlx4/Kconfig1
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c20
-rw-r--r--drivers/infiniband/hw/mlx4/main.c182
-rw-r--r--drivers/infiniband/hw/mlx4/mcg.c2
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h5
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c8
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c6
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c129
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h14
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c3
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c358
-rw-r--r--drivers/infiniband/hw/mlx5/flow.c393
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c3
-rw-r--r--drivers/infiniband/hw/mlx5/main.c510
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c9
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h98
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c14
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c123
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c491
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c5
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c6
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c44
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c4
-rw-r--r--drivers/infiniband/hw/nes/nes.c3
-rw-r--r--drivers/infiniband/hw/nes/nes.h9
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c2
-rw-r--r--drivers/infiniband/hw/nes/nes_nic.c2
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c63
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c74
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.c3
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c3
-rw-r--r--drivers/infiniband/hw/qedr/main.c73
-rw-r--r--drivers/infiniband/hw/qedr/qedr.h2
-rw-r--r--drivers/infiniband/hw/qedr/qedr_roce_cm.c4
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c5
-rw-r--r--drivers/infiniband/hw/qib/qib.h2
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c17
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c18
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c342
-rw-r--r--drivers/infiniband/hw/qib/qib_sdma.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_sysfs.c101
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c12
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c17
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c47
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h15
-rw-r--r--drivers/infiniband/hw/usnic/usnic_debugfs.c3
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c39
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.c74
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.h2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c16
-rw-r--r--drivers/infiniband/hw/usnic/usnic_transport.c2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c91
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.h3
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c46
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c2
118 files changed, 4489 insertions, 3715 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 96f76896488d..31baa8939a4f 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -40,7 +40,6 @@
#ifndef __BNXT_RE_H__
#define __BNXT_RE_H__
#define ROCE_DRV_MODULE_NAME "bnxt_re"
-#define ROCE_DRV_MODULE_VERSION "1.0.0"
#define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver"
#define BNXT_RE_PAGE_SHIFT_4K (12)
@@ -120,6 +119,8 @@ struct bnxt_re_dev {
#define BNXT_RE_FLAG_HAVE_L2_REF 3
#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
#define BNXT_RE_FLAG_QOS_WORK_REG 5
+#define BNXT_RE_FLAG_RESOURCES_ALLOCATED 7
+#define BNXT_RE_FLAG_RESOURCES_INITIALIZED 8
#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
struct net_device *netdev;
unsigned int version, major, minor;
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index 77416bc61e6e..604b71875f5f 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -68,6 +68,8 @@ static const char * const bnxt_re_stat_name[] = {
[BNXT_RE_TX_PKTS] = "tx_pkts",
[BNXT_RE_TX_BYTES] = "tx_bytes",
[BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors",
+ [BNXT_RE_RX_DROPS] = "rx_roce_drops",
+ [BNXT_RE_RX_DISCARDS] = "rx_roce_discards",
[BNXT_RE_TO_RETRANSMITS] = "to_retransmits",
[BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd",
[BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded",
@@ -106,7 +108,8 @@ static const char * const bnxt_re_stat_name[] = {
[BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err",
[BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err",
[BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err",
- [BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err"
+ [BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err",
+ [BNXT_RE_OUT_OF_SEQ_ERR] = "oos_drop_count"
};
int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
@@ -128,6 +131,10 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
if (bnxt_re_stats) {
stats->value[BNXT_RE_RECOVERABLE_ERRORS] =
le64_to_cpu(bnxt_re_stats->tx_bcast_pkts);
+ stats->value[BNXT_RE_RX_DROPS] =
+ le64_to_cpu(bnxt_re_stats->rx_drop_pkts);
+ stats->value[BNXT_RE_RX_DISCARDS] =
+ le64_to_cpu(bnxt_re_stats->rx_discard_pkts);
stats->value[BNXT_RE_RX_PKTS] =
le64_to_cpu(bnxt_re_stats->rx_ucast_pkts);
stats->value[BNXT_RE_RX_BYTES] =
@@ -220,6 +227,8 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
rdev->stats.res_tx_pci_err;
stats->value[BNXT_RE_RES_RX_PCI_ERR] =
rdev->stats.res_rx_pci_err;
+ stats->value[BNXT_RE_OUT_OF_SEQ_ERR] =
+ rdev->stats.res_oos_drop_count;
}
return ARRAY_SIZE(bnxt_re_stat_name);
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h
index a01a922717d5..76399f477e5c 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.h
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h
@@ -51,6 +51,8 @@ enum bnxt_re_hw_stats {
BNXT_RE_TX_PKTS,
BNXT_RE_TX_BYTES,
BNXT_RE_RECOVERABLE_ERRORS,
+ BNXT_RE_RX_DROPS,
+ BNXT_RE_RX_DISCARDS,
BNXT_RE_TO_RETRANSMITS,
BNXT_RE_SEQ_ERR_NAKS_RCVD,
BNXT_RE_MAX_RETRY_EXCEEDED,
@@ -90,6 +92,7 @@ enum bnxt_re_hw_stats {
BNXT_RE_RES_SRQ_LOAD_ERR,
BNXT_RE_RES_TX_PCI_ERR,
BNXT_RE_RES_RX_PCI_ERR,
+ BNXT_RE_OUT_OF_SEQ_ERR,
BNXT_RE_NUM_COUNTERS
};
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index bc2b9e038439..54fdd4cf5288 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -1598,8 +1598,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state);
new_qp_state = qp_attr->qp_state;
if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state,
- ib_qp->qp_type, qp_attr_mask,
- IB_LINK_LAYER_ETHERNET)) {
+ ib_qp->qp_type, qp_attr_mask)) {
dev_err(rdev_to_dev(rdev),
"Invalid attribute mask: %#x specified ",
qp_attr_mask);
@@ -2664,6 +2663,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
nq->budget++;
atomic_inc(&rdev->cq_count);
+ spin_lock_init(&cq->cq_lock);
if (context) {
struct bnxt_re_cq_resp resp;
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 85cd1a3593d6..cf2282654210 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -67,7 +67,7 @@
#include "hw_counters.h"
static char version[] =
- BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n";
+ BNXT_RE_DESC "\n";
MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
@@ -535,6 +535,34 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
return en_dev;
}
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
+}
+static DEVICE_ATTR_RO(hw_rev);
+
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
+}
+static DEVICE_ATTR_RO(hca_type);
+
+static struct attribute *bnxt_re_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ NULL
+};
+
+static const struct attribute_group bnxt_re_dev_attr_group = {
+ .attrs = bnxt_re_attributes,
+};
+
static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
{
ib_unregister_device(&rdev->ibdev);
@@ -547,7 +575,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
/* ib device init */
ibdev->owner = THIS_MODULE;
ibdev->node_type = RDMA_NODE_IB_CA;
- strlcpy(ibdev->name, "bnxt_re%d", IB_DEVICE_NAME_MAX);
strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
strlen(BNXT_RE_DESC) + 5);
ibdev->phys_port_cnt = 1;
@@ -639,34 +666,11 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats;
ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats;
+ rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group);
ibdev->driver_id = RDMA_DRIVER_BNXT_RE;
- return ib_register_device(ibdev, NULL);
-}
-
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
-
- return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
-}
-
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
-
- return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
+ return ib_register_device(ibdev, "bnxt_re%d", NULL);
}
-static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
-static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
-
-static struct device_attribute *bnxt_re_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type
-};
-
static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
{
dev_put(rdev->netdev);
@@ -864,10 +868,8 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
{
int i;
- if (rdev->nq[0].hwq.max_elements) {
- for (i = 1; i < rdev->num_msix; i++)
- bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
- }
+ for (i = 1; i < rdev->num_msix; i++)
+ bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
if (rdev->qplib_res.rcfw)
bnxt_qplib_cleanup_res(&rdev->qplib_res);
@@ -876,6 +878,7 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
{
int rc = 0, i;
+ int num_vec_enabled = 0;
bnxt_qplib_init_res(&rdev->qplib_res);
@@ -891,9 +894,13 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
"Failed to enable NQ with rc = 0x%x", rc);
goto fail;
}
+ num_vec_enabled++;
}
return 0;
fail:
+ for (i = num_vec_enabled; i >= 0; i--)
+ bnxt_qplib_disable_nq(&rdev->nq[i]);
+
return rc;
}
@@ -925,6 +932,7 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
{
int rc = 0, i;
+ int num_vec_created = 0;
/* Configure and allocate resources for qplib */
rdev->qplib_res.rcfw = &rdev->rcfw;
@@ -951,7 +959,7 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
if (rc) {
dev_err(rdev_to_dev(rdev), "Alloc Failed NQ%d rc:%#x",
i, rc);
- goto dealloc_dpi;
+ goto free_nq;
}
rc = bnxt_re_net_ring_alloc
(rdev, rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr,
@@ -964,14 +972,17 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
dev_err(rdev_to_dev(rdev),
"Failed to allocate NQ fw id with rc = 0x%x",
rc);
+ bnxt_qplib_free_nq(&rdev->nq[i]);
goto free_nq;
}
+ num_vec_created++;
}
return 0;
free_nq:
- for (i = 0; i < rdev->num_msix - 1; i++)
+ for (i = num_vec_created; i >= 0; i--) {
+ bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id);
bnxt_qplib_free_nq(&rdev->nq[i]);
-dealloc_dpi:
+ }
bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
&rdev->qplib_res.dpi_tbl,
&rdev->dpi_privileged);
@@ -989,12 +1000,17 @@ static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
struct ib_event ib_event;
ib_event.device = ibdev;
- if (qp)
+ if (qp) {
ib_event.element.qp = qp;
- else
+ ib_event.event = event;
+ if (qp->event_handler)
+ qp->event_handler(&ib_event, qp->qp_context);
+
+ } else {
ib_event.element.port_num = port_num;
- ib_event.event = event;
- ib_dispatch_event(&ib_event);
+ ib_event.event = event;
+ ib_dispatch_event(&ib_event);
+ }
}
#define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN 0x02
@@ -1189,20 +1205,20 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
{
- int i, rc;
+ int rc;
if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
- for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++)
- device_remove_file(&rdev->ibdev.dev,
- bnxt_re_attributes[i]);
/* Cleanup ib dev */
bnxt_re_unregister_ib(rdev);
}
if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
- cancel_delayed_work(&rdev->worker);
+ cancel_delayed_work_sync(&rdev->worker);
- bnxt_re_cleanup_res(rdev);
- bnxt_re_free_res(rdev);
+ if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED,
+ &rdev->flags))
+ bnxt_re_cleanup_res(rdev);
+ if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags))
+ bnxt_re_free_res(rdev);
if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
@@ -1241,7 +1257,7 @@ static void bnxt_re_worker(struct work_struct *work)
static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
{
- int i, j, rc;
+ int rc;
bool locked;
@@ -1331,12 +1347,15 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
pr_err("Failed to allocate resources: %#x\n", rc);
goto fail;
}
+ set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags);
rc = bnxt_re_init_res(rdev);
if (rc) {
pr_err("Failed to initialize resources: %#x\n", rc);
goto fail;
}
+ set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags);
+
if (!rdev->is_virtfn) {
rc = bnxt_re_setup_qos(rdev);
if (rc)
@@ -1358,20 +1377,6 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
}
set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
dev_info(rdev_to_dev(rdev), "Device registered successfully");
- for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) {
- rc = device_create_file(&rdev->ibdev.dev,
- bnxt_re_attributes[i]);
- if (rc) {
- dev_err(rdev_to_dev(rdev),
- "Failed to create IB sysfs: %#x", rc);
- /* Must clean up all created device files */
- for (j = 0; j < i; j++)
- device_remove_file(&rdev->ibdev.dev,
- bnxt_re_attributes[j]);
- bnxt_re_unregister_ib(rdev);
- goto fail;
- }
- }
ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
&rdev->active_width);
set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 6ad0d46ab879..b98b054148cd 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -36,6 +36,8 @@
* Description: Fast Path Operators
*/
+#define dev_fmt(fmt) "QPLIB: " fmt
+
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
@@ -71,8 +73,7 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
if (!qp->sq.flushed) {
dev_dbg(&scq->hwq.pdev->dev,
- "QPLIB: FP: Adding to SQ Flush list = %p",
- qp);
+ "FP: Adding to SQ Flush list = %p\n", qp);
bnxt_qplib_cancel_phantom_processing(qp);
list_add_tail(&qp->sq_flush, &scq->sqf_head);
qp->sq.flushed = true;
@@ -80,8 +81,7 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
if (!qp->srq) {
if (!qp->rq.flushed) {
dev_dbg(&rcq->hwq.pdev->dev,
- "QPLIB: FP: Adding to RQ Flush list = %p",
- qp);
+ "FP: Adding to RQ Flush list = %p\n", qp);
list_add_tail(&qp->rq_flush, &rcq->rqf_head);
qp->rq.flushed = true;
}
@@ -207,7 +207,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
if (!qp->sq_hdr_buf) {
rc = -ENOMEM;
dev_err(&res->pdev->dev,
- "QPLIB: Failed to create sq_hdr_buf");
+ "Failed to create sq_hdr_buf\n");
goto fail;
}
}
@@ -221,7 +221,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
if (!qp->rq_hdr_buf) {
rc = -ENOMEM;
dev_err(&res->pdev->dev,
- "QPLIB: Failed to create rq_hdr_buf");
+ "Failed to create rq_hdr_buf\n");
goto fail;
}
}
@@ -277,8 +277,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
num_cqne_processed++;
else
dev_warn(&nq->pdev->dev,
- "QPLIB: cqn - type 0x%x not handled",
- type);
+ "cqn - type 0x%x not handled\n", type);
spin_unlock_bh(&cq->compl_lock);
break;
}
@@ -298,7 +297,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
num_srqne_processed++;
else
dev_warn(&nq->pdev->dev,
- "QPLIB: SRQ event 0x%x not handled",
+ "SRQ event 0x%x not handled\n",
nqsrqe->event);
break;
}
@@ -306,8 +305,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
break;
default:
dev_warn(&nq->pdev->dev,
- "QPLIB: nqe with type = 0x%x not handled",
- type);
+ "nqe with type = 0x%x not handled\n", type);
break;
}
raw_cons++;
@@ -360,7 +358,8 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
}
/* Make sure the HW is stopped! */
- bnxt_qplib_nq_stop_irq(nq, true);
+ if (nq->requested)
+ bnxt_qplib_nq_stop_irq(nq, true);
if (nq->bar_reg_iomem)
iounmap(nq->bar_reg_iomem);
@@ -396,7 +395,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
rc = irq_set_affinity_hint(nq->vector, &nq->mask);
if (rc) {
dev_warn(&nq->pdev->dev,
- "QPLIB: set affinity failed; vector: %d nq_idx: %d\n",
+ "set affinity failed; vector: %d nq_idx: %d\n",
nq->vector, nq_indx);
}
nq->requested = true;
@@ -443,7 +442,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true);
if (rc) {
dev_err(&nq->pdev->dev,
- "QPLIB: Failed to request irq for nq-idx %d", nq_idx);
+ "Failed to request irq for nq-idx %d\n", nq_idx);
goto fail;
}
@@ -662,8 +661,8 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
spin_lock(&srq_hwq->lock);
if (srq->start_idx == srq->last_idx) {
- dev_err(&srq_hwq->pdev->dev, "QPLIB: FP: SRQ (0x%x) is full!",
- srq->id);
+ dev_err(&srq_hwq->pdev->dev,
+ "FP: SRQ (0x%x) is full!\n", srq->id);
rc = -EINVAL;
spin_unlock(&srq_hwq->lock);
goto done;
@@ -1324,7 +1323,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
}
}
if (i == res->sgid_tbl.max)
- dev_warn(&res->pdev->dev, "QPLIB: SGID not found??");
+ dev_warn(&res->pdev->dev, "SGID not found??\n");
qp->ah.hop_limit = sb->hop_limit;
qp->ah.traffic_class = sb->traffic_class;
@@ -1536,7 +1535,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
if (bnxt_qplib_queue_full(sq)) {
dev_err(&sq->hwq.pdev->dev,
- "QPLIB: prod = %#x cons = %#x qdepth = %#x delta = %#x",
+ "prod = %#x cons = %#x qdepth = %#x delta = %#x\n",
sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements,
sq->q_full_delta);
rc = -ENOMEM;
@@ -1561,7 +1560,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
/* Copy the inline data */
if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
dev_warn(&sq->hwq.pdev->dev,
- "QPLIB: Inline data length > 96 detected");
+ "Inline data length > 96 detected\n");
data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH;
} else {
data_len = wqe->inline_len;
@@ -1776,7 +1775,7 @@ done:
queue_work(qp->scq->nq->cqn_wq, &nq_work->work);
} else {
dev_err(&sq->hwq.pdev->dev,
- "QPLIB: FP: Failed to allocate SQ nq_work!");
+ "FP: Failed to allocate SQ nq_work!\n");
rc = -ENOMEM;
}
}
@@ -1815,13 +1814,12 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
sch_handler = true;
dev_dbg(&rq->hwq.pdev->dev,
- "%s Error QP. Scheduling for poll_cq\n",
- __func__);
+ "%s: Error QP. Scheduling for poll_cq\n", __func__);
goto queue_err;
}
if (bnxt_qplib_queue_full(rq)) {
dev_err(&rq->hwq.pdev->dev,
- "QPLIB: FP: QP (0x%x) RQ is full!", qp->id);
+ "FP: QP (0x%x) RQ is full!\n", qp->id);
rc = -EINVAL;
goto done;
}
@@ -1870,7 +1868,7 @@ queue_err:
queue_work(qp->rcq->nq->cqn_wq, &nq_work->work);
} else {
dev_err(&rq->hwq.pdev->dev,
- "QPLIB: FP: Failed to allocate RQ nq_work!");
+ "FP: Failed to allocate RQ nq_work!\n");
rc = -ENOMEM;
}
}
@@ -1932,7 +1930,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
if (!cq->dpi) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: FP: CREATE_CQ failed due to NULL DPI");
+ "FP: CREATE_CQ failed due to NULL DPI\n");
return -EINVAL;
}
req.dpi = cpu_to_le32(cq->dpi->dpi);
@@ -1969,6 +1967,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
INIT_LIST_HEAD(&cq->sqf_head);
INIT_LIST_HEAD(&cq->rqf_head);
spin_lock_init(&cq->compl_lock);
+ spin_lock_init(&cq->flush_lock);
bnxt_qplib_arm_cq_enable(cq);
return 0;
@@ -2172,7 +2171,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
* comes back
*/
dev_dbg(&cq->hwq.pdev->dev,
- "FP:Got Phantom CQE");
+ "FP: Got Phantom CQE\n");
sq->condition = false;
sq->single = true;
rc = 0;
@@ -2189,7 +2188,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
peek_raw_cq_cons++;
}
dev_err(&cq->hwq.pdev->dev,
- "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x",
+ "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n",
cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
rc = -EINVAL;
}
@@ -2213,7 +2212,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
le64_to_cpu(hwcqe->qp_handle));
if (!qp) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: Process Req qp is NULL");
+ "FP: Process Req qp is NULL\n");
return -EINVAL;
}
sq = &qp->sq;
@@ -2221,16 +2220,14 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
if (cqe_sq_cons > sq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process req reported ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
+ "FP: CQ Process req reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
cqe_sq_cons, sq->hwq.max_elements);
return -EINVAL;
}
if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
- "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+ "%s: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
/* Require to walk the sq's swq to fabricate CQEs for all previously
@@ -2262,9 +2259,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
hwcqe->status != CQ_REQ_STATUS_OK) {
cqe->status = hwcqe->status;
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Processed Req ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: wr_id[%d] = 0x%llx with status 0x%x",
+ "FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n",
sw_sq_cons, cqe->wr_id, cqe->status);
cqe++;
(*budget)--;
@@ -2330,12 +2325,12 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
if (!qp) {
- dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq RC qp is NULL");
+ dev_err(&cq->hwq.pdev->dev, "process_cq RC qp is NULL\n");
return -EINVAL;
}
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
- "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+ "%s: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
@@ -2356,9 +2351,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
return -EINVAL;
if (wr_id_idx >= srq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process RC ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
+ "FP: CQ Process RC wr_id idx 0x%x exceeded SRQ max 0x%x\n",
wr_id_idx, srq->hwq.max_elements);
return -EINVAL;
}
@@ -2371,9 +2364,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
rq = &qp->rq;
if (wr_id_idx >= rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process RC ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
+ "FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n",
wr_id_idx, rq->hwq.max_elements);
return -EINVAL;
}
@@ -2409,12 +2400,12 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
if (!qp) {
- dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq UD qp is NULL");
+ dev_err(&cq->hwq.pdev->dev, "process_cq UD qp is NULL\n");
return -EINVAL;
}
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
- "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+ "%s: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
cqe = *pcqe;
@@ -2439,9 +2430,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
if (wr_id_idx >= srq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process UD ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
+ "FP: CQ Process UD wr_id idx 0x%x exceeded SRQ max 0x%x\n",
wr_id_idx, srq->hwq.max_elements);
return -EINVAL;
}
@@ -2454,9 +2443,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
rq = &qp->rq;
if (wr_id_idx >= rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process UD ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
+ "FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n",
wr_id_idx, rq->hwq.max_elements);
return -EINVAL;
}
@@ -2508,13 +2495,12 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
if (!qp) {
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: process_cq Raw/QP1 qp is NULL");
+ dev_err(&cq->hwq.pdev->dev, "process_cq Raw/QP1 qp is NULL\n");
return -EINVAL;
}
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
- "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+ "%s: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
cqe = *pcqe;
@@ -2543,14 +2529,12 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
srq = qp->srq;
if (!srq) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: SRQ used but not defined??");
+ "FP: SRQ used but not defined??\n");
return -EINVAL;
}
if (wr_id_idx >= srq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process Raw/QP1 ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
+ "FP: CQ Process Raw/QP1 wr_id idx 0x%x exceeded SRQ max 0x%x\n",
wr_id_idx, srq->hwq.max_elements);
return -EINVAL;
}
@@ -2563,9 +2547,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
rq = &qp->rq;
if (wr_id_idx >= rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: ix 0x%x exceeded RQ max 0x%x",
+ "FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n",
wr_id_idx, rq->hwq.max_elements);
return -EINVAL;
}
@@ -2600,14 +2582,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
/* Check the Status */
if (hwcqe->status != CQ_TERMINAL_STATUS_OK)
dev_warn(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process Terminal Error status = 0x%x",
+ "FP: CQ Process Terminal Error status = 0x%x\n",
hwcqe->status);
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
if (!qp) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process terminal qp is NULL");
+ "FP: CQ Process terminal qp is NULL\n");
return -EINVAL;
}
@@ -2623,16 +2605,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
if (cqe_cons > sq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process terminal reported ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
+ "FP: CQ Process terminal reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
cqe_cons, sq->hwq.max_elements);
goto do_rq;
}
if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
- "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+ "%s: QP in Flush QP = %p\n", __func__, qp);
goto sq_done;
}
@@ -2673,16 +2653,14 @@ do_rq:
goto done;
} else if (cqe_cons > rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Processed terminal ");
- dev_err(&cq->hwq.pdev->dev,
- "QPLIB: reported rq_cons_idx 0x%x exceeds max 0x%x",
+ "FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n",
cqe_cons, rq->hwq.max_elements);
goto done;
}
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
- "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp);
+ "%s: QP in Flush QP = %p\n", __func__, qp);
rc = 0;
goto done;
}
@@ -2704,7 +2682,7 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq,
/* Check the Status */
if (hwcqe->status != CQ_CUTOFF_STATUS_OK) {
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: FP: CQ Process Cutoff Error status = 0x%x",
+ "FP: CQ Process Cutoff Error status = 0x%x\n",
hwcqe->status);
return -EINVAL;
}
@@ -2724,16 +2702,12 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
spin_lock_irqsave(&cq->flush_lock, flags);
list_for_each_entry(qp, &cq->sqf_head, sq_flush) {
- dev_dbg(&cq->hwq.pdev->dev,
- "QPLIB: FP: Flushing SQ QP= %p",
- qp);
+ dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing SQ QP= %p\n", qp);
__flush_sq(&qp->sq, qp, &cqe, &budget);
}
list_for_each_entry(qp, &cq->rqf_head, rq_flush) {
- dev_dbg(&cq->hwq.pdev->dev,
- "QPLIB: FP: Flushing RQ QP= %p",
- qp);
+ dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing RQ QP= %p\n", qp);
__flush_rq(&qp->rq, qp, &cqe, &budget);
}
spin_unlock_irqrestore(&cq->flush_lock, flags);
@@ -2801,7 +2775,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
goto exit;
default:
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: process_cq unknown type 0x%lx",
+ "process_cq unknown type 0x%lx\n",
hw_cqe->cqe_type_toggle &
CQ_BASE_CQE_TYPE_MASK);
rc = -EINVAL;
@@ -2814,7 +2788,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
* next one
*/
dev_err(&cq->hwq.pdev->dev,
- "QPLIB: process_cqe error rc = 0x%x", rc);
+ "process_cqe error rc = 0x%x\n", rc);
}
raw_cons++;
}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 2852d350ada1..be4e33e9f962 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -35,6 +35,9 @@
*
* Description: RDMA Controller HW interface
*/
+
+#define dev_fmt(fmt) "QPLIB: " fmt
+
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
@@ -96,14 +99,13 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: RCFW not initialized, reject opcode 0x%x",
- opcode);
+ "RCFW not initialized, reject opcode 0x%x\n", opcode);
return -EINVAL;
}
if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
- dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!");
+ dev_err(&rcfw->pdev->dev, "RCFW already initialized!\n");
return -EINVAL;
}
@@ -115,7 +117,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
*/
spin_lock_irqsave(&cmdq->lock, flags);
if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) {
- dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!");
+ dev_err(&rcfw->pdev->dev, "RCFW: CMDQ is full!\n");
spin_unlock_irqrestore(&cmdq->lock, flags);
return -EAGAIN;
}
@@ -154,7 +156,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)];
if (!cmdqe) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: RCFW request failed with no cmdqe!");
+ "RCFW request failed with no cmdqe!\n");
goto done;
}
/* Copy a segment of the req cmd to the cmdq */
@@ -210,7 +212,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) {
/* send failed */
- dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x send failed",
+ dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x send failed\n",
cookie, opcode);
return rc;
}
@@ -224,7 +226,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
rc = __wait_for_resp(rcfw, cookie);
if (rc) {
/* timed out */
- dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec",
+ dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x timedout (%d)msec\n",
cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags);
return rc;
@@ -232,7 +234,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
if (evnt->status) {
/* failed with status */
- dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x status %#x",
+ dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n",
cookie, opcode, evnt->status);
rc = -EFAULT;
}
@@ -298,9 +300,9 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
qp_id = le32_to_cpu(err_event->xid);
qp = rcfw->qp_tbl[qp_id].qp_handle;
dev_dbg(&rcfw->pdev->dev,
- "QPLIB: Received QP error notification");
+ "Received QP error notification\n");
dev_dbg(&rcfw->pdev->dev,
- "QPLIB: qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
+ "qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
qp_id, err_event->req_err_state_reason,
err_event->res_err_state_reason);
if (!qp)
@@ -309,8 +311,17 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
rcfw->aeq_handler(rcfw, qp_event, qp);
break;
default:
- /* Command Response */
- spin_lock_irqsave(&cmdq->lock, flags);
+ /*
+ * Command Response
+ * cmdq->lock needs to be acquired to synchronie
+ * the command send and completion reaping. This function
+ * is always called with creq->lock held. Using
+ * the nested variant of spin_lock.
+ *
+ */
+
+ spin_lock_irqsave_nested(&cmdq->lock, flags,
+ SINGLE_DEPTH_NESTING);
cookie = le16_to_cpu(qp_event->cookie);
mcookie = qp_event->cookie;
blocked = cookie & RCFW_CMD_IS_BLOCKING;
@@ -322,14 +333,16 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
crsqe->resp = NULL;
} else {
- dev_err(&rcfw->pdev->dev,
- "QPLIB: CMD %s resp->cookie = %#x, evnt->cookie = %#x",
- crsqe->resp ? "mismatch" : "collision",
- crsqe->resp ? crsqe->resp->cookie : 0, mcookie);
+ if (crsqe->resp && crsqe->resp->cookie)
+ dev_err(&rcfw->pdev->dev,
+ "CMD %s cookie sent=%#x, recd=%#x\n",
+ crsqe->resp ? "mismatch" : "collision",
+ crsqe->resp ? crsqe->resp->cookie : 0,
+ mcookie);
}
if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
dev_warn(&rcfw->pdev->dev,
- "QPLIB: CMD bit %d was not requested", cbit);
+ "CMD bit %d was not requested\n", cbit);
cmdq->cons += crsqe->req_size;
crsqe->req_size = 0;
@@ -376,14 +389,14 @@ static void bnxt_qplib_service_creq(unsigned long data)
(rcfw, (struct creq_func_event *)creqe))
rcfw->creq_func_event_processed++;
else
- dev_warn
- (&rcfw->pdev->dev, "QPLIB:aeqe:%#x Not handled",
- type);
+ dev_warn(&rcfw->pdev->dev,
+ "aeqe:%#x Not handled\n", type);
break;
default:
- dev_warn(&rcfw->pdev->dev, "QPLIB: creqe with ");
- dev_warn(&rcfw->pdev->dev,
- "QPLIB: op_event = 0x%x not handled", type);
+ if (type != ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT)
+ dev_warn(&rcfw->pdev->dev,
+ "creqe with event 0x%x not handled\n",
+ type);
break;
}
raw_cons++;
@@ -551,7 +564,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE,
HWQ_TYPE_L2_CMPL)) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: HW channel CREQ allocation failed");
+ "HW channel CREQ allocation failed\n");
goto fail;
}
rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT;
@@ -560,7 +573,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE,
HWQ_TYPE_CTX)) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: HW channel CMDQ allocation failed");
+ "HW channel CMDQ allocation failed\n");
goto fail;
}
@@ -605,21 +618,18 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
bnxt_qplib_rcfw_stop_irq(rcfw, true);
- if (rcfw->cmdq_bar_reg_iomem)
- iounmap(rcfw->cmdq_bar_reg_iomem);
- rcfw->cmdq_bar_reg_iomem = NULL;
-
- if (rcfw->creq_bar_reg_iomem)
- iounmap(rcfw->creq_bar_reg_iomem);
- rcfw->creq_bar_reg_iomem = NULL;
+ iounmap(rcfw->cmdq_bar_reg_iomem);
+ iounmap(rcfw->creq_bar_reg_iomem);
indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size);
if (indx != rcfw->bmap_size)
dev_err(&rcfw->pdev->dev,
- "QPLIB: disabling RCFW with pending cmd-bit %lx", indx);
+ "disabling RCFW with pending cmd-bit %lx\n", indx);
kfree(rcfw->cmdq_bitmap);
rcfw->bmap_size = 0;
+ rcfw->cmdq_bar_reg_iomem = NULL;
+ rcfw->creq_bar_reg_iomem = NULL;
rcfw->aeq_handler = NULL;
rcfw->vector = 0;
}
@@ -681,8 +691,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
RCFW_COMM_BASE_OFFSET,
RCFW_COMM_SIZE);
if (!rcfw->cmdq_bar_reg_iomem) {
- dev_err(&rcfw->pdev->dev,
- "QPLIB: CMDQ BAR region %d mapping failed",
+ dev_err(&rcfw->pdev->dev, "CMDQ BAR region %d mapping failed\n",
rcfw->cmdq_bar_reg);
return -ENOMEM;
}
@@ -697,14 +706,15 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
if (!res_base)
dev_err(&rcfw->pdev->dev,
- "QPLIB: CREQ BAR region %d resc start is 0!",
+ "CREQ BAR region %d resc start is 0!\n",
rcfw->creq_bar_reg);
rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off,
4);
if (!rcfw->creq_bar_reg_iomem) {
- dev_err(&rcfw->pdev->dev,
- "QPLIB: CREQ BAR region %d mapping failed",
+ dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n",
rcfw->creq_bar_reg);
+ iounmap(rcfw->cmdq_bar_reg_iomem);
+ rcfw->cmdq_bar_reg_iomem = NULL;
return -ENOMEM;
}
rcfw->creq_qp_event_processed = 0;
@@ -717,7 +727,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true);
if (rc) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: Failed to request IRQ for CREQ rc = 0x%x", rc);
+ "Failed to request IRQ for CREQ rc = 0x%x\n", rc);
bnxt_qplib_disable_rcfw_channel(rcfw);
return rc;
}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 46416dfe8830..9a8687dc0a79 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -154,6 +154,8 @@ struct bnxt_qplib_qp_node {
void *qp_handle; /* ptr to qplib_qp */
};
+#define BNXT_QPLIB_OOS_COUNT_MASK 0xFFFFFFFF
+
/* RCFW Communication Channels */
struct bnxt_qplib_rcfw {
struct pci_dev *pdev;
@@ -190,6 +192,8 @@ struct bnxt_qplib_rcfw {
struct bnxt_qplib_crsq *crsqe_tbl;
int qp_tbl_size;
struct bnxt_qplib_qp_node *qp_tbl;
+ u64 oos_prev;
+ u32 init_oos_stats;
};
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 539a5d44e6db..59eeac55626f 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -36,6 +36,8 @@
* Description: QPLib resource manager
*/
+#define dev_fmt(fmt) "QPLIB: " fmt
+
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
@@ -68,8 +70,7 @@ static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
pbl->pg_map_arr[i]);
else
dev_warn(&pdev->dev,
- "QPLIB: PBL free pg_arr[%d] empty?!",
- i);
+ "PBL free pg_arr[%d] empty?!\n", i);
pbl->pg_arr[i] = NULL;
}
}
@@ -537,7 +538,7 @@ static void bnxt_qplib_free_pkey_tbl(struct bnxt_qplib_res *res,
struct bnxt_qplib_pkey_tbl *pkey_tbl)
{
if (!pkey_tbl->tbl)
- dev_dbg(&res->pdev->dev, "QPLIB: PKEY tbl not present");
+ dev_dbg(&res->pdev->dev, "PKEY tbl not present\n");
else
kfree(pkey_tbl->tbl);
@@ -578,7 +579,7 @@ int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
struct bnxt_qplib_pd *pd)
{
if (test_and_set_bit(pd->id, pdt->tbl)) {
- dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d",
+ dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d\n",
pd->id);
return -EINVAL;
}
@@ -639,11 +640,11 @@ int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
struct bnxt_qplib_dpi *dpi)
{
if (dpi->dpi >= dpit->max) {
- dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d", dpi->dpi);
+ dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d\n", dpi->dpi);
return -EINVAL;
}
if (test_and_set_bit(dpi->dpi, dpit->tbl)) {
- dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d",
+ dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d\n",
dpi->dpi);
return -EINVAL;
}
@@ -673,22 +674,21 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
u32 dbr_len, bytes;
if (dpit->dbr_bar_reg_iomem) {
- dev_err(&res->pdev->dev,
- "QPLIB: DBR BAR region %d already mapped", dbr_bar_reg);
+ dev_err(&res->pdev->dev, "DBR BAR region %d already mapped\n",
+ dbr_bar_reg);
return -EALREADY;
}
bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg);
if (!bar_reg_base) {
- dev_err(&res->pdev->dev,
- "QPLIB: BAR region %d resc start failed", dbr_bar_reg);
+ dev_err(&res->pdev->dev, "BAR region %d resc start failed\n",
+ dbr_bar_reg);
return -ENOMEM;
}
dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset;
if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) {
- dev_err(&res->pdev->dev, "QPLIB: Invalid DBR length %d",
- dbr_len);
+ dev_err(&res->pdev->dev, "Invalid DBR length %d\n", dbr_len);
return -ENOMEM;
}
@@ -696,8 +696,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
dbr_len);
if (!dpit->dbr_bar_reg_iomem) {
dev_err(&res->pdev->dev,
- "QPLIB: FP: DBR BAR region %d mapping failed",
- dbr_bar_reg);
+ "FP: DBR BAR region %d mapping failed\n", dbr_bar_reg);
return -ENOMEM;
}
@@ -767,7 +766,7 @@ static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
&stats->dma_map, GFP_KERNEL);
if (!stats->dma) {
- dev_err(&pdev->dev, "QPLIB: Stats DMA allocation failed");
+ dev_err(&pdev->dev, "Stats DMA allocation failed\n");
return -ENOMEM;
}
return 0;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 4097f3fa25c5..5216b5f844cc 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -36,6 +36,8 @@
* Description: Slow Path Operators
*/
+#define dev_fmt(fmt) "QPLIB: " fmt
+
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
@@ -89,7 +91,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
if (!sbuf) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: SP: QUERY_FUNC alloc side buffer failed");
+ "SP: QUERY_FUNC alloc side buffer failed\n");
return -ENOMEM;
}
@@ -135,8 +137,16 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
attr->max_srq = le16_to_cpu(sb->max_srq);
attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
attr->max_srq_sges = sb->max_srq_sge;
- /* Bono only reports 1 PKEY for now, but it can support > 1 */
attr->max_pkey = le32_to_cpu(sb->max_pkeys);
+ /*
+ * Some versions of FW reports more than 0xFFFF.
+ * Restrict it for now to 0xFFFF to avoid
+ * reporting trucated value
+ */
+ if (attr->max_pkey > 0xFFFF) {
+ /* ib_port_attr::pkey_tbl_len is u16 */
+ attr->max_pkey = 0xFFFF;
+ }
attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
attr->l2_db_size = (sb->l2_db_space_size + 1) *
@@ -186,8 +196,7 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
(void *)&resp,
NULL, 0);
if (rc) {
- dev_err(&res->pdev->dev,
- "QPLIB: Failed to set function resources");
+ dev_err(&res->pdev->dev, "Failed to set function resources\n");
}
return rc;
}
@@ -199,7 +208,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
{
if (index >= sgid_tbl->max) {
dev_err(&res->pdev->dev,
- "QPLIB: Index %d exceeded SGID table max (%d)",
+ "Index %d exceeded SGID table max (%d)\n",
index, sgid_tbl->max);
return -EINVAL;
}
@@ -217,13 +226,12 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
int index;
if (!sgid_tbl) {
- dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
+ dev_err(&res->pdev->dev, "SGID table not allocated\n");
return -EINVAL;
}
/* Do we need a sgid_lock here? */
if (!sgid_tbl->active) {
- dev_err(&res->pdev->dev,
- "QPLIB: SGID table has no active entries");
+ dev_err(&res->pdev->dev, "SGID table has no active entries\n");
return -ENOMEM;
}
for (index = 0; index < sgid_tbl->max; index++) {
@@ -231,7 +239,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
break;
}
if (index == sgid_tbl->max) {
- dev_warn(&res->pdev->dev, "GID not found in the SGID table");
+ dev_warn(&res->pdev->dev, "GID not found in the SGID table\n");
return 0;
}
/* Remove GID from the SGID table */
@@ -244,7 +252,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
RCFW_CMD_PREP(req, DELETE_GID, cmd_flags);
if (sgid_tbl->hw_id[index] == 0xFFFF) {
dev_err(&res->pdev->dev,
- "QPLIB: GID entry contains an invalid HW id");
+ "GID entry contains an invalid HW id\n");
return -EINVAL;
}
req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
@@ -258,7 +266,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
sgid_tbl->vlan[index] = 0;
sgid_tbl->active--;
dev_dbg(&res->pdev->dev,
- "QPLIB: SGID deleted hw_id[0x%x] = 0x%x active = 0x%x",
+ "SGID deleted hw_id[0x%x] = 0x%x active = 0x%x\n",
index, sgid_tbl->hw_id[index], sgid_tbl->active);
sgid_tbl->hw_id[index] = (u16)-1;
@@ -277,20 +285,19 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
int i, free_idx;
if (!sgid_tbl) {
- dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
+ dev_err(&res->pdev->dev, "SGID table not allocated\n");
return -EINVAL;
}
/* Do we need a sgid_lock here? */
if (sgid_tbl->active == sgid_tbl->max) {
- dev_err(&res->pdev->dev, "QPLIB: SGID table is full");
+ dev_err(&res->pdev->dev, "SGID table is full\n");
return -ENOMEM;
}
free_idx = sgid_tbl->max;
for (i = 0; i < sgid_tbl->max; i++) {
if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) {
dev_dbg(&res->pdev->dev,
- "QPLIB: SGID entry already exist in entry %d!",
- i);
+ "SGID entry already exist in entry %d!\n", i);
*index = i;
return -EALREADY;
} else if (!memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
@@ -301,7 +308,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
}
if (free_idx == sgid_tbl->max) {
dev_err(&res->pdev->dev,
- "QPLIB: SGID table is FULL but count is not MAX??");
+ "SGID table is FULL but count is not MAX??\n");
return -ENOMEM;
}
if (update) {
@@ -348,7 +355,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
sgid_tbl->vlan[free_idx] = 1;
dev_dbg(&res->pdev->dev,
- "QPLIB: SGID added hw_id[0x%x] = 0x%x active = 0x%x",
+ "SGID added hw_id[0x%x] = 0x%x active = 0x%x\n",
free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active);
*index = free_idx;
@@ -404,7 +411,7 @@ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
}
if (index >= pkey_tbl->max) {
dev_err(&res->pdev->dev,
- "QPLIB: Index %d exceeded PKEY table max (%d)",
+ "Index %d exceeded PKEY table max (%d)\n",
index, pkey_tbl->max);
return -EINVAL;
}
@@ -419,14 +426,13 @@ int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
int i, rc = 0;
if (!pkey_tbl) {
- dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated");
+ dev_err(&res->pdev->dev, "PKEY table not allocated\n");
return -EINVAL;
}
/* Do we need a pkey_lock here? */
if (!pkey_tbl->active) {
- dev_err(&res->pdev->dev,
- "QPLIB: PKEY table has no active entries");
+ dev_err(&res->pdev->dev, "PKEY table has no active entries\n");
return -ENOMEM;
}
for (i = 0; i < pkey_tbl->max; i++) {
@@ -435,8 +441,7 @@ int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
}
if (i == pkey_tbl->max) {
dev_err(&res->pdev->dev,
- "QPLIB: PKEY 0x%04x not found in the pkey table",
- *pkey);
+ "PKEY 0x%04x not found in the pkey table\n", *pkey);
return -ENOMEM;
}
memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey));
@@ -453,13 +458,13 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
int i, free_idx, rc = 0;
if (!pkey_tbl) {
- dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated");
+ dev_err(&res->pdev->dev, "PKEY table not allocated\n");
return -EINVAL;
}
/* Do we need a pkey_lock here? */
if (pkey_tbl->active == pkey_tbl->max) {
- dev_err(&res->pdev->dev, "QPLIB: PKEY table is full");
+ dev_err(&res->pdev->dev, "PKEY table is full\n");
return -ENOMEM;
}
free_idx = pkey_tbl->max;
@@ -471,7 +476,7 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
}
if (free_idx == pkey_tbl->max) {
dev_err(&res->pdev->dev,
- "QPLIB: PKEY table is FULL but count is not MAX??");
+ "PKEY table is FULL but count is not MAX??\n");
return -ENOMEM;
}
/* Add PKEY to the pkey_tbl */
@@ -555,8 +560,7 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
int rc;
if (mrw->lkey == 0xFFFFFFFF) {
- dev_info(&res->pdev->dev,
- "QPLIB: SP: Free a reserved lkey MRW");
+ dev_info(&res->pdev->dev, "SP: Free a reserved lkey MRW\n");
return 0;
}
@@ -666,9 +670,8 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
pages++;
if (pages > MAX_PBL_LVL_1_PGS) {
- dev_err(&res->pdev->dev, "QPLIB: SP: Reg MR pages ");
dev_err(&res->pdev->dev,
- "requested (0x%x) exceeded max (0x%x)",
+ "SP: Reg MR pages requested (0x%x) exceeded max (0x%x)\n",
pages, MAX_PBL_LVL_1_PGS);
return -ENOMEM;
}
@@ -684,7 +687,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
HWQ_TYPE_CTX);
if (rc) {
dev_err(&res->pdev->dev,
- "SP: Reg MR memory allocation failed");
+ "SP: Reg MR memory allocation failed\n");
return -ENOMEM;
}
/* Write to the hwq */
@@ -795,7 +798,7 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
if (!sbuf) {
dev_err(&rcfw->pdev->dev,
- "QPLIB: SP: QUERY_ROCE_STATS alloc side buffer failed");
+ "SP: QUERY_ROCE_STATS alloc side buffer failed\n");
return -ENOMEM;
}
@@ -845,6 +848,16 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
stats->res_srq_load_err = le64_to_cpu(sb->res_srq_load_err);
stats->res_tx_pci_err = le64_to_cpu(sb->res_tx_pci_err);
stats->res_rx_pci_err = le64_to_cpu(sb->res_rx_pci_err);
+ if (!rcfw->init_oos_stats) {
+ rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count);
+ rcfw->init_oos_stats = 1;
+ } else {
+ stats->res_oos_drop_count +=
+ (le64_to_cpu(sb->res_oos_drop_count) -
+ rcfw->oos_prev) & BNXT_QPLIB_OOS_COUNT_MASK;
+ rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count);
+ }
+
bail:
bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
return rc;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 9d3e8b994945..8079d7f5a008 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -205,6 +205,16 @@ struct bnxt_qplib_roce_stats {
/* res_tx_pci_err is 64 b */
u64 res_rx_pci_err;
/* res_rx_pci_err is 64 b */
+ u64 res_oos_drop_count;
+ /* res_oos_drop_count */
+ u64 active_qp_count_p0;
+ /* port 0 active qps */
+ u64 active_qp_count_p1;
+ /* port 1 active qps */
+ u64 active_qp_count_p2;
+ /* port 2 active qps */
+ u64 active_qp_count_p3;
+ /* port 3 active qps */
};
int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 3e5a4f760d0e..8a9ead419ac2 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -2929,6 +2929,11 @@ struct creq_query_roce_stats_resp_sb {
__le64 res_srq_load_err;
__le64 res_tx_pci_err;
__le64 res_rx_pci_err;
+ __le64 res_oos_drop_count;
+ __le64 active_qp_count_p0;
+ __le64 active_qp_count_p1;
+ __le64 active_qp_count_p2;
+ __le64 active_qp_count_p3;
};
/* QP error notification event (16 bytes) */
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 1b9ff21aa1d5..ebbec02cebe0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1127,17 +1127,18 @@ static int iwch_query_port(struct ib_device *ibdev,
return 0;
}
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
@@ -1148,9 +1149,10 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.driver);
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
@@ -1158,6 +1160,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
iwch_dev->rdev.rnic_info.pdev->device);
}
+static DEVICE_ATTR_RO(board_id);
enum counters {
IPINRECEIVES,
@@ -1274,14 +1277,15 @@ static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
return stats->num_counters;
}
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static struct attribute *iwch_class_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
+};
-static struct device_attribute *iwch_class_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id,
+static const struct attribute_group iwch_attr_group = {
+ .attrs = iwch_class_attributes,
};
static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
@@ -1316,10 +1320,8 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str)
int iwch_register_device(struct iwch_dev *dev)
{
int ret;
- int i;
pr_debug("%s iwch_dev %p\n", __func__, dev);
- strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
dev->ibdev.owner = THIS_MODULE;
@@ -1402,33 +1404,16 @@ int iwch_register_device(struct iwch_dev *dev)
sizeof(dev->ibdev.iwcm->ifname));
dev->ibdev.driver_id = RDMA_DRIVER_CXGB3;
- ret = ib_register_device(&dev->ibdev, NULL);
+ rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
+ ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL);
if (ret)
- goto bail1;
-
- for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) {
- ret = device_create_file(&dev->ibdev.dev,
- iwch_class_attributes[i]);
- if (ret) {
- goto bail2;
- }
- }
- return 0;
-bail2:
- ib_unregister_device(&dev->ibdev);
-bail1:
- kfree(dev->ibdev.iwcm);
+ kfree(dev->ibdev.iwcm);
return ret;
}
void iwch_unregister_device(struct iwch_dev *dev)
{
- int i;
-
pr_debug("%s iwch_dev %p\n", __func__, dev);
- for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
- device_remove_file(&dev->ibdev.dev,
- iwch_class_attributes[i]);
ib_unregister_device(&dev->ibdev);
kfree(dev->ibdev.iwcm);
return;
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 0f83cbec33f3..615413bd3e8d 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -403,8 +403,7 @@ void _c4iw_free_ep(struct kref *kref)
ep->com.local_addr.ss_family);
dst_release(ep->dst);
cxgb4_l2t_release(ep->l2t);
- if (ep->mpa_skb)
- kfree_skb(ep->mpa_skb);
+ kfree_skb(ep->mpa_skb);
}
if (!skb_queue_empty(&ep->com.ep_skb_list))
skb_queue_purge(&ep->com.ep_skb_list);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 6d3042794094..1fd8798d91a7 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -161,7 +161,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
cq->gts = rdev->lldi.gts_reg;
cq->rdev = rdev;
- cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
+ cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, CXGB4_BAR2_QTYPE_INGRESS,
&cq->bar2_qid,
user ? &cq->bar2_pa : NULL);
if (user && !cq->bar2_pa) {
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 4eda6872e617..cbb3c0ddd990 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -373,8 +373,8 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
return 0;
}
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
@@ -382,9 +382,10 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%d\n",
CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
@@ -395,9 +396,10 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.driver);
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
@@ -405,6 +407,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
c4iw_dev->rdev.lldi.pdev->device);
}
+static DEVICE_ATTR_RO(board_id);
enum counters {
IP4INSEGS,
@@ -461,14 +464,15 @@ static int c4iw_get_mib(struct ib_device *ibdev,
return stats->num_counters;
}
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static struct attribute *c4iw_class_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
+};
-static struct device_attribute *c4iw_class_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id,
+static const struct attribute_group c4iw_attr_group = {
+ .attrs = c4iw_class_attributes,
};
static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
@@ -530,12 +534,10 @@ static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res)
void c4iw_register_device(struct work_struct *work)
{
int ret;
- int i;
struct uld_ctx *ctx = container_of(work, struct uld_ctx, reg_work);
struct c4iw_dev *dev = ctx->dev;
pr_debug("c4iw_dev %p\n", dev);
- strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
dev->ibdev.owner = THIS_MODULE;
@@ -626,20 +628,13 @@ void c4iw_register_device(struct work_struct *work)
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
sizeof(dev->ibdev.iwcm->ifname));
+ rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group);
dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
- ret = ib_register_device(&dev->ibdev, NULL);
+ ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL);
if (ret)
goto err_kfree_iwcm;
-
- for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) {
- ret = device_create_file(&dev->ibdev.dev,
- c4iw_class_attributes[i]);
- if (ret)
- goto err_unregister_device;
- }
return;
-err_unregister_device:
- ib_unregister_device(&dev->ibdev);
+
err_kfree_iwcm:
kfree(dev->ibdev.iwcm);
err_dealloc_ctx:
@@ -651,12 +646,7 @@ err_dealloc_ctx:
void c4iw_unregister_device(struct c4iw_dev *dev)
{
- int i;
-
pr_debug("c4iw_dev %p\n", dev);
- for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i)
- device_remove_file(&dev->ibdev.dev,
- c4iw_class_attributes[i]);
ib_unregister_device(&dev->ibdev);
kfree(dev->ibdev.iwcm);
return;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 62d6f197ec0b..13478f3b7057 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -279,12 +279,13 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
wq->db = rdev->lldi.db_reg;
- wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS,
+ wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid,
+ CXGB4_BAR2_QTYPE_EGRESS,
&wq->sq.bar2_qid,
user ? &wq->sq.bar2_pa : NULL);
if (need_rq)
wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
- T4_BAR2_QTYPE_EGRESS,
+ CXGB4_BAR2_QTYPE_EGRESS,
&wq->rq.bar2_qid,
user ? &wq->rq.bar2_pa : NULL);
@@ -2572,7 +2573,7 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
memset(wq->queue, 0, wq->memsize);
dma_unmap_addr_set(wq, mapping, wq->dma_addr);
- wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS,
+ wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS,
&wq->bar2_qid,
user ? &wq->bar2_pa : NULL);
@@ -2813,8 +2814,7 @@ err_free_queue:
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
srq->wr_waitp);
err_free_skb:
- if (srq->destroy_skb)
- kfree_skb(srq->destroy_skb);
+ kfree_skb(srq->destroy_skb);
err_free_srq_idx:
c4iw_free_srq_idx(&rhp->rdev, srq->idx);
err_free_wr_wait:
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index f451ba912f47..ff790390c91a 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -8,12 +8,42 @@
#
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
-hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
- eprom.o exp_rcv.o file_ops.o firmware.o \
- init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
- qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
- uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
- verbs_txreq.o vnic_main.o vnic_sdma.o
+hfi1-y := \
+ affinity.o \
+ chip.o \
+ device.o \
+ driver.o \
+ efivar.o \
+ eprom.o \
+ exp_rcv.o \
+ file_ops.o \
+ firmware.o \
+ init.o \
+ intr.o \
+ iowait.o \
+ mad.o \
+ mmu_rb.o \
+ msix.o \
+ pcie.o \
+ pio.o \
+ pio_copy.o \
+ platform.o \
+ qp.o \
+ qsfp.o \
+ rc.o \
+ ruc.o \
+ sdma.o \
+ sysfs.o \
+ trace.o \
+ uc.o \
+ ud.o \
+ user_exp_rcv.o \
+ user_pages.o \
+ user_sdma.o \
+ verbs.o \
+ verbs_txreq.o \
+ vnic_main.o \
+ vnic_sdma.o
ifdef CONFIG_DEBUG_FS
hfi1-y += debugfs.o
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index bedd5fba33b0..2baf38cc1e23 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -817,10 +817,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
set = &entry->def_intr;
cpumask_set_cpu(cpu, &set->mask);
cpumask_set_cpu(cpu, &set->used);
- for (i = 0; i < dd->num_msix_entries; i++) {
+ for (i = 0; i < dd->msix_info.max_requested; i++) {
struct hfi1_msix_entry *other_msix;
- other_msix = &dd->msix_entries[i];
+ other_msix = &dd->msix_info.msix_entries[i];
if (other_msix->type != IRQ_SDMA || other_msix == msix)
continue;
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index e1668bcc2d13..9b20479dc710 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -67,8 +67,6 @@
#include "debugfs.h"
#include "fault.h"
-#define NUM_IB_PORTS 1
-
uint kdeth_qp;
module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
@@ -1100,9 +1098,9 @@ struct err_reg_info {
const char *desc;
};
-#define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
-#define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
-#define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)
+#define NUM_MISC_ERRS (IS_GENERAL_ERR_END + 1 - IS_GENERAL_ERR_START)
+#define NUM_DC_ERRS (IS_DC_END + 1 - IS_DC_START)
+#define NUM_VARIOUS (IS_VARIOUS_END + 1 - IS_VARIOUS_START)
/*
* Helpers for building HFI and DC error interrupt table entries. Different
@@ -8181,7 +8179,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
/**
* is_rcv_urgent_int() - User receive context urgent IRQ handler
* @dd: valid dd
- * @source: logical IRQ source (ofse from IS_RCVURGENT_START)
+ * @source: logical IRQ source (offset from IS_RCVURGENT_START)
*
* RX block receive urgent interrupt. Source is < 160.
*
@@ -8231,7 +8229,7 @@ static const struct is_table is_table[] = {
is_sdma_eng_err_name, is_sdma_eng_err_int },
{ IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
is_sendctxt_err_name, is_sendctxt_err_int },
-{ IS_SDMA_START, IS_SDMA_END,
+{ IS_SDMA_START, IS_SDMA_IDLE_END,
is_sdma_eng_name, is_sdma_eng_int },
{ IS_VARIOUS_START, IS_VARIOUS_END,
is_various_name, is_various_int },
@@ -8257,7 +8255,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
/* avoids a double compare by walking the table in-order */
for (entry = &is_table[0]; entry->is_name; entry++) {
- if (source < entry->end) {
+ if (source <= entry->end) {
trace_hfi1_interrupt(dd, entry, source);
entry->is_int(dd, source - entry->start);
return;
@@ -8276,7 +8274,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
* context DATA IRQs are threaded and are not supported by this handler.
*
*/
-static irqreturn_t general_interrupt(int irq, void *data)
+irqreturn_t general_interrupt(int irq, void *data)
{
struct hfi1_devdata *dd = data;
u64 regs[CCE_NUM_INT_CSRS];
@@ -8309,7 +8307,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
return handled;
}
-static irqreturn_t sdma_interrupt(int irq, void *data)
+irqreturn_t sdma_interrupt(int irq, void *data)
{
struct sdma_engine *sde = data;
struct hfi1_devdata *dd = sde->dd;
@@ -8401,7 +8399,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
* invoked) is finished. The intent is to avoid extra interrupts while we
* are processing packets anyway.
*/
-static irqreturn_t receive_context_interrupt(int irq, void *data)
+irqreturn_t receive_context_interrupt(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
struct hfi1_devdata *dd = rcd->dd;
@@ -8441,7 +8439,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data)
* Receive packet thread handler. This expects to be invoked with the
* receive interrupt still blocked.
*/
-static irqreturn_t receive_context_thread(int irq, void *data)
+irqreturn_t receive_context_thread(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
int present;
@@ -9651,30 +9649,10 @@ void qsfp_event(struct work_struct *work)
}
}
-static void init_qsfp_int(struct hfi1_devdata *dd)
+void init_qsfp_int(struct hfi1_devdata *dd)
{
struct hfi1_pportdata *ppd = dd->pport;
- u64 qsfp_mask, cce_int_mask;
- const int qsfp1_int_smask = QSFP1_INT % 64;
- const int qsfp2_int_smask = QSFP2_INT % 64;
-
- /*
- * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
- * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
- * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
- * the index of the appropriate CSR in the CCEIntMask CSR array
- */
- cce_int_mask = read_csr(dd, CCE_INT_MASK +
- (8 * (QSFP1_INT / 64)));
- if (dd->hfi1_id) {
- cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
- write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)),
- cce_int_mask);
- } else {
- cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
- write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)),
- cce_int_mask);
- }
+ u64 qsfp_mask;
qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
/* Clear current status to avoid spurious interrupts */
@@ -9691,6 +9669,12 @@ static void init_qsfp_int(struct hfi1_devdata *dd)
write_csr(dd,
dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
qsfp_mask);
+
+ /* Enable the appropriate QSFP IRQ source */
+ if (!dd->hfi1_id)
+ set_intr_bits(dd, QSFP1_INT, QSFP1_INT, true);
+ else
+ set_intr_bits(dd, QSFP2_INT, QSFP2_INT, true);
}
/*
@@ -10577,12 +10561,29 @@ void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
}
}
-/*
- * Verify if BCT for data VLs is non-zero.
+/**
+ * data_vls_operational() - Verify if data VL BCT credits and MTU
+ * are both set.
+ * @ppd: pointer to hfi1_pportdata structure
+ *
+ * Return: true - Ok, false -otherwise.
*/
static inline bool data_vls_operational(struct hfi1_pportdata *ppd)
{
- return !!ppd->actual_vls_operational;
+ int i;
+ u64 reg;
+
+ if (!ppd->actual_vls_operational)
+ return false;
+
+ for (i = 0; i < ppd->vls_supported; i++) {
+ reg = read_csr(ppd->dd, SEND_CM_CREDIT_VL + (8 * i));
+ if ((reg && !ppd->dd->vld[i].mtu) ||
+ (!reg && ppd->dd->vld[i].mtu))
+ return false;
+ }
+
+ return true;
}
/*
@@ -10695,7 +10696,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
if (!data_vls_operational(ppd)) {
dd_dev_err(dd,
- "%s: data VLs not operational\n", __func__);
+ "%s: Invalid data VL credits or mtu\n",
+ __func__);
ret = -EINVAL;
break;
}
@@ -11932,10 +11934,16 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
}
- if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
+ if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) {
+ set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
+ IS_RCVAVAIL_START + rcd->ctxt, true);
rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
- if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
+ }
+ if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) {
+ set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
+ IS_RCVAVAIL_START + rcd->ctxt, false);
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
+ }
if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
@@ -11965,6 +11973,13 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
+ if (op & HFI1_RCVCTRL_URGENT_ENB)
+ set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
+ IS_RCVURGENT_START + rcd->ctxt, true);
+ if (op & HFI1_RCVCTRL_URGENT_DIS)
+ set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
+ IS_RCVURGENT_START + rcd->ctxt, false);
+
hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl);
@@ -12963,63 +12978,71 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
return ret;
}
+/* ========================================================================= */
+
/**
- * get_int_mask - get 64 bit int mask
- * @dd - the devdata
- * @i - the csr (relative to CCE_INT_MASK)
+ * read_mod_write() - Calculate the IRQ register index and set/clear the bits
+ * @dd: valid devdata
+ * @src: IRQ source to determine register index from
+ * @bits: the bits to set or clear
+ * @set: true == set the bits, false == clear the bits
*
- * Returns the mask with the urgent interrupt mask
- * bit clear for kernel receive contexts.
*/
-static u64 get_int_mask(struct hfi1_devdata *dd, u32 i)
+static void read_mod_write(struct hfi1_devdata *dd, u16 src, u64 bits,
+ bool set)
{
- u64 mask = U64_MAX; /* default to no change */
-
- if (i >= (IS_RCVURGENT_START / 64) && i < (IS_RCVURGENT_END / 64)) {
- int j = (i - (IS_RCVURGENT_START / 64)) * 64;
- int k = !j ? IS_RCVURGENT_START % 64 : 0;
+ u64 reg;
+ u16 idx = src / BITS_PER_REGISTER;
- if (j)
- j -= IS_RCVURGENT_START % 64;
- /* j = 0..dd->first_dyn_alloc_ctxt - 1,k = 0..63 */
- for (; j < dd->first_dyn_alloc_ctxt && k < 64; j++, k++)
- /* convert to bit in mask and clear */
- mask &= ~BIT_ULL(k);
- }
- return mask;
+ spin_lock(&dd->irq_src_lock);
+ reg = read_csr(dd, CCE_INT_MASK + (8 * idx));
+ if (set)
+ reg |= bits;
+ else
+ reg &= ~bits;
+ write_csr(dd, CCE_INT_MASK + (8 * idx), reg);
+ spin_unlock(&dd->irq_src_lock);
}
-/* ========================================================================= */
-
-/*
- * Enable/disable chip from delivering interrupts.
+/**
+ * set_intr_bits() - Enable/disable a range (one or more) IRQ sources
+ * @dd: valid devdata
+ * @first: first IRQ source to set/clear
+ * @last: last IRQ source (inclusive) to set/clear
+ * @set: true == set the bits, false == clear the bits
+ *
+ * If first == last, set the exact source.
*/
-void set_intr_state(struct hfi1_devdata *dd, u32 enable)
+int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set)
{
- int i;
+ u64 bits = 0;
+ u64 bit;
+ u16 src;
- /*
- * In HFI, the mask needs to be 1 to allow interrupts.
- */
- if (enable) {
- /* enable all interrupts but urgent on kernel contexts */
- for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
- u64 mask = get_int_mask(dd, i);
+ if (first > NUM_INTERRUPT_SOURCES || last > NUM_INTERRUPT_SOURCES)
+ return -EINVAL;
- write_csr(dd, CCE_INT_MASK + (8 * i), mask);
- }
+ if (last < first)
+ return -ERANGE;
- init_qsfp_int(dd);
- } else {
- for (i = 0; i < CCE_NUM_INT_CSRS; i++)
- write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
+ for (src = first; src <= last; src++) {
+ bit = src % BITS_PER_REGISTER;
+ /* wrapped to next register? */
+ if (!bit && bits) {
+ read_mod_write(dd, src - 1, bits, set);
+ bits = 0;
+ }
+ bits |= BIT_ULL(bit);
}
+ read_mod_write(dd, last, bits, set);
+
+ return 0;
}
/*
* Clear all interrupt sources on the chip.
*/
-static void clear_all_interrupts(struct hfi1_devdata *dd)
+void clear_all_interrupts(struct hfi1_devdata *dd)
{
int i;
@@ -13043,38 +13066,11 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
}
-/**
- * hfi1_clean_up_interrupts() - Free all IRQ resources
- * @dd: valid device data data structure
- *
- * Free the MSIx and assoicated PCI resources, if they have been allocated.
- */
-void hfi1_clean_up_interrupts(struct hfi1_devdata *dd)
-{
- int i;
- struct hfi1_msix_entry *me = dd->msix_entries;
-
- /* remove irqs - must happen before disabling/turning off */
- for (i = 0; i < dd->num_msix_entries; i++, me++) {
- if (!me->arg) /* => no irq, no affinity */
- continue;
- hfi1_put_irq_affinity(dd, me);
- pci_free_irq(dd->pcidev, i, me->arg);
- }
-
- /* clean structures */
- kfree(dd->msix_entries);
- dd->msix_entries = NULL;
- dd->num_msix_entries = 0;
-
- pci_free_irq_vectors(dd->pcidev);
-}
-
/*
* Remap the interrupt source from the general handler to the given MSI-X
* interrupt.
*/
-static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
+void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
{
u64 reg;
int m, n;
@@ -13098,8 +13094,7 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
write_csr(dd, CCE_INT_MAP + (8 * m), reg);
}
-static void remap_sdma_interrupts(struct hfi1_devdata *dd,
- int engine, int msix_intr)
+void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr)
{
/*
* SDMA engine interrupt sources grouped by type, rather than
@@ -13108,204 +13103,16 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd,
* SDMAProgress
* SDMAIdle
*/
- remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
- remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
- remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
-}
-
-static int request_msix_irqs(struct hfi1_devdata *dd)
-{
- int first_general, last_general;
- int first_sdma, last_sdma;
- int first_rx, last_rx;
- int i, ret = 0;
-
- /* calculate the ranges we are going to use */
- first_general = 0;
- last_general = first_general + 1;
- first_sdma = last_general;
- last_sdma = first_sdma + dd->num_sdma;
- first_rx = last_sdma;
- last_rx = first_rx + dd->n_krcv_queues + dd->num_vnic_contexts;
-
- /* VNIC MSIx interrupts get mapped when VNIC contexts are created */
- dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues;
-
- /*
- * Sanity check - the code expects all SDMA chip source
- * interrupts to be in the same CSR, starting at bit 0. Verify
- * that this is true by checking the bit location of the start.
- */
- BUILD_BUG_ON(IS_SDMA_START % 64);
-
- for (i = 0; i < dd->num_msix_entries; i++) {
- struct hfi1_msix_entry *me = &dd->msix_entries[i];
- const char *err_info;
- irq_handler_t handler;
- irq_handler_t thread = NULL;
- void *arg = NULL;
- int idx;
- struct hfi1_ctxtdata *rcd = NULL;
- struct sdma_engine *sde = NULL;
- char name[MAX_NAME_SIZE];
-
- /* obtain the arguments to pci_request_irq */
- if (first_general <= i && i < last_general) {
- idx = i - first_general;
- handler = general_interrupt;
- arg = dd;
- snprintf(name, sizeof(name),
- DRIVER_NAME "_%d", dd->unit);
- err_info = "general";
- me->type = IRQ_GENERAL;
- } else if (first_sdma <= i && i < last_sdma) {
- idx = i - first_sdma;
- sde = &dd->per_sdma[idx];
- handler = sdma_interrupt;
- arg = sde;
- snprintf(name, sizeof(name),
- DRIVER_NAME "_%d sdma%d", dd->unit, idx);
- err_info = "sdma";
- remap_sdma_interrupts(dd, idx, i);
- me->type = IRQ_SDMA;
- } else if (first_rx <= i && i < last_rx) {
- idx = i - first_rx;
- rcd = hfi1_rcd_get_by_index_safe(dd, idx);
- if (rcd) {
- /*
- * Set the interrupt register and mask for this
- * context's interrupt.
- */
- rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
- rcd->imask = ((u64)1) <<
- ((IS_RCVAVAIL_START + idx) % 64);
- handler = receive_context_interrupt;
- thread = receive_context_thread;
- arg = rcd;
- snprintf(name, sizeof(name),
- DRIVER_NAME "_%d kctxt%d",
- dd->unit, idx);
- err_info = "receive context";
- remap_intr(dd, IS_RCVAVAIL_START + idx, i);
- me->type = IRQ_RCVCTXT;
- rcd->msix_intr = i;
- hfi1_rcd_put(rcd);
- }
- } else {
- /* not in our expected range - complain, then
- * ignore it
- */
- dd_dev_err(dd,
- "Unexpected extra MSI-X interrupt %d\n", i);
- continue;
- }
- /* no argument, no interrupt */
- if (!arg)
- continue;
- /* make sure the name is terminated */
- name[sizeof(name) - 1] = 0;
- me->irq = pci_irq_vector(dd->pcidev, i);
- ret = pci_request_irq(dd->pcidev, i, handler, thread, arg,
- name);
- if (ret) {
- dd_dev_err(dd,
- "unable to allocate %s interrupt, irq %d, index %d, err %d\n",
- err_info, me->irq, idx, ret);
- return ret;
- }
- /*
- * assign arg after pci_request_irq call, so it will be
- * cleaned up
- */
- me->arg = arg;
-
- ret = hfi1_get_irq_affinity(dd, me);
- if (ret)
- dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
- }
-
- return ret;
-}
-
-void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
-{
- int i;
-
- for (i = 0; i < dd->vnic.num_ctxt; i++) {
- struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
- struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
-
- synchronize_irq(me->irq);
- }
-}
-
-void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
-{
- struct hfi1_devdata *dd = rcd->dd;
- struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
-
- if (!me->arg) /* => no irq, no affinity */
- return;
-
- hfi1_put_irq_affinity(dd, me);
- pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
-
- me->arg = NULL;
-}
-
-void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
-{
- struct hfi1_devdata *dd = rcd->dd;
- struct hfi1_msix_entry *me;
- int idx = rcd->ctxt;
- void *arg = rcd;
- int ret;
-
- rcd->msix_intr = dd->vnic.msix_idx++;
- me = &dd->msix_entries[rcd->msix_intr];
-
- /*
- * Set the interrupt register and mask for this
- * context's interrupt.
- */
- rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
- rcd->imask = ((u64)1) <<
- ((IS_RCVAVAIL_START + idx) % 64);
- me->type = IRQ_RCVCTXT;
- me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr);
- remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
-
- ret = pci_request_irq(dd->pcidev, rcd->msix_intr,
- receive_context_interrupt,
- receive_context_thread, arg,
- DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
- if (ret) {
- dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n",
- me->irq, idx, ret);
- return;
- }
- /*
- * assign arg after pci_request_irq call, so it will be
- * cleaned up
- */
- me->arg = arg;
-
- ret = hfi1_get_irq_affinity(dd, me);
- if (ret) {
- dd_dev_err(dd,
- "unable to pin IRQ %d\n", ret);
- pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
- }
+ remap_intr(dd, IS_SDMA_START + engine, msix_intr);
+ remap_intr(dd, IS_SDMA_PROGRESS_START + engine, msix_intr);
+ remap_intr(dd, IS_SDMA_IDLE_START + engine, msix_intr);
}
/*
* Set the general handler to accept all interrupts, remap all
* chip interrupts back to MSI-X 0.
*/
-static void reset_interrupts(struct hfi1_devdata *dd)
+void reset_interrupts(struct hfi1_devdata *dd)
{
int i;
@@ -13318,54 +13125,33 @@ static void reset_interrupts(struct hfi1_devdata *dd)
write_csr(dd, CCE_INT_MAP + (8 * i), 0);
}
+/**
+ * set_up_interrupts() - Initialize the IRQ resources and state
+ * @dd: valid devdata
+ *
+ */
static int set_up_interrupts(struct hfi1_devdata *dd)
{
- u32 total;
- int ret, request;
-
- /*
- * Interrupt count:
- * 1 general, "slow path" interrupt (includes the SDMA engines
- * slow source, SDMACleanupDone)
- * N interrupts - one per used SDMA engine
- * M interrupt - one per kernel receive context
- * V interrupt - one for each VNIC context
- */
- total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
-
- /* ask for MSI-X interrupts */
- request = request_msix(dd, total);
- if (request < 0) {
- ret = request;
- goto fail;
- } else {
- dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
- GFP_KERNEL);
- if (!dd->msix_entries) {
- ret = -ENOMEM;
- goto fail;
- }
- /* using MSI-X */
- dd->num_msix_entries = total;
- dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
- }
+ int ret;
/* mask all interrupts */
- set_intr_state(dd, 0);
+ set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
+
/* clear all pending interrupts */
clear_all_interrupts(dd);
/* reset general handler mask, chip MSI-X mappings */
reset_interrupts(dd);
- ret = request_msix_irqs(dd);
+ /* ask for MSI-X interrupts */
+ ret = msix_initialize(dd);
if (ret)
- goto fail;
+ return ret;
- return 0;
+ ret = msix_request_irqs(dd);
+ if (ret)
+ msix_clean_up_interrupts(dd);
-fail:
- hfi1_clean_up_interrupts(dd);
return ret;
}
@@ -14918,20 +14704,16 @@ err_exit:
}
/**
- * Allocate and initialize the device structure for the hfi.
+ * hfi1_init_dd() - Initialize most of the dd structure.
* @dev: the pci_dev for hfi1_ib device
* @ent: pci_device_id struct for this dev
*
- * Also allocates, initializes, and returns the devdata struct for this
- * device instance
- *
* This is global, and is called directly at init to set up the
* chip-specific function pointers for later use.
*/
-struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+int hfi1_init_dd(struct hfi1_devdata *dd)
{
- struct hfi1_devdata *dd;
+ struct pci_dev *pdev = dd->pcidev;
struct hfi1_pportdata *ppd;
u64 reg;
int i, ret;
@@ -14942,13 +14724,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
"Functional simulator"
};
struct pci_dev *parent = pdev->bus->self;
- u32 sdma_engines;
+ u32 sdma_engines = chip_sdma_engines(dd);
- dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
- sizeof(struct hfi1_pportdata));
- if (IS_ERR(dd))
- goto bail;
- sdma_engines = chip_sdma_engines(dd);
ppd = dd->pport;
for (i = 0; i < dd->num_pports; i++, ppd++) {
int vl;
@@ -15127,6 +14904,12 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_cleanup;
+ /*
+ * This should probably occur in hfi1_pcie_init(), but historically
+ * occurs after the do_pcie_gen3_transition() code.
+ */
+ tune_pcie_caps(dd);
+
/* start setting dd values and adjusting CSRs */
init_early_variables(dd);
@@ -15239,14 +15022,13 @@ bail_free_cntrs:
free_cntrs(dd);
bail_clear_intr:
hfi1_comp_vectors_clean_up(dd);
- hfi1_clean_up_interrupts(dd);
+ msix_clean_up_interrupts(dd);
bail_cleanup:
hfi1_pcie_ddcleanup(dd);
bail_free:
hfi1_free_devdata(dd);
- dd = ERR_PTR(ret);
bail:
- return dd;
+ return ret;
}
static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 36b04d6300e5..6b9c8f12dff8 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -52,9 +52,7 @@
*/
/* sizes */
-#define CCE_NUM_MSIX_VECTORS 256
-#define CCE_NUM_INT_CSRS 12
-#define CCE_NUM_INT_MAP_CSRS 96
+#define BITS_PER_REGISTER (BITS_PER_BYTE * sizeof(u64))
#define NUM_INTERRUPT_SOURCES 768
#define RXE_NUM_CONTEXTS 160
#define RXE_PER_CONTEXT_SIZE 0x1000 /* 4k */
@@ -161,34 +159,49 @@
(CR_CREDIT_RETURN_DUE_TO_FORCE_MASK << \
CR_CREDIT_RETURN_DUE_TO_FORCE_SHIFT)
-/* interrupt source numbers */
-#define IS_GENERAL_ERR_START 0
-#define IS_SDMAENG_ERR_START 16
-#define IS_SENDCTXT_ERR_START 32
-#define IS_SDMA_START 192 /* includes SDmaProgress,SDmaIdle */
+/* Specific IRQ sources */
+#define CCE_ERR_INT 0
+#define RXE_ERR_INT 1
+#define MISC_ERR_INT 2
+#define PIO_ERR_INT 4
+#define SDMA_ERR_INT 5
+#define EGRESS_ERR_INT 6
+#define TXE_ERR_INT 7
+#define PBC_INT 240
+#define GPIO_ASSERT_INT 241
+#define QSFP1_INT 242
+#define QSFP2_INT 243
+#define TCRIT_INT 244
+
+/* interrupt source ranges */
+#define IS_FIRST_SOURCE CCE_ERR_INT
+#define IS_GENERAL_ERR_START 0
+#define IS_SDMAENG_ERR_START 16
+#define IS_SENDCTXT_ERR_START 32
+#define IS_SDMA_START 192
+#define IS_SDMA_PROGRESS_START 208
+#define IS_SDMA_IDLE_START 224
#define IS_VARIOUS_START 240
#define IS_DC_START 248
#define IS_RCVAVAIL_START 256
#define IS_RCVURGENT_START 416
#define IS_SENDCREDIT_START 576
#define IS_RESERVED_START 736
-#define IS_MAX_SOURCES 768
+#define IS_LAST_SOURCE 767
/* derived interrupt source values */
-#define IS_GENERAL_ERR_END IS_SDMAENG_ERR_START
-#define IS_SDMAENG_ERR_END IS_SENDCTXT_ERR_START
-#define IS_SENDCTXT_ERR_END IS_SDMA_START
-#define IS_SDMA_END IS_VARIOUS_START
-#define IS_VARIOUS_END IS_DC_START
-#define IS_DC_END IS_RCVAVAIL_START
-#define IS_RCVAVAIL_END IS_RCVURGENT_START
-#define IS_RCVURGENT_END IS_SENDCREDIT_START
-#define IS_SENDCREDIT_END IS_RESERVED_START
-#define IS_RESERVED_END IS_MAX_SOURCES
-
-/* absolute interrupt numbers for QSFP1Int and QSFP2Int */
-#define QSFP1_INT 242
-#define QSFP2_INT 243
+#define IS_GENERAL_ERR_END 7
+#define IS_SDMAENG_ERR_END 31
+#define IS_SENDCTXT_ERR_END 191
+#define IS_SDMA_END 207
+#define IS_SDMA_PROGRESS_END 223
+#define IS_SDMA_IDLE_END 239
+#define IS_VARIOUS_END 244
+#define IS_DC_END 255
+#define IS_RCVAVAIL_END 415
+#define IS_RCVURGENT_END 575
+#define IS_SENDCREDIT_END 735
+#define IS_RESERVED_END IS_LAST_SOURCE
/* DCC_CFG_PORT_CONFIG logical link states */
#define LSTATE_DOWN 0x1
@@ -1416,6 +1429,18 @@ void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd);
+irqreturn_t general_interrupt(int irq, void *data);
+irqreturn_t sdma_interrupt(int irq, void *data);
+irqreturn_t receive_context_interrupt(int irq, void *data);
+irqreturn_t receive_context_thread(int irq, void *data);
+
+int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set);
+void init_qsfp_int(struct hfi1_devdata *dd);
+void clear_all_interrupts(struct hfi1_devdata *dd);
+void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
+void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
+void reset_interrupts(struct hfi1_devdata *dd);
+
/*
* Interrupt source table.
*
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index ee6dca5e2a2f..c6163a347e93 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -878,6 +878,10 @@
#define SEND_CTRL (TXE + 0x000000000000)
#define SEND_CTRL_CM_RESET_SMASK 0x4ull
#define SEND_CTRL_SEND_ENABLE_SMASK 0x1ull
+#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
+#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xFFull
+#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
+ << SEND_CTRL_UNSUPPORTED_VL_SHIFT)
#define SEND_CTRL_VL_ARBITER_ENABLE_SMASK 0x2ull
#define SEND_CTXT_CHECK_ENABLE (TXE + 0x000000100080)
#define SEND_CTXT_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK 0x80ull
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 1fc75647e47b..c22ebc774a6a 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -681,7 +681,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
HFI1_RCVCTRL_TAILUPD_DIS |
HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
- HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
+ HFI1_RCVCTRL_NO_EGR_DROP_DIS |
+ HFI1_RCVCTRL_URGENT_DIS, uctxt);
/* Clear the context's J_KEY */
hfi1_clear_ctxt_jkey(dd, uctxt);
/*
@@ -1096,6 +1097,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt)
hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);
rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
+ rcvctrl_ops |= HFI1_RCVCTRL_URGENT_ENB;
if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
/*
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index d9470317983f..1401b6ea4a28 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -80,6 +80,7 @@
#include "qsfp.h"
#include "platform.h"
#include "affinity.h"
+#include "msix.h"
/* bumped 1 from s/w major version of TrueScale */
#define HFI1_CHIP_VERS_MAJ 3U
@@ -620,6 +621,8 @@ struct rvt_sge_state;
#define HFI1_RCVCTRL_NO_RHQ_DROP_DIS 0x8000
#define HFI1_RCVCTRL_NO_EGR_DROP_ENB 0x10000
#define HFI1_RCVCTRL_NO_EGR_DROP_DIS 0x20000
+#define HFI1_RCVCTRL_URGENT_ENB 0x40000
+#define HFI1_RCVCTRL_URGENT_DIS 0x80000
/* partition enforcement flags */
#define HFI1_PART_ENFORCE_IN 0x1
@@ -667,6 +670,14 @@ struct hfi1_msix_entry {
struct irq_affinity_notify notify;
};
+struct hfi1_msix_info {
+ /* lock to synchronize in_use_msix access */
+ spinlock_t msix_lock;
+ DECLARE_BITMAP(in_use_msix, CCE_NUM_MSIX_VECTORS);
+ struct hfi1_msix_entry *msix_entries;
+ u16 max_requested;
+};
+
/* per-SL CCA information */
struct cca_timer {
struct hrtimer hrtimer;
@@ -992,7 +1003,6 @@ struct hfi1_vnic_data {
struct idr vesw_idr;
u8 rmt_start;
u8 num_ctxt;
- u32 msix_idx;
};
struct hfi1_vnic_vport_info;
@@ -1205,11 +1215,6 @@ struct hfi1_devdata {
struct diag_client *diag_client;
- /* MSI-X information */
- struct hfi1_msix_entry *msix_entries;
- u32 num_msix_entries;
- u32 first_dyn_msix_idx;
-
/* general interrupt: mask of handled interrupts */
u64 gi_mask[CCE_NUM_INT_CSRS];
@@ -1223,6 +1228,9 @@ struct hfi1_devdata {
*/
struct timer_list synth_stats_timer;
+ /* MSI-X information */
+ struct hfi1_msix_info msix_info;
+
/*
* device counters
*/
@@ -1349,6 +1357,8 @@ struct hfi1_devdata {
/* vnic data */
struct hfi1_vnic_data vnic;
+ /* Lock to protect IRQ SRC register access */
+ spinlock_t irq_src_lock;
};
static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
@@ -1431,9 +1441,6 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
void set_all_slowpath(struct hfi1_devdata *dd);
-void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd);
-void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd);
-void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd);
extern const struct pci_device_id hfi1_pci_tbl[];
void hfi1_make_ud_req_9B(struct rvt_qp *qp,
@@ -1887,10 +1894,8 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
#define HFI1_CTXT_WAITING_URG 4
/* free up any allocated data at closes */
-struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
- const struct pci_device_id *ent);
+int hfi1_init_dd(struct hfi1_devdata *dd);
void hfi1_free_devdata(struct hfi1_devdata *dd);
-struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
/* LED beaconing functions */
void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
@@ -1963,6 +1968,7 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
*/
extern const char ib_hfi1_version[];
+extern const struct attribute_group ib_hfi1_attr_group;
int hfi1_device_create(struct hfi1_devdata *dd);
void hfi1_device_remove(struct hfi1_devdata *dd);
@@ -1974,16 +1980,15 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd);
/* Hook for sysfs read of QSFP */
int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
-int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent);
-void hfi1_clean_up_interrupts(struct hfi1_devdata *dd);
+int hfi1_pcie_init(struct hfi1_devdata *dd);
void hfi1_pcie_cleanup(struct pci_dev *pdev);
int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
int pcie_speeds(struct hfi1_devdata *dd);
-int request_msix(struct hfi1_devdata *dd, u32 msireq);
int restore_pci_variables(struct hfi1_devdata *dd);
int save_pci_variables(struct hfi1_devdata *dd);
int do_pcie_gen3_transition(struct hfi1_devdata *dd);
+void tune_pcie_caps(struct hfi1_devdata *dd);
int parse_platform_config(struct hfi1_devdata *dd);
int get_platform_config_field(struct hfi1_devdata *dd,
enum platform_config_table_type_encoding
@@ -2124,19 +2129,6 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
return base_sdma_integrity;
}
-/*
- * hfi1_early_err is used (only!) to print early errors before devdata is
- * allocated, or when dd->pcidev may not be valid, and at the tail end of
- * cleanup when devdata may have been freed, etc. hfi1_dev_porterr is
- * the same as dd_dev_err, but is used when the message really needs
- * the IB port# to be definitive as to what's happening..
- */
-#define hfi1_early_err(dev, fmt, ...) \
- dev_err(dev, fmt, ##__VA_ARGS__)
-
-#define hfi1_early_info(dev, fmt, ...) \
- dev_info(dev, fmt, ##__VA_ARGS__)
-
#define dd_dev_emerg(dd, fmt, ...) \
dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \
rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 758d273c32cf..09044905284f 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -83,6 +83,8 @@
#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
+#define NUM_IB_PORTS 1
+
/*
* Number of user receive contexts we are configured to use (to allow for more
* pio buffers per ctxt, etc.) Zero means use one user context per CPU.
@@ -654,9 +656,8 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
if (loopback) {
- hfi1_early_err(&pdev->dev,
- "Faking data partition 0x8001 in idx %u\n",
- !default_pkey_idx);
+ dd_dev_err(dd, "Faking data partition 0x8001 in idx %u\n",
+ !default_pkey_idx);
ppd->pkeys[!default_pkey_idx] = 0x8001;
}
@@ -702,9 +703,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
return;
bail:
-
- hfi1_early_err(&pdev->dev,
- "Congestion Control Agent disabled for port %d\n", port);
+ dd_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port);
}
/*
@@ -833,6 +832,23 @@ wq_error:
}
/**
+ * enable_general_intr() - Enable the IRQs that will be handled by the
+ * general interrupt handler.
+ * @dd: valid devdata
+ *
+ */
+static void enable_general_intr(struct hfi1_devdata *dd)
+{
+ set_intr_bits(dd, CCE_ERR_INT, MISC_ERR_INT, true);
+ set_intr_bits(dd, PIO_ERR_INT, TXE_ERR_INT, true);
+ set_intr_bits(dd, IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, true);
+ set_intr_bits(dd, PBC_INT, GPIO_ASSERT_INT, true);
+ set_intr_bits(dd, TCRIT_INT, TCRIT_INT, true);
+ set_intr_bits(dd, IS_DC_START, IS_DC_END, true);
+ set_intr_bits(dd, IS_SENDCREDIT_START, IS_SENDCREDIT_END, true);
+}
+
+/**
* hfi1_init - do the actual initialization sequence on the chip
* @dd: the hfi1_ib device
* @reinit: re-initializing, so don't allocate new memory
@@ -916,6 +932,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
ret = lastfail;
}
+ /* enable IRQ */
hfi1_rcd_put(rcd);
}
@@ -954,7 +971,8 @@ done:
HFI1_STATUS_INITTED;
if (!ret) {
/* enable all interrupts from the chip */
- set_intr_state(dd, 1);
+ enable_general_intr(dd);
+ init_qsfp_int(dd);
/* chip is OK for user apps; mark it as initialized */
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
@@ -1051,9 +1069,9 @@ static void shutdown_device(struct hfi1_devdata *dd)
}
dd->flags &= ~HFI1_INITTED;
- /* mask and clean up interrupts, but not errors */
- set_intr_state(dd, 0);
- hfi1_clean_up_interrupts(dd);
+ /* mask and clean up interrupts */
+ set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
+ msix_clean_up_interrupts(dd);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
@@ -1246,15 +1264,19 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
kobject_put(&dd->kobj);
}
-/*
- * Allocate our primary per-unit data structure. Must be done via verbs
- * allocator, because the verbs cleanup process both does cleanup and
- * free of the data structure.
+/**
+ * hfi1_alloc_devdata - Allocate our primary per-unit data structure.
+ * @pdev: Valid PCI device
+ * @extra: How many bytes to alloc past the default
+ *
+ * Must be done via verbs allocator, because the verbs cleanup process
+ * both does cleanup and free of the data structure.
* "extra" is for chip-specific data.
*
* Use the idr mechanism to get a unit number for this unit.
*/
-struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
+static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
+ size_t extra)
{
unsigned long flags;
struct hfi1_devdata *dd;
@@ -1287,8 +1309,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
idr_preload_end();
if (ret < 0) {
- hfi1_early_err(&pdev->dev,
- "Could not allocate unit ID: error %d\n", -ret);
+ dev_err(&pdev->dev,
+ "Could not allocate unit ID: error %d\n", -ret);
goto bail;
}
rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
@@ -1309,6 +1331,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
spin_lock_init(&dd->pio_map_lock);
mutex_init(&dd->dc8051_lock);
init_waitqueue_head(&dd->event_queue);
+ spin_lock_init(&dd->irq_src_lock);
dd->int_counter = alloc_percpu(u64);
if (!dd->int_counter) {
@@ -1481,9 +1504,6 @@ static int __init hfi1_mod_init(void)
idr_init(&hfi1_unit_table);
hfi1_dbg_init();
- ret = hfi1_wss_init();
- if (ret < 0)
- goto bail_wss;
ret = pci_register_driver(&hfi1_pci_driver);
if (ret < 0) {
pr_err("Unable to register driver: error %d\n", -ret);
@@ -1492,8 +1512,6 @@ static int __init hfi1_mod_init(void)
goto bail; /* all OK */
bail_dev:
- hfi1_wss_exit();
-bail_wss:
hfi1_dbg_exit();
idr_destroy(&hfi1_unit_table);
dev_cleanup();
@@ -1510,7 +1528,6 @@ static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
node_affinity_destroy_all();
- hfi1_wss_exit();
hfi1_dbg_exit();
idr_destroy(&hfi1_unit_table);
@@ -1604,23 +1621,23 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
hfi1_free_devdata(dd);
}
-static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt)
+static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
{
if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
- hfi1_early_err(dev, "Receive header queue count too small\n");
+ dd_dev_err(dd, "Receive header queue count too small\n");
return -EINVAL;
}
if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
- hfi1_early_err(dev,
- "Receive header queue count cannot be greater than %u\n",
- HFI1_MAX_HDRQ_EGRBUF_CNT);
+ dd_dev_err(dd,
+ "Receive header queue count cannot be greater than %u\n",
+ HFI1_MAX_HDRQ_EGRBUF_CNT);
return -EINVAL;
}
if (thecnt % HDRQ_INCREMENT) {
- hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n",
- thecnt, HDRQ_INCREMENT);
+ dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
+ thecnt, HDRQ_INCREMENT);
return -EINVAL;
}
@@ -1639,22 +1656,29 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Validate dev ids */
if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
ent->device == PCI_DEVICE_ID_INTEL1)) {
- hfi1_early_err(&pdev->dev,
- "Failing on unknown Intel deviceid 0x%x\n",
- ent->device);
+ dev_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n",
+ ent->device);
ret = -ENODEV;
goto bail;
}
+ /* Allocate the dd so we can get to work */
+ dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
+ sizeof(struct hfi1_pportdata));
+ if (IS_ERR(dd)) {
+ ret = PTR_ERR(dd);
+ goto bail;
+ }
+
/* Validate some global module parameters */
- ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
+ ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt);
if (ret)
goto bail;
/* use the encoding function as a sanitization check */
if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
- hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
- hfi1_hdrq_entsize);
+ dd_dev_err(dd, "Invalid HdrQ Entry size %u\n",
+ hfi1_hdrq_entsize);
ret = -EINVAL;
goto bail;
}
@@ -1676,10 +1700,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
clamp_val(eager_buffer_size,
MIN_EAGER_BUFFER * 8,
MAX_EAGER_BUFFER_TOTAL);
- hfi1_early_info(&pdev->dev, "Eager buffer size %u\n",
- eager_buffer_size);
+ dd_dev_info(dd, "Eager buffer size %u\n",
+ eager_buffer_size);
} else {
- hfi1_early_err(&pdev->dev, "Invalid Eager buffer size of 0\n");
+ dd_dev_err(dd, "Invalid Eager buffer size of 0\n");
ret = -EINVAL;
goto bail;
}
@@ -1687,7 +1711,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* restrict value of hfi1_rcvarr_split */
hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100);
- ret = hfi1_pcie_init(pdev, ent);
+ ret = hfi1_pcie_init(dd);
if (ret)
goto bail;
@@ -1695,12 +1719,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
* Do device-specific initialization, function table setup, dd
* allocation, etc.
*/
- dd = hfi1_init_dd(pdev, ent);
-
- if (IS_ERR(dd)) {
- ret = PTR_ERR(dd);
+ ret = hfi1_init_dd(dd);
+ if (ret)
goto clean_bail; /* error already printed */
- }
ret = create_workqueues(dd);
if (ret)
@@ -1731,7 +1752,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
if (initfail || ret) {
- hfi1_clean_up_interrupts(dd);
+ msix_clean_up_interrupts(dd);
stop_timers(dd);
flush_workqueue(ib_wq);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c
new file mode 100644
index 000000000000..582f1ba136ff
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/iowait.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#include "iowait.h"
+#include "trace_iowait.h"
+
+void iowait_set_flag(struct iowait *wait, u32 flag)
+{
+ trace_hfi1_iowait_set(wait, flag);
+ set_bit(flag, &wait->flags);
+}
+
+bool iowait_flag_set(struct iowait *wait, u32 flag)
+{
+ return test_bit(flag, &wait->flags);
+}
+
+inline void iowait_clear_flag(struct iowait *wait, u32 flag)
+{
+ trace_hfi1_iowait_clear(wait, flag);
+ clear_bit(flag, &wait->flags);
+}
+
+/**
+ * iowait_init() - initialize wait structure
+ * @wait: wait struct to initialize
+ * @tx_limit: limit for overflow queuing
+ * @func: restart function for workqueue
+ * @sleep: sleep function for no space
+ * @resume: wakeup function for no space
+ *
+ * This function initializes the iowait
+ * structure embedded in the QP or PQ.
+ *
+ */
+void iowait_init(struct iowait *wait, u32 tx_limit,
+ void (*func)(struct work_struct *work),
+ void (*tidfunc)(struct work_struct *work),
+ int (*sleep)(struct sdma_engine *sde,
+ struct iowait_work *wait,
+ struct sdma_txreq *tx,
+ uint seq,
+ bool pkts_sent),
+ void (*wakeup)(struct iowait *wait, int reason),
+ void (*sdma_drained)(struct iowait *wait))
+{
+ int i;
+
+ wait->count = 0;
+ INIT_LIST_HEAD(&wait->list);
+ init_waitqueue_head(&wait->wait_dma);
+ init_waitqueue_head(&wait->wait_pio);
+ atomic_set(&wait->sdma_busy, 0);
+ atomic_set(&wait->pio_busy, 0);
+ wait->tx_limit = tx_limit;
+ wait->sleep = sleep;
+ wait->wakeup = wakeup;
+ wait->sdma_drained = sdma_drained;
+ wait->flags = 0;
+ for (i = 0; i < IOWAIT_SES; i++) {
+ wait->wait[i].iow = wait;
+ INIT_LIST_HEAD(&wait->wait[i].tx_head);
+ if (i == IOWAIT_IB_SE)
+ INIT_WORK(&wait->wait[i].iowork, func);
+ else
+ INIT_WORK(&wait->wait[i].iowork, tidfunc);
+ }
+}
+
+/**
+ * iowait_cancel_work - cancel all work in iowait
+ * @w: the iowait struct
+ */
+void iowait_cancel_work(struct iowait *w)
+{
+ cancel_work_sync(&iowait_get_ib_work(w)->iowork);
+ cancel_work_sync(&iowait_get_tid_work(w)->iowork);
+}
+
+/**
+ * iowait_set_work_flag - set work flag based on leg
+ * @w - the iowait work struct
+ */
+int iowait_set_work_flag(struct iowait_work *w)
+{
+ if (w == &w->iow->wait[IOWAIT_IB_SE]) {
+ iowait_set_flag(w->iow, IOWAIT_PENDING_IB);
+ return IOWAIT_IB_SE;
+ }
+ iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
+ return IOWAIT_TID_SE;
+}
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 3d9c32c7c340..23a58ac0d47c 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_IOWAIT_H
#define _HFI1_IOWAIT_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -49,6 +49,7 @@
#include <linux/list.h>
#include <linux/workqueue.h>
+#include <linux/wait.h>
#include <linux/sched.h>
#include "sdma_txreq.h"
@@ -59,16 +60,47 @@
*/
typedef void (*restart_t)(struct work_struct *work);
+#define IOWAIT_PENDING_IB 0x0
+#define IOWAIT_PENDING_TID 0x1
+
+/*
+ * A QP can have multiple Send Engines (SEs).
+ *
+ * The current use case is for supporting a TID RDMA
+ * packet build/xmit mechanism independent from verbs.
+ */
+#define IOWAIT_SES 2
+#define IOWAIT_IB_SE 0
+#define IOWAIT_TID_SE 1
+
struct sdma_txreq;
struct sdma_engine;
/**
- * struct iowait - linkage for delayed progress/waiting
+ * @iowork: the work struct
+ * @tx_head: list of prebuilt packets
+ * @iow: the parent iowait structure
+ *
+ * This structure is the work item (process) specific
+ * details associated with the each of the two SEs of the
+ * QP.
+ *
+ * The workstruct and the queued TXs are unique to each
+ * SE.
+ */
+struct iowait;
+struct iowait_work {
+ struct work_struct iowork;
+ struct list_head tx_head;
+ struct iowait *iow;
+};
+
+/**
* @list: used to add/insert into QP/PQ wait lists
- * @lock: uses to record the list head lock
* @tx_head: overflow list of sdma_txreq's
* @sleep: no space callback
* @wakeup: space callback wakeup
* @sdma_drained: sdma count drained
+ * @lock: lock protected head of wait queue
* @iowork: workqueue overhead
* @wait_dma: wait for sdma_busy == 0
* @wait_pio: wait for pio_busy == 0
@@ -76,6 +108,8 @@ struct sdma_engine;
* @count: total number of descriptors in tx_head'ed list
* @tx_limit: limit for overflow queuing
* @tx_count: number of tx entry's in tx_head'ed list
+ * @flags: wait flags (one per QP)
+ * @wait: SE array
*
* This is to be embedded in user's state structure
* (QP or PQ).
@@ -98,13 +132,11 @@ struct sdma_engine;
* Waiters explicity know that, but the destroy
* code that unwaits QPs does not.
*/
-
struct iowait {
struct list_head list;
- struct list_head tx_head;
int (*sleep)(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *tx,
uint seq,
bool pkts_sent
@@ -112,7 +144,6 @@ struct iowait {
void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait);
seqlock_t *lock;
- struct work_struct iowork;
wait_queue_head_t wait_dma;
wait_queue_head_t wait_pio;
atomic_t sdma_busy;
@@ -121,63 +152,37 @@ struct iowait {
u32 tx_limit;
u32 tx_count;
u8 starved_cnt;
+ unsigned long flags;
+ struct iowait_work wait[IOWAIT_SES];
};
#define SDMA_AVAIL_REASON 0
-/**
- * iowait_init() - initialize wait structure
- * @wait: wait struct to initialize
- * @tx_limit: limit for overflow queuing
- * @func: restart function for workqueue
- * @sleep: sleep function for no space
- * @resume: wakeup function for no space
- *
- * This function initializes the iowait
- * structure embedded in the QP or PQ.
- *
- */
+void iowait_set_flag(struct iowait *wait, u32 flag);
+bool iowait_flag_set(struct iowait *wait, u32 flag);
+void iowait_clear_flag(struct iowait *wait, u32 flag);
-static inline void iowait_init(
- struct iowait *wait,
- u32 tx_limit,
- void (*func)(struct work_struct *work),
- int (*sleep)(
- struct sdma_engine *sde,
- struct iowait *wait,
- struct sdma_txreq *tx,
- uint seq,
- bool pkts_sent),
- void (*wakeup)(struct iowait *wait, int reason),
- void (*sdma_drained)(struct iowait *wait))
-{
- wait->count = 0;
- wait->lock = NULL;
- INIT_LIST_HEAD(&wait->list);
- INIT_LIST_HEAD(&wait->tx_head);
- INIT_WORK(&wait->iowork, func);
- init_waitqueue_head(&wait->wait_dma);
- init_waitqueue_head(&wait->wait_pio);
- atomic_set(&wait->sdma_busy, 0);
- atomic_set(&wait->pio_busy, 0);
- wait->tx_limit = tx_limit;
- wait->sleep = sleep;
- wait->wakeup = wakeup;
- wait->sdma_drained = sdma_drained;
-}
+void iowait_init(struct iowait *wait, u32 tx_limit,
+ void (*func)(struct work_struct *work),
+ void (*tidfunc)(struct work_struct *work),
+ int (*sleep)(struct sdma_engine *sde,
+ struct iowait_work *wait,
+ struct sdma_txreq *tx,
+ uint seq,
+ bool pkts_sent),
+ void (*wakeup)(struct iowait *wait, int reason),
+ void (*sdma_drained)(struct iowait *wait));
/**
- * iowait_schedule() - initialize wait structure
+ * iowait_schedule() - schedule the default send engine work
* @wait: wait struct to schedule
* @wq: workqueue for schedule
* @cpu: cpu
*/
-static inline void iowait_schedule(
- struct iowait *wait,
- struct workqueue_struct *wq,
- int cpu)
+static inline bool iowait_schedule(struct iowait *wait,
+ struct workqueue_struct *wq, int cpu)
{
- queue_work_on(cpu, wq, &wait->iowork);
+ return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
}
/**
@@ -228,6 +233,8 @@ static inline void iowait_sdma_add(struct iowait *wait, int count)
*/
static inline int iowait_sdma_dec(struct iowait *wait)
{
+ if (!wait)
+ return 0;
return atomic_dec_and_test(&wait->sdma_busy);
}
@@ -267,11 +274,13 @@ static inline void iowait_pio_inc(struct iowait *wait)
}
/**
- * iowait_sdma_dec - note pio complete
+ * iowait_pio_dec - note pio complete
* @wait: iowait structure
*/
static inline int iowait_pio_dec(struct iowait *wait)
{
+ if (!wait)
+ return 0;
return atomic_dec_and_test(&wait->pio_busy);
}
@@ -293,9 +302,9 @@ static inline void iowait_drain_wakeup(struct iowait *wait)
/**
* iowait_get_txhead() - get packet off of iowait list
*
- * @wait wait struture
+ * @wait iowait_work struture
*/
-static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
+static inline struct sdma_txreq *iowait_get_txhead(struct iowait_work *wait)
{
struct sdma_txreq *tx = NULL;
@@ -309,6 +318,28 @@ static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
return tx;
}
+static inline u16 iowait_get_desc(struct iowait_work *w)
+{
+ u16 num_desc = 0;
+ struct sdma_txreq *tx = NULL;
+
+ if (!list_empty(&w->tx_head)) {
+ tx = list_first_entry(&w->tx_head, struct sdma_txreq,
+ list);
+ num_desc = tx->num_desc;
+ }
+ return num_desc;
+}
+
+static inline u32 iowait_get_all_desc(struct iowait *w)
+{
+ u32 num_desc = 0;
+
+ num_desc = iowait_get_desc(&w->wait[IOWAIT_IB_SE]);
+ num_desc += iowait_get_desc(&w->wait[IOWAIT_TID_SE]);
+ return num_desc;
+}
+
/**
* iowait_queue - Put the iowait on a wait queue
* @pkts_sent: have some packets been sent before queuing?
@@ -372,12 +403,57 @@ static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
}
/**
- * iowait_packet_queued() - determine if a packet is already built
- * @wait: the wait structure
+ * iowait_packet_queued() - determine if a packet is queued
+ * @wait: the iowait_work structure
*/
-static inline bool iowait_packet_queued(struct iowait *wait)
+static inline bool iowait_packet_queued(struct iowait_work *wait)
{
return !list_empty(&wait->tx_head);
}
+/**
+ * inc_wait_count - increment wait counts
+ * @w: the log work struct
+ * @n: the count
+ */
+static inline void iowait_inc_wait_count(struct iowait_work *w, u16 n)
+{
+ if (!w)
+ return;
+ w->iow->tx_count++;
+ w->iow->count += n;
+}
+
+/**
+ * iowait_get_tid_work - return iowait_work for tid SE
+ * @w: the iowait struct
+ */
+static inline struct iowait_work *iowait_get_tid_work(struct iowait *w)
+{
+ return &w->wait[IOWAIT_TID_SE];
+}
+
+/**
+ * iowait_get_ib_work - return iowait_work for ib SE
+ * @w: the iowait struct
+ */
+static inline struct iowait_work *iowait_get_ib_work(struct iowait *w)
+{
+ return &w->wait[IOWAIT_IB_SE];
+}
+
+/**
+ * iowait_ioww_to_iow - return iowait given iowait_work
+ * @w: the iowait_work struct
+ */
+static inline struct iowait *iowait_ioww_to_iow(struct iowait_work *w)
+{
+ if (likely(w))
+ return w->iow;
+ return NULL;
+}
+
+void iowait_cancel_work(struct iowait *w);
+int iowait_set_work_flag(struct iowait_work *w);
+
#endif
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 0307405491e0..88a0cf930136 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015-2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -4836,7 +4836,7 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
int ret;
int pkey_idx;
int local_mad = 0;
- u32 resp_len = 0;
+ u32 resp_len = in_wc->byte_len - sizeof(*in_grh);
struct hfi1_ibport *ibp = to_iport(ibdev, port);
pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c
new file mode 100644
index 000000000000..d920b165d696
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/msix.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "hfi.h"
+#include "affinity.h"
+#include "sdma.h"
+
+/**
+ * msix_initialize() - Calculate, request and configure MSIx IRQs
+ * @dd: valid hfi1 devdata
+ *
+ */
+int msix_initialize(struct hfi1_devdata *dd)
+{
+ u32 total;
+ int ret;
+ struct hfi1_msix_entry *entries;
+
+ /*
+ * MSIx interrupt count:
+ * one for the general, "slow path" interrupt
+ * one per used SDMA engine
+ * one per kernel receive context
+ * one for each VNIC context
+ * ...any new IRQs should be added here.
+ */
+ total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
+
+ if (total >= CCE_NUM_MSIX_VECTORS)
+ return -EINVAL;
+
+ ret = pci_alloc_irq_vectors(dd->pcidev, total, total, PCI_IRQ_MSIX);
+ if (ret < 0) {
+ dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", ret);
+ return ret;
+ }
+
+ entries = kcalloc(total, sizeof(*dd->msix_info.msix_entries),
+ GFP_KERNEL);
+ if (!entries) {
+ pci_free_irq_vectors(dd->pcidev);
+ return -ENOMEM;
+ }
+
+ dd->msix_info.msix_entries = entries;
+ spin_lock_init(&dd->msix_info.msix_lock);
+ bitmap_zero(dd->msix_info.in_use_msix, total);
+ dd->msix_info.max_requested = total;
+ dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
+
+ return 0;
+}
+
+/**
+ * msix_request_irq() - Allocate a free MSIx IRQ
+ * @dd: valid devdata
+ * @arg: context information for the IRQ
+ * @handler: IRQ handler
+ * @thread: IRQ thread handler (could be NULL)
+ * @idx: zero base idx if multiple devices are needed
+ * @type: affinty IRQ type
+ *
+ * Allocated an MSIx vector if available, and then create the appropriate
+ * meta data needed to keep track of the pci IRQ request.
+ *
+ * Return:
+ * < 0 Error
+ * >= 0 MSIx vector
+ *
+ */
+static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
+ irq_handler_t handler, irq_handler_t thread,
+ u32 idx, enum irq_type type)
+{
+ unsigned long nr;
+ int irq;
+ int ret;
+ const char *err_info;
+ char name[MAX_NAME_SIZE];
+ struct hfi1_msix_entry *me;
+
+ /* Allocate an MSIx vector */
+ spin_lock(&dd->msix_info.msix_lock);
+ nr = find_first_zero_bit(dd->msix_info.in_use_msix,
+ dd->msix_info.max_requested);
+ if (nr < dd->msix_info.max_requested)
+ __set_bit(nr, dd->msix_info.in_use_msix);
+ spin_unlock(&dd->msix_info.msix_lock);
+
+ if (nr == dd->msix_info.max_requested)
+ return -ENOSPC;
+
+ /* Specific verification and determine the name */
+ switch (type) {
+ case IRQ_GENERAL:
+ /* general interrupt must be MSIx vector 0 */
+ if (nr) {
+ spin_lock(&dd->msix_info.msix_lock);
+ __clear_bit(nr, dd->msix_info.in_use_msix);
+ spin_unlock(&dd->msix_info.msix_lock);
+ dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n",
+ nr);
+ return -EINVAL;
+ }
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
+ err_info = "general";
+ break;
+ case IRQ_SDMA:
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
+ dd->unit, idx);
+ err_info = "sdma";
+ break;
+ case IRQ_RCVCTXT:
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
+ dd->unit, idx);
+ err_info = "receive context";
+ break;
+ case IRQ_OTHER:
+ default:
+ return -EINVAL;
+ }
+ name[sizeof(name) - 1] = 0;
+
+ irq = pci_irq_vector(dd->pcidev, nr);
+ ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name);
+ if (ret) {
+ dd_dev_err(dd,
+ "%s: request for IRQ %d failed, MSIx %d, err %d\n",
+ err_info, irq, idx, ret);
+ spin_lock(&dd->msix_info.msix_lock);
+ __clear_bit(nr, dd->msix_info.in_use_msix);
+ spin_unlock(&dd->msix_info.msix_lock);
+ return ret;
+ }
+
+ /*
+ * assign arg after pci_request_irq call, so it will be
+ * cleaned up
+ */
+ me = &dd->msix_info.msix_entries[nr];
+ me->irq = irq;
+ me->arg = arg;
+ me->type = type;
+
+ /* This is a request, so a failure is not fatal */
+ ret = hfi1_get_irq_affinity(dd, me);
+ if (ret)
+ dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
+
+ return nr;
+}
+
+/**
+ * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
+ * @rcd: valid rcd context
+ *
+ */
+int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+{
+ int nr;
+
+ nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt,
+ receive_context_thread, rcd->ctxt, IRQ_RCVCTXT);
+ if (nr < 0)
+ return nr;
+
+ /*
+ * Set the interrupt register and mask for this
+ * context's interrupt.
+ */
+ rcd->ireg = (IS_RCVAVAIL_START + rcd->ctxt) / 64;
+ rcd->imask = ((u64)1) << ((IS_RCVAVAIL_START + rcd->ctxt) % 64);
+ rcd->msix_intr = nr;
+ remap_intr(rcd->dd, IS_RCVAVAIL_START + rcd->ctxt, nr);
+
+ return 0;
+}
+
+/**
+ * msix_request_smda_ira() - Helper for getting SDMA IRQ resources
+ * @sde: valid sdma engine
+ *
+ */
+int msix_request_sdma_irq(struct sdma_engine *sde)
+{
+ int nr;
+
+ nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL,
+ sde->this_idx, IRQ_SDMA);
+ if (nr < 0)
+ return nr;
+ sde->msix_intr = nr;
+ remap_sdma_interrupts(sde->dd, sde->this_idx, nr);
+
+ return 0;
+}
+
+/**
+ * enable_sdma_src() - Helper to enable SDMA IRQ srcs
+ * @dd: valid devdata structure
+ * @i: index of SDMA engine
+ */
+static void enable_sdma_srcs(struct hfi1_devdata *dd, int i)
+{
+ set_intr_bits(dd, IS_SDMA_START + i, IS_SDMA_START + i, true);
+ set_intr_bits(dd, IS_SDMA_PROGRESS_START + i,
+ IS_SDMA_PROGRESS_START + i, true);
+ set_intr_bits(dd, IS_SDMA_IDLE_START + i, IS_SDMA_IDLE_START + i, true);
+ set_intr_bits(dd, IS_SDMAENG_ERR_START + i, IS_SDMAENG_ERR_START + i,
+ true);
+}
+
+/**
+ * msix_request_irqs() - Allocate all MSIx IRQs
+ * @dd: valid devdata structure
+ *
+ * Helper function to request the used MSIx IRQs.
+ *
+ */
+int msix_request_irqs(struct hfi1_devdata *dd)
+{
+ int i;
+ int ret;
+
+ ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < dd->num_sdma; i++) {
+ struct sdma_engine *sde = &dd->per_sdma[i];
+
+ ret = msix_request_sdma_irq(sde);
+ if (ret)
+ return ret;
+ enable_sdma_srcs(sde->dd, i);
+ }
+
+ for (i = 0; i < dd->n_krcv_queues; i++) {
+ struct hfi1_ctxtdata *rcd = hfi1_rcd_get_by_index_safe(dd, i);
+
+ if (rcd)
+ ret = msix_request_rcd_irq(rcd);
+ hfi1_rcd_put(rcd);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * msix_free_irq() - Free the specified MSIx resources and IRQ
+ * @dd: valid devdata
+ * @msix_intr: MSIx vector to free.
+ *
+ */
+void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr)
+{
+ struct hfi1_msix_entry *me;
+
+ if (msix_intr >= dd->msix_info.max_requested)
+ return;
+
+ me = &dd->msix_info.msix_entries[msix_intr];
+
+ if (!me->arg) /* => no irq, no affinity */
+ return;
+
+ hfi1_put_irq_affinity(dd, me);
+ pci_free_irq(dd->pcidev, msix_intr, me->arg);
+
+ me->arg = NULL;
+
+ spin_lock(&dd->msix_info.msix_lock);
+ __clear_bit(msix_intr, dd->msix_info.in_use_msix);
+ spin_unlock(&dd->msix_info.msix_lock);
+}
+
+/**
+ * hfi1_clean_up_msix_interrupts() - Free all MSIx IRQ resources
+ * @dd: valid device data data structure
+ *
+ * Free the MSIx and associated PCI resources, if they have been allocated.
+ */
+void msix_clean_up_interrupts(struct hfi1_devdata *dd)
+{
+ int i;
+ struct hfi1_msix_entry *me = dd->msix_info.msix_entries;
+
+ /* remove irqs - must happen before disabling/turning off */
+ for (i = 0; i < dd->msix_info.max_requested; i++, me++)
+ msix_free_irq(dd, i);
+
+ /* clean structures */
+ kfree(dd->msix_info.msix_entries);
+ dd->msix_info.msix_entries = NULL;
+ dd->msix_info.max_requested = 0;
+
+ pci_free_irq_vectors(dd->pcidev);
+}
+
+/**
+ * msix_vnic_syncrhonize_irq() - Vnic IRQ synchronize
+ * @dd: valid devdata
+ */
+void msix_vnic_synchronize_irq(struct hfi1_devdata *dd)
+{
+ int i;
+
+ for (i = 0; i < dd->vnic.num_ctxt; i++) {
+ struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
+ struct hfi1_msix_entry *me;
+
+ me = &dd->msix_info.msix_entries[rcd->msix_intr];
+
+ synchronize_irq(me->irq);
+ }
+}
diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h
new file mode 100644
index 000000000000..a514881632a4
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/msix.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _HFI1_MSIX_H
+#define _HFI1_MSIX_H
+
+#include "hfi.h"
+
+/* MSIx interface */
+int msix_initialize(struct hfi1_devdata *dd);
+int msix_request_irqs(struct hfi1_devdata *dd);
+void msix_clean_up_interrupts(struct hfi1_devdata *dd);
+int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd);
+int msix_request_sdma_irq(struct sdma_engine *sde);
+void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr);
+
+/* VNIC interface */
+void msix_vnic_synchronize_irq(struct hfi1_devdata *dd);
+
+#endif
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index cca413eaa74e..c96d193bb236 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -61,19 +61,12 @@
*/
/*
- * Code to adjust PCIe capabilities.
- */
-static void tune_pcie_caps(struct hfi1_devdata *);
-
-/*
* Do all the common PCIe setup and initialization.
- * devdata is not yet allocated, and is not allocated until after this
- * routine returns success. Therefore dd_dev_err() can't be used for error
- * printing.
*/
-int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
+int hfi1_pcie_init(struct hfi1_devdata *dd)
{
int ret;
+ struct pci_dev *pdev = dd->pcidev;
ret = pci_enable_device(pdev);
if (ret) {
@@ -89,15 +82,13 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
* about that, it appears. If the original BAR was retained
* in the kernel data structures, this may be OK.
*/
- hfi1_early_err(&pdev->dev, "pci enable failed: error %d\n",
- -ret);
- goto done;
+ dd_dev_err(dd, "pci enable failed: error %d\n", -ret);
+ return ret;
}
ret = pci_request_regions(pdev, DRIVER_NAME);
if (ret) {
- hfi1_early_err(&pdev->dev,
- "pci_request_regions fails: err %d\n", -ret);
+ dd_dev_err(dd, "pci_request_regions fails: err %d\n", -ret);
goto bail;
}
@@ -110,8 +101,7 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
*/
ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (ret) {
- hfi1_early_err(&pdev->dev,
- "Unable to set DMA mask: %d\n", ret);
+ dd_dev_err(dd, "Unable to set DMA mask: %d\n", ret);
goto bail;
}
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
@@ -119,18 +109,16 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
}
if (ret) {
- hfi1_early_err(&pdev->dev,
- "Unable to set DMA consistent mask: %d\n", ret);
+ dd_dev_err(dd, "Unable to set DMA consistent mask: %d\n", ret);
goto bail;
}
pci_set_master(pdev);
(void)pci_enable_pcie_error_reporting(pdev);
- goto done;
+ return 0;
bail:
hfi1_pcie_cleanup(pdev);
-done:
return ret;
}
@@ -206,7 +194,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
dd_dev_err(dd, "WC mapping of send buffers failed\n");
goto nomem;
}
- dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE);
+ dd_dev_info(dd, "WC piobase: %p for %x\n", dd->piobase, TXE_PIO_SIZE);
dd->physaddr = addr; /* used for io_remap, etc. */
@@ -344,26 +332,6 @@ int pcie_speeds(struct hfi1_devdata *dd)
return 0;
}
-/*
- * Returns:
- * - actual number of interrupts allocated or
- * - error
- */
-int request_msix(struct hfi1_devdata *dd, u32 msireq)
-{
- int nvec;
-
- nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX);
- if (nvec < 0) {
- dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec);
- return nvec;
- }
-
- tune_pcie_caps(dd);
-
- return nvec;
-}
-
/* restore command and BARs after a reset has wiped them out */
int restore_pci_variables(struct hfi1_devdata *dd)
{
@@ -479,14 +447,19 @@ error:
* Check and optionally adjust them to maximize our throughput.
*/
static int hfi1_pcie_caps;
-module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO);
+module_param_named(pcie_caps, hfi1_pcie_caps, int, 0444);
MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
uint aspm_mode = ASPM_MODE_DISABLED;
-module_param_named(aspm, aspm_mode, uint, S_IRUGO);
+module_param_named(aspm, aspm_mode, uint, 0444);
MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
-static void tune_pcie_caps(struct hfi1_devdata *dd)
+/**
+ * tune_pcie_caps() - Code to adjust PCIe capabilities.
+ * @dd: Valid device data structure
+ *
+ */
+void tune_pcie_caps(struct hfi1_devdata *dd)
{
struct pci_dev *parent;
u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
@@ -1028,6 +1001,7 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
const u8 (*ctle_tunings)[4];
uint static_ctle_mode;
int return_error = 0;
+ u32 target_width;
/* PCIe Gen3 is for the ASIC only */
if (dd->icode != ICODE_RTL_SILICON)
@@ -1067,6 +1041,9 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
return 0;
}
+ /* Previous Gen1/Gen2 bus width */
+ target_width = dd->lbus_width;
+
/*
* Do the Gen3 transition. Steps are those of the PCIe Gen3
* recipe.
@@ -1435,11 +1412,12 @@ retry:
dd_dev_info(dd, "%s: new speed and width: %s\n", __func__,
dd->lbus_info);
- if (dd->lbus_speed != target_speed) { /* not target */
+ if (dd->lbus_speed != target_speed ||
+ dd->lbus_width < target_width) { /* not target */
/* maybe retry */
do_retry = retry_count < pcie_retry;
- dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n",
- pcie_target, do_retry ? ", retrying" : "");
+ dd_dev_err(dd, "PCIe link speed or width did not match target%s\n",
+ do_retry ? ", retrying" : "");
retry_count++;
if (do_retry) {
msleep(100); /* allow time to settle */
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 752057647f09..9ab50d2308dc 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -71,14 +71,6 @@ void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl)
}
}
-/* defined in header release 48 and higher */
-#ifndef SEND_CTRL_UNSUPPORTED_VL_SHIFT
-#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
-#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xffull
-#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
- << SEND_CTRL_UNSUPPORTED_VL_SHIFT)
-#endif
-
/* global control of PIO send */
void pio_send_control(struct hfi1_devdata *dd, int op)
{
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 9b1e84a6b1cc..6f3bc4dab858 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -66,7 +66,7 @@ MODULE_PARM_DESC(qp_table_size, "QP table size");
static void flush_tx_list(struct rvt_qp *qp);
static int iowait_sleep(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *stx,
unsigned int seq,
bool pkts_sent);
@@ -134,15 +134,13 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
};
-static void flush_tx_list(struct rvt_qp *qp)
+static void flush_list_head(struct list_head *l)
{
- struct hfi1_qp_priv *priv = qp->priv;
-
- while (!list_empty(&priv->s_iowait.tx_head)) {
+ while (!list_empty(l)) {
struct sdma_txreq *tx;
tx = list_first_entry(
- &priv->s_iowait.tx_head,
+ l,
struct sdma_txreq,
list);
list_del_init(&tx->list);
@@ -151,6 +149,14 @@ static void flush_tx_list(struct rvt_qp *qp)
}
}
+static void flush_tx_list(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
+ flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
+}
+
static void flush_iowait(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
@@ -282,33 +288,46 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
}
/**
- * hfi1_check_send_wqe - validate wqe
+ * hfi1_setup_wqe - set up the wqe
* @qp - The qp
* @wqe - The built wqe
+ * @call_send - Determine if the send should be posted or scheduled.
*
- * validate wqe. This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
+ * Perform setup of the wqe. This is called
+ * prior to inserting the wqe into the ring but after
+ * the wqe has been setup by RDMAVT. This function
+ * allows the driver the opportunity to perform
+ * validation and additional setup of the wqe.
*
* Returns 0 on success, -EINVAL on failure
*
*/
-int hfi1_check_send_wqe(struct rvt_qp *qp,
- struct rvt_swqe *wqe)
+int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct rvt_ah *ah;
+ struct hfi1_pportdata *ppd;
+ struct hfi1_devdata *dd;
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
+ if (wqe->length > qp->pmtu)
+ *call_send = false;
break;
case IB_QPT_SMI:
- ah = ibah_to_rvtah(wqe->ud_wr.ah);
- if (wqe->length > (1 << ah->log_pmtu))
+ /*
+ * SM packets should exclusively use VL15 and their SL is
+ * ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah
+ * is created, SL is 0 in most cases and as a result some
+ * fields (vl and pmtu) in ah may not be set correctly,
+ * depending on the SL2SC and SC2VL tables at the time.
+ */
+ ppd = ppd_from_ibp(ibp);
+ dd = dd_from_ppd(ppd);
+ if (wqe->length > dd->vld[15].mtu)
return -EINVAL;
break;
case IB_QPT_GSI:
@@ -321,7 +340,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
default:
break;
}
- return wqe->length <= piothreshold;
+ return 0;
}
/**
@@ -333,7 +352,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
* It is only used in the post send, which doesn't hold
* the s_lock.
*/
-void _hfi1_schedule_send(struct rvt_qp *qp)
+bool _hfi1_schedule_send(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibport *ibp =
@@ -341,10 +360,10 @@ void _hfi1_schedule_send(struct rvt_qp *qp)
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
- iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
- priv->s_sde ?
- priv->s_sde->cpu :
- cpumask_first(cpumask_of_node(dd->node)));
+ return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
+ priv->s_sde ?
+ priv->s_sde->cpu :
+ cpumask_first(cpumask_of_node(dd->node)));
}
static void qp_pio_drain(struct rvt_qp *qp)
@@ -372,12 +391,32 @@ static void qp_pio_drain(struct rvt_qp *qp)
*
* This schedules qp progress and caller should hold
* the s_lock.
+ * @return true if the first leg is scheduled;
+ * false if the first leg is not scheduled.
*/
-void hfi1_schedule_send(struct rvt_qp *qp)
+bool hfi1_schedule_send(struct rvt_qp *qp)
{
lockdep_assert_held(&qp->s_lock);
- if (hfi1_send_ok(qp))
+ if (hfi1_send_ok(qp)) {
_hfi1_schedule_send(qp);
+ return true;
+ }
+ if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
+ iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
+ IOWAIT_PENDING_IB);
+ return false;
+}
+
+static void hfi1_qp_schedule(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ bool ret;
+
+ if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
+ ret = hfi1_schedule_send(qp);
+ if (ret)
+ iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
+ }
}
void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
@@ -388,16 +427,22 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
if (qp->s_flags & flag) {
qp->s_flags &= ~flag;
trace_hfi1_qpwakeup(qp, flag);
- hfi1_schedule_send(qp);
+ hfi1_qp_schedule(qp);
}
spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify hfi1_destroy_qp() if it is waiting. */
rvt_put_qp(qp);
}
+void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
+{
+ if (iowait_set_work_flag(wait) == IOWAIT_IB_SE)
+ qp->s_flags &= ~RVT_S_BUSY;
+}
+
static int iowait_sleep(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *stx,
uint seq,
bool pkts_sent)
@@ -438,7 +483,7 @@ static int iowait_sleep(
rvt_get_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
- qp->s_flags &= ~RVT_S_BUSY;
+ hfi1_qp_unbusy(qp, wait);
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EBUSY;
} else {
@@ -637,6 +682,7 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
&priv->s_iowait,
1,
_hfi1_do_send,
+ NULL,
iowait_sleep,
iowait_wakeup,
iowait_sdma_drained);
@@ -686,7 +732,7 @@ void stop_send_queue(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
- cancel_work_sync(&priv->s_iowait.iowork);
+ iowait_cancel_work(&priv->s_iowait);
}
void quiesce_qp(struct rvt_qp *qp)
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index 078cff7560b6..7adb6dff6813 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -58,18 +58,6 @@ extern unsigned int hfi1_qp_table_size;
extern const struct rvt_operation_params hfi1_post_parms[];
/*
- * Send if not busy or waiting for I/O and either
- * a RC response is pending or we can process send work requests.
- */
-static inline int hfi1_send_ok(struct rvt_qp *qp)
-{
- return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
- (verbs_txreq_queued(qp) ||
- (qp->s_flags & RVT_S_RESP_PENDING) ||
- !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
-}
-
-/*
* Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK
*
* HFI1_S_AHG_VALID - ahg header valid on chip
@@ -90,6 +78,20 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
/*
+ * Send if not busy or waiting for I/O and either
+ * a RC response is pending or we can process send work requests.
+ */
+static inline int hfi1_send_ok(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ return !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)) &&
+ (verbs_txreq_queued(iowait_get_ib_work(&priv->s_iowait)) ||
+ (qp->s_flags & RVT_S_RESP_PENDING) ||
+ !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
+}
+
+/*
* free_ahg - clear ahg from QP
*/
static inline void clear_ahg(struct rvt_qp *qp)
@@ -129,8 +131,8 @@ struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5);
void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter);
-void _hfi1_schedule_send(struct rvt_qp *qp);
-void hfi1_schedule_send(struct rvt_qp *qp);
+bool _hfi1_schedule_send(struct rvt_qp *qp);
+bool hfi1_schedule_send(struct rvt_qp *qp);
void hfi1_migrate_qp(struct rvt_qp *qp);
@@ -150,4 +152,5 @@ void quiesce_qp(struct rvt_qp *qp);
u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
int mtu_to_path_mtu(u32 mtu);
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl);
+void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait);
#endif /* _QP_H */
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 9bd63abb2dfe..188aa4f686a0 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -309,7 +309,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
clear_ahg(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
+ rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
/* will get called again */
goto done_free_tx;
@@ -378,9 +378,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
wqe->wr.ex.invalidate_rkey);
local_ops = 1;
}
- hfi1_send_complete(qp, wqe,
- err ? IB_WC_LOC_PROT_ERR
- : IB_WC_SUCCESS);
+ rvt_send_complete(qp, wqe,
+ err ? IB_WC_LOC_PROT_ERR
+ : IB_WC_SUCCESS);
if (local_ops)
atomic_dec(&qp->local_ops_pending);
goto done_free_tx;
@@ -1043,7 +1043,7 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
hfi1_migrate_qp(qp);
qp->s_retry = qp->s_retry_cnt;
} else if (qp->s_last == qp->s_acked) {
- hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
return;
} else { /* need to handle delayed completion */
@@ -1468,7 +1468,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
ibp->rvp.n_other_naks++;
class_b:
if (qp->s_last == qp->s_acked) {
- hfi1_send_complete(qp, wqe, status);
+ rvt_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
break;
@@ -1644,7 +1644,8 @@ read_middle:
qp->s_rdma_read_len -= pmtu;
update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags);
- hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, false, false);
+ rvt_copy_sge(qp, &qp->s_rdma_read_sge,
+ data, pmtu, false, false);
goto bail;
case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1684,7 +1685,8 @@ read_last:
if (unlikely(tlen != qp->s_rdma_read_len))
goto ack_len_err;
aeth = be32_to_cpu(ohdr->u.aeth);
- hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, false, false);
+ rvt_copy_sge(qp, &qp->s_rdma_read_sge,
+ data, tlen, false, false);
WARN_ON(qp->s_rdma_read_sge.num_sge);
(void)do_rc_ack(qp, aeth, psn,
OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
@@ -1704,7 +1706,7 @@ ack_len_err:
status = IB_WC_LOC_LEN_ERR;
ack_err:
if (qp->s_last == qp->s_acked) {
- hfi1_send_complete(qp, wqe, status);
+ rvt_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
ack_done:
@@ -2144,7 +2146,7 @@ send_middle:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto nack_inv;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -2200,7 +2202,7 @@ send_last:
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
- hfi1_copy_sge(&qp->r_sge, data, tlen, true, copy_last);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, copy_last);
rvt_put_ss(&qp->r_sge);
qp->r_msn++;
if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 5f56f3c1b4c4..7fb317c711df 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -156,333 +156,6 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet)
}
/**
- * ruc_loopback - handle UC and RC loopback requests
- * @sqp: the sending QP
- *
- * This is called from hfi1_do_send() to
- * forward a WQE addressed to the same HFI.
- * Note that although we are single threaded due to the send engine, we still
- * have to protect against post_send(). We don't have to worry about
- * receive interrupts since this is a connected protocol and all packets
- * will pass through here.
- */
-static void ruc_loopback(struct rvt_qp *sqp)
-{
- struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
- struct rvt_qp *qp;
- struct rvt_swqe *wqe;
- struct rvt_sge *sge;
- unsigned long flags;
- struct ib_wc wc;
- u64 sdata;
- atomic64_t *maddr;
- enum ib_wc_status send_status;
- bool release;
- int ret;
- bool copy_last = false;
- int local_ops = 0;
-
- rcu_read_lock();
-
- /*
- * Note that we check the responder QP state after
- * checking the requester's state.
- */
- qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
- sqp->remote_qpn);
-
- spin_lock_irqsave(&sqp->s_lock, flags);
-
- /* Return if we are already busy processing a work request. */
- if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) ||
- !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
- goto unlock;
-
- sqp->s_flags |= RVT_S_BUSY;
-
-again:
- if (sqp->s_last == READ_ONCE(sqp->s_head))
- goto clr_busy;
- wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
-
- /* Return if it is not OK to start a new work request. */
- if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
- if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
- goto clr_busy;
- /* We are in the error state, flush the work request. */
- send_status = IB_WC_WR_FLUSH_ERR;
- goto flush_send;
- }
-
- /*
- * We can rely on the entry not changing without the s_lock
- * being held until we update s_last.
- * We increment s_cur to indicate s_last is in progress.
- */
- if (sqp->s_last == sqp->s_cur) {
- if (++sqp->s_cur >= sqp->s_size)
- sqp->s_cur = 0;
- }
- spin_unlock_irqrestore(&sqp->s_lock, flags);
-
- if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
- qp->ibqp.qp_type != sqp->ibqp.qp_type) {
- ibp->rvp.n_pkt_drops++;
- /*
- * For RC, the requester would timeout and retry so
- * shortcut the timeouts and just signal too many retries.
- */
- if (sqp->ibqp.qp_type == IB_QPT_RC)
- send_status = IB_WC_RETRY_EXC_ERR;
- else
- send_status = IB_WC_SUCCESS;
- goto serr;
- }
-
- memset(&wc, 0, sizeof(wc));
- send_status = IB_WC_SUCCESS;
-
- release = true;
- sqp->s_sge.sge = wqe->sg_list[0];
- sqp->s_sge.sg_list = wqe->sg_list + 1;
- sqp->s_sge.num_sge = wqe->wr.num_sge;
- sqp->s_len = wqe->length;
- switch (wqe->wr.opcode) {
- case IB_WR_REG_MR:
- goto send_comp;
-
- case IB_WR_LOCAL_INV:
- if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
- if (rvt_invalidate_rkey(sqp,
- wqe->wr.ex.invalidate_rkey))
- send_status = IB_WC_LOC_PROT_ERR;
- local_ops = 1;
- }
- goto send_comp;
-
- case IB_WR_SEND_WITH_INV:
- if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
- wc.wc_flags = IB_WC_WITH_INVALIDATE;
- wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
- }
- goto send;
-
- case IB_WR_SEND_WITH_IMM:
- wc.wc_flags = IB_WC_WITH_IMM;
- wc.ex.imm_data = wqe->wr.ex.imm_data;
- /* FALLTHROUGH */
- case IB_WR_SEND:
-send:
- ret = rvt_get_rwqe(qp, false);
- if (ret < 0)
- goto op_err;
- if (!ret)
- goto rnr_nak;
- break;
-
- case IB_WR_RDMA_WRITE_WITH_IMM:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
- goto inv_err;
- wc.wc_flags = IB_WC_WITH_IMM;
- wc.ex.imm_data = wqe->wr.ex.imm_data;
- ret = rvt_get_rwqe(qp, true);
- if (ret < 0)
- goto op_err;
- if (!ret)
- goto rnr_nak;
- /* skip copy_last set and qp_access_flags recheck */
- goto do_write;
- case IB_WR_RDMA_WRITE:
- copy_last = rvt_is_user_qp(qp);
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
- goto inv_err;
-do_write:
- if (wqe->length == 0)
- break;
- if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
- wqe->rdma_wr.remote_addr,
- wqe->rdma_wr.rkey,
- IB_ACCESS_REMOTE_WRITE)))
- goto acc_err;
- qp->r_sge.sg_list = NULL;
- qp->r_sge.num_sge = 1;
- qp->r_sge.total_len = wqe->length;
- break;
-
- case IB_WR_RDMA_READ:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
- goto inv_err;
- if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
- wqe->rdma_wr.remote_addr,
- wqe->rdma_wr.rkey,
- IB_ACCESS_REMOTE_READ)))
- goto acc_err;
- release = false;
- sqp->s_sge.sg_list = NULL;
- sqp->s_sge.num_sge = 1;
- qp->r_sge.sge = wqe->sg_list[0];
- qp->r_sge.sg_list = wqe->sg_list + 1;
- qp->r_sge.num_sge = wqe->wr.num_sge;
- qp->r_sge.total_len = wqe->length;
- break;
-
- case IB_WR_ATOMIC_CMP_AND_SWP:
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
- goto inv_err;
- if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
- wqe->atomic_wr.remote_addr,
- wqe->atomic_wr.rkey,
- IB_ACCESS_REMOTE_ATOMIC)))
- goto acc_err;
- /* Perform atomic OP and save result. */
- maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
- sdata = wqe->atomic_wr.compare_add;
- *(u64 *)sqp->s_sge.sge.vaddr =
- (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
- (u64)atomic64_add_return(sdata, maddr) - sdata :
- (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
- sdata, wqe->atomic_wr.swap);
- rvt_put_mr(qp->r_sge.sge.mr);
- qp->r_sge.num_sge = 0;
- goto send_comp;
-
- default:
- send_status = IB_WC_LOC_QP_OP_ERR;
- goto serr;
- }
-
- sge = &sqp->s_sge.sge;
- while (sqp->s_len) {
- u32 len = sqp->s_len;
-
- if (len > sge->length)
- len = sge->length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- WARN_ON_ONCE(len == 0);
- hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (!release)
- rvt_put_mr(sge->mr);
- if (--sqp->s_sge.num_sge)
- *sge = *sqp->s_sge.sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
- sqp->s_len -= len;
- }
- if (release)
- rvt_put_ss(&qp->r_sge);
-
- if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
- goto send_comp;
-
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
- wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
- else
- wc.opcode = IB_WC_RECV;
- wc.wr_id = qp->r_wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- wc.src_qp = qp->remote_qpn;
- wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
- wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
- wc.port_num = 1;
- /* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- wqe->wr.send_flags & IB_SEND_SOLICITED);
-
-send_comp:
- spin_lock_irqsave(&sqp->s_lock, flags);
- ibp->rvp.n_loop_pkts++;
-flush_send:
- sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
- hfi1_send_complete(sqp, wqe, send_status);
- if (local_ops) {
- atomic_dec(&sqp->local_ops_pending);
- local_ops = 0;
- }
- goto again;
-
-rnr_nak:
- /* Handle RNR NAK */
- if (qp->ibqp.qp_type == IB_QPT_UC)
- goto send_comp;
- ibp->rvp.n_rnr_naks++;
- /*
- * Note: we don't need the s_lock held since the BUSY flag
- * makes this single threaded.
- */
- if (sqp->s_rnr_retry == 0) {
- send_status = IB_WC_RNR_RETRY_EXC_ERR;
- goto serr;
- }
- if (sqp->s_rnr_retry_cnt < 7)
- sqp->s_rnr_retry--;
- spin_lock_irqsave(&sqp->s_lock, flags);
- if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
- goto clr_busy;
- rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
- IB_AETH_CREDIT_SHIFT);
- goto clr_busy;
-
-op_err:
- send_status = IB_WC_REM_OP_ERR;
- wc.status = IB_WC_LOC_QP_OP_ERR;
- goto err;
-
-inv_err:
- send_status = IB_WC_REM_INV_REQ_ERR;
- wc.status = IB_WC_LOC_QP_OP_ERR;
- goto err;
-
-acc_err:
- send_status = IB_WC_REM_ACCESS_ERR;
- wc.status = IB_WC_LOC_PROT_ERR;
-err:
- /* responder goes to error state */
- rvt_rc_error(qp, wc.status);
-
-serr:
- spin_lock_irqsave(&sqp->s_lock, flags);
- hfi1_send_complete(sqp, wqe, send_status);
- if (sqp->ibqp.qp_type == IB_QPT_RC) {
- int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
-
- sqp->s_flags &= ~RVT_S_BUSY;
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- if (lastwqe) {
- struct ib_event ev;
-
- ev.device = sqp->ibqp.device;
- ev.element.qp = &sqp->ibqp;
- ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
- sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
- }
- goto done;
- }
-clr_busy:
- sqp->s_flags &= ~RVT_S_BUSY;
-unlock:
- spin_unlock_irqrestore(&sqp->s_lock, flags);
-done:
- rcu_read_unlock();
-}
-
-/**
* hfi1_make_grh - construct a GRH header
* @ibp: a pointer to the IB port
* @hdr: a pointer to the GRH header being constructed
@@ -825,8 +498,8 @@ void hfi1_do_send_from_rvt(struct rvt_qp *qp)
void _hfi1_do_send(struct work_struct *work)
{
- struct iowait *wait = container_of(work, struct iowait, iowork);
- struct rvt_qp *qp = iowait_to_qp(wait);
+ struct iowait_work *w = container_of(work, struct iowait_work, iowork);
+ struct rvt_qp *qp = iowait_to_qp(w->iow);
hfi1_do_send(qp, true);
}
@@ -850,6 +523,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
ps.ppd = ppd_from_ibp(ps.ibp);
ps.in_thread = in_thread;
+ ps.wait = iowait_get_ib_work(&priv->s_iowait);
trace_hfi1_rc_do_send(qp, in_thread);
@@ -858,7 +532,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
~((1 << ps.ppd->lmc) - 1)) ==
ps.ppd->lid)) {
- ruc_loopback(qp);
+ rvt_ruc_loopback(qp);
return;
}
make_req = hfi1_make_rc_req;
@@ -868,7 +542,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
~((1 << ps.ppd->lmc) - 1)) ==
ps.ppd->lid)) {
- ruc_loopback(qp);
+ rvt_ruc_loopback(qp);
return;
}
make_req = hfi1_make_uc_req;
@@ -883,6 +557,8 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
/* Return if we are already busy processing a work request. */
if (!hfi1_send_ok(qp)) {
+ if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
+ iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
return;
}
@@ -896,7 +572,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
ps.pkts_sent = false;
/* insure a pre-built packet is handled */
- ps.s_txreq = get_waiting_verbs_txreq(qp);
+ ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
do {
/* Check for a constructed packet to be sent. */
if (ps.s_txreq) {
@@ -907,6 +583,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
*/
if (hfi1_verbs_send(qp, &ps))
return;
+
/* allow other tasks to run */
if (schedule_send_yield(qp, &ps))
return;
@@ -917,44 +594,3 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
}
-
-/*
- * This should be called with s_lock held.
- */
-void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
- enum ib_wc_status status)
-{
- u32 old_last, last;
-
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
- return;
-
- last = qp->s_last;
- old_last = last;
- trace_hfi1_qp_send_completion(qp, wqe, last);
- if (++last >= qp->s_size)
- last = 0;
- trace_hfi1_qp_send_completion(qp, wqe, last);
- qp->s_last = last;
- /* See post_send() */
- barrier();
- rvt_put_swqe(wqe);
- if (qp->ibqp.qp_type == IB_QPT_UD ||
- qp->ibqp.qp_type == IB_QPT_SMI ||
- qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
-
- rvt_qp_swqe_complete(qp,
- wqe,
- ib_hfi1_wc_opcode[wqe->wr.opcode],
- status);
-
- if (qp->s_acked == old_last)
- qp->s_acked = last;
- if (qp->s_cur == old_last)
- qp->s_cur = last;
- if (qp->s_tail == old_last)
- qp->s_tail = last;
- if (qp->state == IB_QPS_SQD && last == qp->s_cur)
- qp->s_draining = 0;
-}
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 88e326d6cc49..891d2386d1ca 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -378,7 +378,7 @@ static inline void complete_tx(struct sdma_engine *sde,
__sdma_txclean(sde->dd, tx);
if (complete)
(*complete)(tx, res);
- if (wait && iowait_sdma_dec(wait))
+ if (iowait_sdma_dec(wait))
iowait_drain_wakeup(wait);
}
@@ -1758,7 +1758,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
struct iowait *wait, *nw;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
uint i, n = 0, seq, max_idx = 0;
- struct sdma_txreq *stx;
struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
u8 max_starved_cnt = 0;
@@ -1779,19 +1778,13 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
nw,
&sde->dmawait,
list) {
- u16 num_desc = 0;
+ u32 num_desc;
if (!wait->wakeup)
continue;
if (n == ARRAY_SIZE(waits))
break;
- if (!list_empty(&wait->tx_head)) {
- stx = list_first_entry(
- &wait->tx_head,
- struct sdma_txreq,
- list);
- num_desc = stx->num_desc;
- }
+ num_desc = iowait_get_all_desc(wait);
if (num_desc > avail)
break;
avail -= num_desc;
@@ -2346,7 +2339,7 @@ static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
*/
static int sdma_check_progress(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *tx,
bool pkts_sent)
{
@@ -2356,12 +2349,12 @@ static int sdma_check_progress(
if (tx->num_desc <= sde->desc_avail)
return -EAGAIN;
/* pulse the head_lock */
- if (wait && wait->sleep) {
+ if (wait && iowait_ioww_to_iow(wait)->sleep) {
unsigned seq;
seq = raw_seqcount_begin(
(const seqcount_t *)&sde->head_lock.seqcount);
- ret = wait->sleep(sde, wait, tx, seq, pkts_sent);
+ ret = wait->iow->sleep(sde, wait, tx, seq, pkts_sent);
if (ret == -EAGAIN)
sde->desc_avail = sdma_descq_freecnt(sde);
} else {
@@ -2373,7 +2366,7 @@ static int sdma_check_progress(
/**
* sdma_send_txreq() - submit a tx req to ring
* @sde: sdma engine to use
- * @wait: wait structure to use when full (may be NULL)
+ * @wait: SE wait structure to use when full (may be NULL)
* @tx: sdma_txreq to submit
* @pkts_sent: has any packet been sent yet?
*
@@ -2386,7 +2379,7 @@ static int sdma_check_progress(
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/
int sdma_send_txreq(struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *tx,
bool pkts_sent)
{
@@ -2397,7 +2390,7 @@ int sdma_send_txreq(struct sdma_engine *sde,
/* user should have supplied entire packet */
if (unlikely(tx->tlen))
return -EINVAL;
- tx->wait = wait;
+ tx->wait = iowait_ioww_to_iow(wait);
spin_lock_irqsave(&sde->tail_lock, flags);
retry:
if (unlikely(!__sdma_running(sde)))
@@ -2406,14 +2399,14 @@ retry:
goto nodesc;
tail = submit_tx(sde, tx);
if (wait)
- iowait_sdma_inc(wait);
+ iowait_sdma_inc(iowait_ioww_to_iow(wait));
sdma_update_tail(sde, tail);
unlock:
spin_unlock_irqrestore(&sde->tail_lock, flags);
return ret;
unlock_noconn:
if (wait)
- iowait_sdma_inc(wait);
+ iowait_sdma_inc(iowait_ioww_to_iow(wait));
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
tx->sn = sde->tail_sn++;
@@ -2422,10 +2415,7 @@ unlock_noconn:
spin_lock(&sde->flushlist_lock);
list_add_tail(&tx->list, &sde->flushlist);
spin_unlock(&sde->flushlist_lock);
- if (wait) {
- wait->tx_count++;
- wait->count += tx->num_desc;
- }
+ iowait_inc_wait_count(wait, tx->num_desc);
schedule_work(&sde->flush_worker);
ret = -ECOMM;
goto unlock;
@@ -2442,9 +2432,9 @@ nodesc:
/**
* sdma_send_txlist() - submit a list of tx req to ring
* @sde: sdma engine to use
- * @wait: wait structure to use when full (may be NULL)
+ * @wait: SE wait structure to use when full (may be NULL)
* @tx_list: list of sdma_txreqs to submit
- * @count: pointer to a u32 which, after return will contain the total number of
+ * @count: pointer to a u16 which, after return will contain the total number of
* sdma_txreqs removed from the tx_list. This will include sdma_txreqs
* whose SDMA descriptors are submitted to the ring and the sdma_txreqs
* which are added to SDMA engine flush list if the SDMA engine state is
@@ -2467,8 +2457,8 @@ nodesc:
* -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/
-int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
- struct list_head *tx_list, u32 *count_out)
+int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait,
+ struct list_head *tx_list, u16 *count_out)
{
struct sdma_txreq *tx, *tx_next;
int ret = 0;
@@ -2479,7 +2469,7 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
spin_lock_irqsave(&sde->tail_lock, flags);
retry:
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
- tx->wait = wait;
+ tx->wait = iowait_ioww_to_iow(wait);
if (unlikely(!__sdma_running(sde)))
goto unlock_noconn;
if (unlikely(tx->num_desc > sde->desc_avail))
@@ -2500,8 +2490,9 @@ retry:
update_tail:
total_count = submit_count + flush_count;
if (wait) {
- iowait_sdma_add(wait, total_count);
- iowait_starve_clear(submit_count > 0, wait);
+ iowait_sdma_add(iowait_ioww_to_iow(wait), total_count);
+ iowait_starve_clear(submit_count > 0,
+ iowait_ioww_to_iow(wait));
}
if (tail != INVALID_TAIL)
sdma_update_tail(sde, tail);
@@ -2511,7 +2502,7 @@ update_tail:
unlock_noconn:
spin_lock(&sde->flushlist_lock);
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
- tx->wait = wait;
+ tx->wait = iowait_ioww_to_iow(wait);
list_del_init(&tx->list);
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
@@ -2520,10 +2511,7 @@ unlock_noconn:
#endif
list_add_tail(&tx->list, &sde->flushlist);
flush_count++;
- if (wait) {
- wait->tx_count++;
- wait->count += tx->num_desc;
- }
+ iowait_inc_wait_count(wait, tx->num_desc);
}
spin_unlock(&sde->flushlist_lock);
schedule_work(&sde->flush_worker);
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 46c775f255d1..6dc63d7c5685 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_SDMA_H
#define _HFI1_SDMA_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -62,16 +62,6 @@
/* Hardware limit for SDMA packet size */
#define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1)
-#define SDMA_TXREQ_S_OK 0
-#define SDMA_TXREQ_S_SENDERROR 1
-#define SDMA_TXREQ_S_ABORTED 2
-#define SDMA_TXREQ_S_SHUTDOWN 3
-
-/* flags bits */
-#define SDMA_TXREQ_F_URGENT 0x0001
-#define SDMA_TXREQ_F_AHG_COPY 0x0002
-#define SDMA_TXREQ_F_USE_AHG 0x0004
-
#define SDMA_MAP_NONE 0
#define SDMA_MAP_SINGLE 1
#define SDMA_MAP_PAGE 2
@@ -415,6 +405,7 @@ struct sdma_engine {
struct list_head flushlist;
struct cpumask cpu_mask;
struct kobject kobj;
+ u32 msix_intr;
};
int sdma_init(struct hfi1_devdata *dd, u8 port);
@@ -849,16 +840,16 @@ static inline int sdma_txadd_kvaddr(
dd, SDMA_MAP_SINGLE, tx, addr, len);
}
-struct iowait;
+struct iowait_work;
int sdma_send_txreq(struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *tx,
bool pkts_sent);
int sdma_send_txlist(struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct list_head *tx_list,
- u32 *count);
+ u16 *count_out);
int sdma_ahg_alloc(struct sdma_engine *sde);
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 25e867393463..2be513d4c9da 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -494,17 +494,18 @@ static struct kobj_type hfi1_vl2mtu_ktype = {
* Start of per-unit (or driver, in some cases, but replicated
* per unit) functions (these get a device *)
*/
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+ char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
@@ -517,8 +518,9 @@ static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
return ret;
}
+static DEVICE_ATTR_RO(board_id);
-static ssize_t show_boardversion(struct device *device,
+static ssize_t boardversion_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@@ -528,8 +530,9 @@ static ssize_t show_boardversion(struct device *device,
/* The string printed here is already newline-terminated. */
return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
}
+static DEVICE_ATTR_RO(boardversion);
-static ssize_t show_nctxts(struct device *device,
+static ssize_t nctxts_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@@ -546,8 +549,9 @@ static ssize_t show_nctxts(struct device *device,
min(dd->num_user_contexts,
(u32)dd->sc_sizes[SC_USER].count));
}
+static DEVICE_ATTR_RO(nctxts);
-static ssize_t show_nfreectxts(struct device *device,
+static ssize_t nfreectxts_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@@ -557,8 +561,9 @@ static ssize_t show_nfreectxts(struct device *device,
/* Return the number of free user ports (contexts) available. */
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
}
+static DEVICE_ATTR_RO(nfreectxts);
-static ssize_t show_serial(struct device *device,
+static ssize_t serial_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@@ -567,8 +572,9 @@ static ssize_t show_serial(struct device *device,
return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
}
+static DEVICE_ATTR_RO(serial);
-static ssize_t store_chip_reset(struct device *device,
+static ssize_t chip_reset_store(struct device *device,
struct device_attribute *attr, const char *buf,
size_t count)
{
@@ -586,6 +592,7 @@ static ssize_t store_chip_reset(struct device *device,
bail:
return ret < 0 ? ret : count;
}
+static DEVICE_ATTR_WO(chip_reset);
/*
* Convert the reported temperature from an integer (reported in
@@ -598,7 +605,7 @@ bail:
/*
* Dump tempsense values, in decimal, to ease shell-scripts.
*/
-static ssize_t show_tempsense(struct device *device,
+static ssize_t tempsense_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@@ -622,6 +629,7 @@ static ssize_t show_tempsense(struct device *device,
}
return ret;
}
+static DEVICE_ATTR_RO(tempsense);
/*
* end of per-unit (or driver, in some cases, but replicated
@@ -629,24 +637,20 @@ static ssize_t show_tempsense(struct device *device,
*/
/* start of per-unit file structures and support code */
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_hfi, NULL);
-static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL);
-static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL);
-static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
-static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
-static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
-static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
-
-static struct device_attribute *hfi1_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_board_id,
- &dev_attr_nctxts,
- &dev_attr_nfreectxts,
- &dev_attr_serial,
- &dev_attr_boardversion,
- &dev_attr_tempsense,
- &dev_attr_chip_reset,
+static struct attribute *hfi1_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_board_id.attr,
+ &dev_attr_nctxts.attr,
+ &dev_attr_nfreectxts.attr,
+ &dev_attr_serial.attr,
+ &dev_attr_boardversion.attr,
+ &dev_attr_tempsense.attr,
+ &dev_attr_chip_reset.attr,
+ NULL,
+};
+
+const struct attribute_group ib_hfi1_attr_group = {
+ .attrs = hfi1_attributes,
};
int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
@@ -832,12 +836,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
struct device *class_dev = &dev->dev;
int i, j, ret;
- for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
- ret = device_create_file(&dev->dev, hfi1_attributes[i]);
- if (ret)
- goto bail;
- }
-
for (i = 0; i < dd->num_sdma; i++) {
ret = kobject_init_and_add(&dd->per_sdma[i].kobj,
&sde_ktype, &class_dev->kobj,
@@ -855,9 +853,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
return 0;
bail:
- for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i)
- device_remove_file(&dev->dev, hfi1_attributes[i]);
-
for (i = 0; i < dd->num_sdma; i++)
kobject_del(&dd->per_sdma[i].kobj);
diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h
index 8540463ef3f7..84458f1325e1 100644
--- a/drivers/infiniband/hw/hfi1/trace.h
+++ b/drivers/infiniband/hw/hfi1/trace.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -62,3 +62,4 @@ __print_symbolic(etype, \
#include "trace_rx.h"
#include "trace_tx.h"
#include "trace_mmu.h"
+#include "trace_iowait.h"
diff --git a/drivers/infiniband/hw/hfi1/trace_iowait.h b/drivers/infiniband/hw/hfi1/trace_iowait.h
new file mode 100644
index 000000000000..27f4334ece2b
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/trace_iowait.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#if !defined(__HFI1_TRACE_IOWAIT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_IOWAIT_H
+
+#include <linux/tracepoint.h>
+#include "iowait.h"
+#include "verbs.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_iowait
+
+DECLARE_EVENT_CLASS(hfi1_iowait_template,
+ TP_PROTO(struct iowait *wait, u32 flag),
+ TP_ARGS(wait, flag),
+ TP_STRUCT__entry(/* entry */
+ __field(unsigned long, addr)
+ __field(unsigned long, flags)
+ __field(u32, flag)
+ __field(u32, qpn)
+ ),
+ TP_fast_assign(/* assign */
+ __entry->addr = (unsigned long)wait;
+ __entry->flags = wait->flags;
+ __entry->flag = (1 << flag);
+ __entry->qpn = iowait_to_qp(wait)->ibqp.qp_num;
+ ),
+ TP_printk(/* print */
+ "iowait 0x%lx qp %u flags 0x%lx flag 0x%x",
+ __entry->addr,
+ __entry->qpn,
+ __entry->flags,
+ __entry->flag
+ )
+ );
+
+DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_set,
+ TP_PROTO(struct iowait *wait, u32 flag),
+ TP_ARGS(wait, flag));
+
+DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_clear,
+ TP_PROTO(struct iowait *wait, u32 flag),
+ TP_ARGS(wait, flag));
+
+#endif /* __HFI1_TRACE_IOWAIT_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_iowait
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index e254dcec6f64..6aca0c5a7f97 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -88,7 +88,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
clear_ahg(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done_free_tx;
}
@@ -140,7 +140,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp, wqe->wr.ex.invalidate_rkey);
local_ops = 1;
}
- hfi1_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
+ rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
: IB_WC_SUCCESS);
if (local_ops)
atomic_dec(&qp->local_ops_pending);
@@ -426,7 +426,7 @@ send_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto rewind;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, false, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
break;
case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -449,7 +449,7 @@ send_last:
if (unlikely(wc.byte_len > qp->r_len))
goto rewind;
wc.opcode = IB_WC_RECV;
- hfi1_copy_sge(&qp->r_sge, data, tlen, false, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
rvt_put_ss(&qp->s_rdma_read_sge);
last_imm:
wc.wr_id = qp->r_wr_id;
@@ -523,7 +523,7 @@ rdma_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto drop;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -550,7 +550,7 @@ rdma_last_imm:
}
wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
- hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
goto last_imm;
@@ -564,7 +564,7 @@ rdma_last:
tlen -= (hdrsize + extra_bytes);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
- hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
break;
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 70d39fc450a1..4baa8f4d49de 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -210,8 +210,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
}
hfi1_make_grh(ibp, &grh, &grd, 0, 0);
- hfi1_copy_sge(&qp->r_sge, &grh,
- sizeof(grh), true, false);
+ rvt_copy_sge(qp, &qp->r_sge, &grh,
+ sizeof(grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else {
rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
@@ -228,7 +228,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (len > sge->sge_length)
len = sge->sge_length;
WARN_ON_ONCE(len == 0);
- hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, true, false);
+ rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
@@ -518,7 +518,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
goto bail;
}
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done_free_tx;
}
@@ -560,7 +560,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, tflags);
ps->flags = tflags;
- hfi1_send_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done_free_tx;
}
}
@@ -1019,8 +1019,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
goto drop;
}
if (packet->grh) {
- hfi1_copy_sge(&qp->r_sge, packet->grh,
- sizeof(struct ib_grh), true, false);
+ rvt_copy_sge(qp, &qp->r_sge, packet->grh,
+ sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else if (packet->etype == RHF_RCV_TYPE_BYPASS) {
struct ib_grh grh;
@@ -1030,14 +1030,14 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
* out when creating 16B, add back the GRH here.
*/
hfi1_make_ext_grh(packet, &grh, slid, dlid);
- hfi1_copy_sge(&qp->r_sge, &grh,
- sizeof(struct ib_grh), true, false);
+ rvt_copy_sge(qp, &qp->r_sge, &grh,
+ sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else {
rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
}
- hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
- true, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
+ true, false);
rvt_put_ss(&qp->r_sge);
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
return;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 5c88706121c1..3f0aadccd9f6 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -76,8 +76,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
static unsigned initial_pkt_count = 8;
-static int user_sdma_send_pkts(struct user_sdma_request *req,
- unsigned maxpkts);
+static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts);
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
@@ -101,7 +100,7 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
static int defer_packet_queue(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *txreq,
uint seq,
bool pkts_sent);
@@ -124,13 +123,13 @@ static struct mmu_rb_ops sdma_rb_ops = {
static int defer_packet_queue(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *txreq,
uint seq,
bool pkts_sent)
{
struct hfi1_user_sdma_pkt_q *pq =
- container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
+ container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
struct user_sdma_txreq *tx =
container_of(txreq, struct user_sdma_txreq, txreq);
@@ -187,13 +186,12 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
pq->ctxt = uctxt->ctxt;
pq->subctxt = fd->subctxt;
pq->n_max_reqs = hfi1_sdma_comp_ring_size;
- pq->state = SDMA_PKT_Q_INACTIVE;
atomic_set(&pq->n_reqs, 0);
init_waitqueue_head(&pq->wait);
atomic_set(&pq->n_locked, 0);
pq->mm = fd->mm;
- iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
+ iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
activate_packet_queue, NULL);
pq->reqidx = 0;
@@ -276,7 +274,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
/* Wait until all requests have been freed. */
wait_event_interruptible(
pq->wait,
- (READ_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE));
+ !atomic_read(&pq->n_reqs));
kfree(pq->reqs);
kfree(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
@@ -312,6 +310,13 @@ static u8 dlid_to_selector(u16 dlid)
return mapping[hash];
}
+/**
+ * hfi1_user_sdma_process_request() - Process and start a user sdma request
+ * @fd: valid file descriptor
+ * @iovec: array of io vectors to process
+ * @dim: overall iovec array size
+ * @count: number of io vector array entries processed
+ */
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
struct iovec *iovec, unsigned long dim,
unsigned long *count)
@@ -328,7 +333,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
u8 opcode, sc, vl;
u16 pkey;
u32 slid;
- int req_queued = 0;
u16 dlid;
u32 selector;
@@ -392,7 +396,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->data_len = 0;
req->pq = pq;
req->cq = cq;
- req->status = -1;
req->ahg_idx = -1;
req->iov_idx = 0;
req->sent = 0;
@@ -400,12 +403,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->seqcomp = 0;
req->seqsubmitted = 0;
req->tids = NULL;
- req->done = 0;
req->has_error = 0;
INIT_LIST_HEAD(&req->txps);
memcpy(&req->info, &info, sizeof(info));
+ /* The request is initialized, count it */
+ atomic_inc(&pq->n_reqs);
+
if (req_opcode(info.ctrl) == EXPECTED) {
/* expected must have a TID info and at least one data vector */
if (req->data_iovs < 2) {
@@ -500,7 +505,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
ret = pin_vector_pages(req, &req->iovs[i]);
if (ret) {
req->data_iovs = i;
- req->status = ret;
goto free_req;
}
req->data_len += req->iovs[i].iov.iov_len;
@@ -561,23 +565,11 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->ahg_idx = sdma_ahg_alloc(req->sde);
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
- atomic_inc(&pq->n_reqs);
- req_queued = 1;
+ pq->state = SDMA_PKT_Q_ACTIVE;
/* Send the first N packets in the request to buy us some time */
ret = user_sdma_send_pkts(req, pcount);
- if (unlikely(ret < 0 && ret != -EBUSY)) {
- req->status = ret;
+ if (unlikely(ret < 0 && ret != -EBUSY))
goto free_req;
- }
-
- /*
- * It is possible that the SDMA engine would have processed all the
- * submitted packets by the time we get here. Therefore, only set
- * packet queue state to ACTIVE if there are still uncompleted
- * requests.
- */
- if (atomic_read(&pq->n_reqs))
- xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
/*
* This is a somewhat blocking send implementation.
@@ -588,14 +580,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
while (req->seqsubmitted != req->info.npkts) {
ret = user_sdma_send_pkts(req, pcount);
if (ret < 0) {
- if (ret != -EBUSY) {
- req->status = ret;
- WRITE_ONCE(req->has_error, 1);
- if (READ_ONCE(req->seqcomp) ==
- req->seqsubmitted - 1)
- goto free_req;
- return ret;
- }
+ if (ret != -EBUSY)
+ goto free_req;
wait_event_interruptible_timeout(
pq->busy.wait_dma,
(pq->state == SDMA_PKT_Q_ACTIVE),
@@ -606,10 +592,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
*count += idx;
return 0;
free_req:
- user_sdma_free_request(req, true);
- if (req_queued)
+ /*
+ * If the submitted seqsubmitted == npkts, the completion routine
+ * controls the final state. If sequbmitted < npkts, wait for any
+ * outstanding packets to finish before cleaning up.
+ */
+ if (req->seqsubmitted < req->info.npkts) {
+ if (req->seqsubmitted)
+ wait_event(pq->busy.wait_dma,
+ (req->seqcomp == req->seqsubmitted - 1));
+ user_sdma_free_request(req, true);
pq_update(pq);
- set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
+ set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
+ }
return ret;
}
@@ -760,9 +755,10 @@ static int user_sdma_txadd(struct user_sdma_request *req,
return ret;
}
-static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
+static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
{
- int ret = 0, count;
+ int ret = 0;
+ u16 count;
unsigned npkts = 0;
struct user_sdma_txreq *tx = NULL;
struct hfi1_user_sdma_pkt_q *pq = NULL;
@@ -864,8 +860,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
changes = set_txreq_header_ahg(req, tx,
datalen);
- if (changes < 0)
+ if (changes < 0) {
+ ret = changes;
goto free_tx;
+ }
}
} else {
ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
@@ -914,10 +912,11 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
npkts++;
}
dosend:
- ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
+ ret = sdma_send_txlist(req->sde,
+ iowait_get_ib_work(&pq->busy),
+ &req->txps, &count);
req->seqsubmitted += count;
if (req->seqsubmitted == req->info.npkts) {
- WRITE_ONCE(req->done, 1);
/*
* The txreq has already been submitted to the HW queue
* so we can free the AHG entry now. Corruption will not
@@ -1365,11 +1364,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
return idx;
}
-/*
- * SDMA tx request completion callback. Called when the SDMA progress
- * state machine gets notification that the SDMA descriptors for this
- * tx request have been processed by the DMA engine. Called in
- * interrupt context.
+/**
+ * user_sdma_txreq_cb() - SDMA tx request completion callback.
+ * @txreq: valid sdma tx request
+ * @status: success/failure of request
+ *
+ * Called when the SDMA progress state machine gets notification that
+ * the SDMA descriptors for this tx request have been processed by the
+ * DMA engine. Called in interrupt context.
+ * Only do work on completed sequences.
*/
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
{
@@ -1378,7 +1381,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
struct user_sdma_request *req;
struct hfi1_user_sdma_pkt_q *pq;
struct hfi1_user_sdma_comp_q *cq;
- u16 idx;
+ enum hfi1_sdma_comp_state state = COMPLETE;
if (!tx->req)
return;
@@ -1391,39 +1394,25 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
SDMA_DBG(req, "SDMA completion with error %d",
status);
WRITE_ONCE(req->has_error, 1);
+ state = ERROR;
}
req->seqcomp = tx->seqnum;
kmem_cache_free(pq->txreq_cache, tx);
- tx = NULL;
-
- idx = req->info.comp_idx;
- if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
- if (req->seqcomp == req->info.npkts - 1) {
- req->status = 0;
- user_sdma_free_request(req, false);
- pq_update(pq);
- set_comp_state(pq, cq, idx, COMPLETE, 0);
- }
- } else {
- if (status != SDMA_TXREQ_S_OK)
- req->status = status;
- if (req->seqcomp == (READ_ONCE(req->seqsubmitted) - 1) &&
- (READ_ONCE(req->done) ||
- READ_ONCE(req->has_error))) {
- user_sdma_free_request(req, false);
- pq_update(pq);
- set_comp_state(pq, cq, idx, ERROR, req->status);
- }
- }
+
+ /* sequence isn't complete? We are done */
+ if (req->seqcomp != req->info.npkts - 1)
+ return;
+
+ user_sdma_free_request(req, false);
+ set_comp_state(pq, cq, req->info.comp_idx, state, status);
+ pq_update(pq);
}
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
{
- if (atomic_dec_and_test(&pq->n_reqs)) {
- xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
+ if (atomic_dec_and_test(&pq->n_reqs))
wake_up(&pq->wait);
- }
}
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
@@ -1448,6 +1437,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
if (!node)
continue;
+ req->iovs[i].node = NULL;
+
if (unpin)
hfi1_mmu_rb_remove(req->pq->handler,
&node->rb);
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index d2bc77f75253..14dfd757dafd 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -105,9 +105,10 @@ static inline int ahg_header_set(u32 *arr, int idx, size_t array_size,
#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
-#define SDMA_PKT_Q_INACTIVE BIT(0)
-#define SDMA_PKT_Q_ACTIVE BIT(1)
-#define SDMA_PKT_Q_DEFERRED BIT(2)
+enum pkt_q_sdma_state {
+ SDMA_PKT_Q_ACTIVE,
+ SDMA_PKT_Q_DEFERRED,
+};
/*
* Maximum retry attempts to submit a TX request
@@ -133,7 +134,7 @@ struct hfi1_user_sdma_pkt_q {
struct user_sdma_request *reqs;
unsigned long *req_in_use;
struct iowait busy;
- unsigned state;
+ enum pkt_q_sdma_state state;
wait_queue_head_t wait;
unsigned long unpinned;
struct mmu_rb_handler *handler;
@@ -203,14 +204,12 @@ struct user_sdma_request {
s8 ahg_idx;
/* Writeable fields shared with interrupt */
- u64 seqcomp ____cacheline_aligned_in_smp;
- u64 seqsubmitted;
- /* status of the last txreq completed */
- int status;
+ u16 seqcomp ____cacheline_aligned_in_smp;
+ u16 seqsubmitted;
/* Send side fields */
struct list_head txps ____cacheline_aligned_in_smp;
- u64 seqnum;
+ u16 seqnum;
/*
* KDETH.OFFSET (TID) field
* The offset can cover multiple packets, depending on the
@@ -228,7 +227,6 @@ struct user_sdma_request {
u16 tididx;
/* progress index moving along the iovs array */
u8 iov_idx;
- u8 done;
u8 has_error;
struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
@@ -248,7 +246,7 @@ struct user_sdma_txreq {
struct user_sdma_request *req;
u16 flags;
unsigned int busycount;
- u64 seqnum;
+ u16 seqnum;
};
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index a7c586a5589d..48e11e510358 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -129,8 +129,6 @@ unsigned short piothreshold = 256;
module_param(piothreshold, ushort, S_IRUGO);
MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");
-#define COPY_CACHELESS 1
-#define COPY_ADAPTIVE 2
static unsigned int sge_copy_mode;
module_param(sge_copy_mode, uint, S_IRUGO);
MODULE_PARM_DESC(sge_copy_mode,
@@ -151,159 +149,13 @@ static int pio_wait(struct rvt_qp *qp,
/* 16B trailing buffer */
static const u8 trail_buf[MAX_16B_PADDING];
-static uint wss_threshold;
+static uint wss_threshold = 80;
module_param(wss_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
static uint wss_clean_period = 256;
module_param(wss_clean_period, uint, S_IRUGO);
MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");
-/* memory working set size */
-struct hfi1_wss {
- unsigned long *entries;
- atomic_t total_count;
- atomic_t clean_counter;
- atomic_t clean_entry;
-
- int threshold;
- int num_entries;
- long pages_mask;
-};
-
-static struct hfi1_wss wss;
-
-int hfi1_wss_init(void)
-{
- long llc_size;
- long llc_bits;
- long table_size;
- long table_bits;
-
- /* check for a valid percent range - default to 80 if none or invalid */
- if (wss_threshold < 1 || wss_threshold > 100)
- wss_threshold = 80;
- /* reject a wildly large period */
- if (wss_clean_period > 1000000)
- wss_clean_period = 256;
- /* reject a zero period */
- if (wss_clean_period == 0)
- wss_clean_period = 1;
-
- /*
- * Calculate the table size - the next power of 2 larger than the
- * LLC size. LLC size is in KiB.
- */
- llc_size = wss_llc_size() * 1024;
- table_size = roundup_pow_of_two(llc_size);
-
- /* one bit per page in rounded up table */
- llc_bits = llc_size / PAGE_SIZE;
- table_bits = table_size / PAGE_SIZE;
- wss.pages_mask = table_bits - 1;
- wss.num_entries = table_bits / BITS_PER_LONG;
-
- wss.threshold = (llc_bits * wss_threshold) / 100;
- if (wss.threshold == 0)
- wss.threshold = 1;
-
- atomic_set(&wss.clean_counter, wss_clean_period);
-
- wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries),
- GFP_KERNEL);
- if (!wss.entries) {
- hfi1_wss_exit();
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void hfi1_wss_exit(void)
-{
- /* coded to handle partially initialized and repeat callers */
- kfree(wss.entries);
- wss.entries = NULL;
-}
-
-/*
- * Advance the clean counter. When the clean period has expired,
- * clean an entry.
- *
- * This is implemented in atomics to avoid locking. Because multiple
- * variables are involved, it can be racy which can lead to slightly
- * inaccurate information. Since this is only a heuristic, this is
- * OK. Any innaccuracies will clean themselves out as the counter
- * advances. That said, it is unlikely the entry clean operation will
- * race - the next possible racer will not start until the next clean
- * period.
- *
- * The clean counter is implemented as a decrement to zero. When zero
- * is reached an entry is cleaned.
- */
-static void wss_advance_clean_counter(void)
-{
- int entry;
- int weight;
- unsigned long bits;
-
- /* become the cleaner if we decrement the counter to zero */
- if (atomic_dec_and_test(&wss.clean_counter)) {
- /*
- * Set, not add, the clean period. This avoids an issue
- * where the counter could decrement below the clean period.
- * Doing a set can result in lost decrements, slowing the
- * clean advance. Since this a heuristic, this possible
- * slowdown is OK.
- *
- * An alternative is to loop, advancing the counter by a
- * clean period until the result is > 0. However, this could
- * lead to several threads keeping another in the clean loop.
- * This could be mitigated by limiting the number of times
- * we stay in the loop.
- */
- atomic_set(&wss.clean_counter, wss_clean_period);
-
- /*
- * Uniquely grab the entry to clean and move to next.
- * The current entry is always the lower bits of
- * wss.clean_entry. The table size, wss.num_entries,
- * is always a power-of-2.
- */
- entry = (atomic_inc_return(&wss.clean_entry) - 1)
- & (wss.num_entries - 1);
-
- /* clear the entry and count the bits */
- bits = xchg(&wss.entries[entry], 0);
- weight = hweight64((u64)bits);
- /* only adjust the contended total count if needed */
- if (weight)
- atomic_sub(weight, &wss.total_count);
- }
-}
-
-/*
- * Insert the given address into the working set array.
- */
-static void wss_insert(void *address)
-{
- u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask;
- u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
- u32 nr = page & (BITS_PER_LONG - 1);
-
- if (!test_and_set_bit(nr, &wss.entries[entry]))
- atomic_inc(&wss.total_count);
-
- wss_advance_clean_counter();
-}
-
-/*
- * Is the working set larger than the threshold?
- */
-static inline bool wss_exceeds_threshold(void)
-{
- return atomic_read(&wss.total_count) >= wss.threshold;
-}
-
/*
* Translate ib_wr_opcode into ib_wc_opcode.
*/
@@ -438,79 +290,6 @@ static const u32 pio_opmask[BIT(3)] = {
*/
__be64 ib_hfi1_sys_image_guid;
-/**
- * hfi1_copy_sge - copy data to SGE memory
- * @ss: the SGE state
- * @data: the data to copy
- * @length: the length of the data
- * @release: boolean to release MR
- * @copy_last: do a separate copy of the last 8 bytes
- */
-void hfi1_copy_sge(
- struct rvt_sge_state *ss,
- void *data, u32 length,
- bool release,
- bool copy_last)
-{
- struct rvt_sge *sge = &ss->sge;
- int i;
- bool in_last = false;
- bool cacheless_copy = false;
-
- if (sge_copy_mode == COPY_CACHELESS) {
- cacheless_copy = length >= PAGE_SIZE;
- } else if (sge_copy_mode == COPY_ADAPTIVE) {
- if (length >= PAGE_SIZE) {
- /*
- * NOTE: this *assumes*:
- * o The first vaddr is the dest.
- * o If multiple pages, then vaddr is sequential.
- */
- wss_insert(sge->vaddr);
- if (length >= (2 * PAGE_SIZE))
- wss_insert(sge->vaddr + PAGE_SIZE);
-
- cacheless_copy = wss_exceeds_threshold();
- } else {
- wss_advance_clean_counter();
- }
- }
- if (copy_last) {
- if (length > 8) {
- length -= 8;
- } else {
- copy_last = false;
- in_last = true;
- }
- }
-
-again:
- while (length) {
- u32 len = rvt_get_sge_length(sge, length);
-
- WARN_ON_ONCE(len == 0);
- if (unlikely(in_last)) {
- /* enforce byte transfer ordering */
- for (i = 0; i < len; i++)
- ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
- } else if (cacheless_copy) {
- cacheless_memcpy(sge->vaddr, data, len);
- } else {
- memcpy(sge->vaddr, data, len);
- }
- rvt_update_sge(ss, len, release);
- data += len;
- length -= len;
- }
-
- if (copy_last) {
- copy_last = false;
- in_last = true;
- length = 8;
- goto again;
- }
-}
-
/*
* Make sure the QP is ready and able to accept the given opcode.
*/
@@ -713,7 +492,7 @@ static void verbs_sdma_complete(
spin_lock(&qp->s_lock);
if (tx->wqe) {
- hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
+ rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
struct hfi1_opa_header *hdr;
@@ -737,7 +516,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
write_seqlock(&dev->iowait_lock);
list_add_tail(&ps->s_txreq->txreq.list,
- &priv->s_iowait.tx_head);
+ &ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) {
if (list_empty(&dev->memwait))
mod_timer(&dev->mem_timer, jiffies + 1);
@@ -748,7 +527,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
rvt_get_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
- qp->s_flags &= ~RVT_S_BUSY;
+ hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -950,8 +729,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (unlikely(ret))
goto bail_build;
}
- ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq,
- ps->pkts_sent);
+ ret = sdma_send_txreq(tx->sde, ps->wait, &tx->txreq, ps->pkts_sent);
if (unlikely(ret < 0)) {
if (ret == -ECOMM)
goto bail_ecomm;
@@ -1001,7 +779,7 @@ static int pio_wait(struct rvt_qp *qp,
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
write_seqlock(&dev->iowait_lock);
list_add_tail(&ps->s_txreq->txreq.list,
- &priv->s_iowait.tx_head);
+ &ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) {
struct hfi1_ibdev *dev = &dd->verbs_dev;
int was_empty;
@@ -1020,7 +798,7 @@ static int pio_wait(struct rvt_qp *qp,
hfi1_sc_wantpiobuf_intr(sc, 1);
}
write_sequnlock(&dev->iowait_lock);
- qp->s_flags &= ~RVT_S_BUSY;
+ hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1160,7 +938,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
pio_bail:
if (qp->s_wqe) {
spin_lock_irqsave(&qp->s_lock, flags);
- hfi1_send_complete(qp, qp->s_wqe, wc_status);
+ rvt_send_complete(qp, qp->s_wqe, wc_status);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
spin_lock_irqsave(&qp->s_lock, flags);
@@ -1367,7 +1145,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
hfi1_cdbg(PIO, "%s() Failed. Completing with err",
__func__);
spin_lock_irqsave(&qp->s_lock, flags);
- hfi1_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
+ rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
return -EINVAL;
@@ -1943,7 +1721,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
- dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
+ dd->verbs_dev.rdi.driver_f.setup_wqe = hfi1_setup_wqe;
dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup =
hfi1_comp_vect_mappings_lookup;
@@ -1956,10 +1734,16 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size;
dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);
+ dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
+ dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
+ dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
/* post send table */
dd->verbs_dev.rdi.post_parms = hfi1_post_parms;
+ /* opcode translation table */
+ dd->verbs_dev.rdi.wc_opcode = ib_hfi1_wc_opcode;
+
ppd = dd->pport;
for (i = 0; i < dd->num_pports; i++, ppd++)
rvt_init_port(&dd->verbs_dev.rdi,
@@ -1967,6 +1751,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
i,
ppd->pkeys);
+ rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev,
+ &ib_hfi1_attr_group);
+
ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1);
if (ret)
goto err_verbs_txreq;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index a4d06502f06d..64c9054db5f3 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -166,11 +166,13 @@ struct hfi1_qp_priv {
* This structure is used to hold commonly lookedup and computed values during
* the send engine progress.
*/
+struct iowait_work;
struct hfi1_pkt_state {
struct hfi1_ibdev *dev;
struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd;
struct verbs_txreq *s_txreq;
+ struct iowait_work *wait;
unsigned long flags;
unsigned long timeout;
unsigned long timeout_int;
@@ -247,7 +249,7 @@ static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
return container_of(rdi, struct hfi1_ibdev, rdi);
}
-static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
+static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
{
struct hfi1_qp_priv *priv;
@@ -313,9 +315,6 @@ void hfi1_put_txreq(struct verbs_txreq *tx);
int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
-void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
- bool release, bool copy_last);
-
void hfi1_cnp_rcv(struct hfi1_packet *packet);
void hfi1_uc_rcv(struct hfi1_packet *packet);
@@ -343,7 +342,8 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
-int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ bool *call_send);
extern const u32 rc_only_opcode;
extern const u32 uc_only_opcode;
@@ -363,9 +363,6 @@ void hfi1_do_send_from_rvt(struct rvt_qp *qp);
void hfi1_do_send(struct rvt_qp *qp, bool in_thread);
-void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
- enum ib_wc_status status);
-
void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn);
int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
@@ -390,28 +387,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc);
-int hfi1_wss_init(void);
-void hfi1_wss_exit(void);
-
-/* platform specific: return the lowest level cache (llc) size, in KiB */
-static inline int wss_llc_size(void)
-{
- /* assume that the boot CPU value is universal for all CPUs */
- return boot_cpu_data.x86_cache_size;
-}
-
-/* platform specific: cacheless copy */
-static inline void cacheless_memcpy(void *dst, void *src, size_t n)
-{
- /*
- * Use the only available X64 cacheless copy. Add a __user cast
- * to quiet sparse. The src agument is already in the kernel so
- * there are no security issues. The extra fault recovery machinery
- * is not invoked.
- */
- __copy_user_nocache(dst, (void __user *)src, n, 0);
-}
-
static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr)
{
return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ);
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 1c19bbc764b2..2a77af26a231 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -102,22 +102,19 @@ static inline struct sdma_txreq *get_sdma_txreq(struct verbs_txreq *tx)
return &tx->txreq;
}
-static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp)
+static inline struct verbs_txreq *get_waiting_verbs_txreq(struct iowait_work *w)
{
struct sdma_txreq *stx;
- struct hfi1_qp_priv *priv = qp->priv;
- stx = iowait_get_txhead(&priv->s_iowait);
+ stx = iowait_get_txhead(w);
if (stx)
return container_of(stx, struct verbs_txreq, txreq);
return NULL;
}
-static inline bool verbs_txreq_queued(struct rvt_qp *qp)
+static inline bool verbs_txreq_queued(struct iowait_work *w)
{
- struct hfi1_qp_priv *priv = qp->priv;
-
- return iowait_packet_queued(&priv->s_iowait);
+ return iowait_packet_queued(w);
}
void hfi1_put_txreq(struct verbs_txreq *tx);
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index c643d80c5a53..c9876d9e3cb9 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -120,7 +120,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
uctxt->seq_cnt = 1;
uctxt->is_vnic = true;
- hfi1_set_vnic_msix_info(uctxt);
+ msix_request_rcd_irq(uctxt);
hfi1_stats.sps_ctxts++;
dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
@@ -135,8 +135,6 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
flush_wc();
- hfi1_reset_vnic_msix_info(uctxt);
-
/*
* Disable receive context and interrupt available, reset all
* RcvCtxtCtrl bits to default values.
@@ -148,6 +146,10 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
+ /* msix_intr will always be > 0, only clean up if this is true */
+ if (uctxt->msix_intr)
+ msix_free_irq(dd, uctxt->msix_intr);
+
uctxt->event_flags = 0;
hfi1_clear_tids(uctxt);
@@ -626,7 +628,7 @@ static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);
/* ensure irqs see the change */
- hfi1_vnic_synchronize_irq(dd);
+ msix_vnic_synchronize_irq(dd);
/* remove unread skbs */
for (i = 0; i < vinfo->num_rx_q; i++) {
@@ -690,8 +692,6 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
rc = hfi1_vnic_txreq_init(dd);
if (rc)
goto txreq_fail;
-
- dd->vnic.msix_idx = dd->first_dyn_msix_idx;
}
for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index c3c96c5869ed..97bd940a056a 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2017 Intel Corporation.
+ * Copyright(c) 2017 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -198,8 +198,8 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
goto free_desc;
tx->retry_count = 0;
- ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq,
- vnic_sdma->pkts_sent);
+ ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait),
+ &tx->txreq, vnic_sdma->pkts_sent);
/* When -ECOMM, sdma callback will be called with ABORT status */
if (unlikely(ret && unlikely(ret != -ECOMM)))
goto free_desc;
@@ -230,13 +230,13 @@ tx_err:
* become available.
*/
static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *txreq,
uint seq,
bool pkts_sent)
{
struct hfi1_vnic_sdma *vnic_sdma =
- container_of(wait, struct hfi1_vnic_sdma, wait);
+ container_of(wait->iow, struct hfi1_vnic_sdma, wait);
struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
@@ -247,7 +247,7 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
write_seqlock(&dev->iowait_lock);
if (list_empty(&vnic_sdma->wait.list))
- iowait_queue(pkts_sent, wait, &sde->dmawait);
+ iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
write_sequnlock(&dev->iowait_lock);
return -EBUSY;
}
@@ -285,7 +285,8 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
for (i = 0; i < vinfo->num_tx_q; i++) {
struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i];
- iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep,
+ iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
+ hfi1_vnic_sdma_sleep,
hfi1_vnic_sdma_wakeup, NULL);
vnic_sdma->sde = &vinfo->dd->per_sdma[i];
vnic_sdma->dd = vinfo->dd;
@@ -295,10 +296,12 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
/* Add a free descriptor watermark for wakeups */
if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) {
+ struct iowait_work *work;
+
INIT_LIST_HEAD(&vnic_sdma->stx.list);
vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK;
- list_add_tail(&vnic_sdma->stx.list,
- &vnic_sdma->wait.tx_head);
+ work = iowait_get_ib_work(&vnic_sdma->wait);
+ list_add_tail(&vnic_sdma->stx.list, &work->tx_head);
}
}
}
diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig
index fddb5fdf92de..21c2100b2ea9 100644
--- a/drivers/infiniband/hw/hns/Kconfig
+++ b/drivers/infiniband/hw/hns/Kconfig
@@ -1,6 +1,7 @@
config INFINIBAND_HNS
tristate "HNS RoCE Driver"
depends on NET_VENDOR_HISILICON
+ depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
depends on ARM64 || (COMPILE_TEST && 64BIT)
---help---
This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 0d96c5bb38cd..9990dc9eb96a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -49,6 +49,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
struct hns_roce_ah *ah;
u16 vlan_tag = 0xffff;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+ bool vlan_en = false;
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
@@ -58,8 +59,10 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
gid_attr = ah_attr->grh.sgid_attr;
- if (is_vlan_dev(gid_attr->ndev))
+ if (is_vlan_dev(gid_attr->ndev)) {
vlan_tag = vlan_dev_vlan_id(gid_attr->ndev);
+ vlan_en = true;
+ }
if (vlan_tag < 0x1000)
vlan_tag |= (rdma_ah_get_sl(ah_attr) &
@@ -71,6 +74,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
HNS_ROCE_PORT_NUM_SHIFT));
ah->av.gid_index = grh->sgid_index;
ah->av.vlan = cpu_to_le16(vlan_tag);
+ ah->av.vlan_en = vlan_en;
dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index,
ah->av.vlan);
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 9a24fd0ee3e7..d39bdfdb5de9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -88,8 +88,11 @@
#define BITMAP_RR 1
#define MR_TYPE_MR 0x00
+#define MR_TYPE_FRMR 0x01
#define MR_TYPE_DMA 0x03
+#define HNS_ROCE_FRMR_MAX_PA 512
+
#define PKEY_ID 0xffff
#define GUID_LEN 8
#define NODE_DESC_SIZE 64
@@ -193,6 +196,9 @@ enum {
HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2),
HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3),
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4),
+ HNS_ROCE_CAP_FLAG_MW = BIT(7),
+ HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
+ HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
};
enum hns_roce_mtt_type {
@@ -219,19 +225,11 @@ struct hns_roce_uar {
unsigned long logic_idx;
};
-struct hns_roce_vma_data {
- struct list_head list;
- struct vm_area_struct *vma;
- struct mutex *vma_list_mutex;
-};
-
struct hns_roce_ucontext {
struct ib_ucontext ibucontext;
struct hns_roce_uar uar;
struct list_head page_list;
struct mutex page_mutex;
- struct list_head vma_list;
- struct mutex vma_list_mutex;
};
struct hns_roce_pd {
@@ -293,6 +291,16 @@ struct hns_roce_mtt {
enum hns_roce_mtt_type mtt_type;
};
+struct hns_roce_mw {
+ struct ib_mw ibmw;
+ u32 pdn;
+ u32 rkey;
+ int enabled; /* MW's active status */
+ u32 pbl_hop_num;
+ u32 pbl_ba_pg_sz;
+ u32 pbl_buf_pg_sz;
+};
+
/* Only support 4K page size for mr register */
#define MR_SIZE_4K 0
@@ -304,6 +312,7 @@ struct hns_roce_mr {
u32 key; /* Key of MR */
u32 pd; /* PD num of MR */
u32 access;/* Access permission of MR */
+ u32 npages;
int enabled; /* MR's active status */
int type; /* MR's register type */
u64 *pbl_buf;/* MR's PBL space */
@@ -457,6 +466,7 @@ struct hns_roce_av {
u8 dgid[HNS_ROCE_GID_SIZE];
u8 mac[6];
__le16 vlan;
+ bool vlan_en;
};
struct hns_roce_ah {
@@ -656,6 +666,7 @@ struct hns_roce_eq_table {
};
struct hns_roce_caps {
+ u64 fw_ver;
u8 num_ports;
int gid_table_len[HNS_ROCE_MAX_PORTS];
int pkey_table_len[HNS_ROCE_MAX_PORTS];
@@ -665,7 +676,9 @@ struct hns_roce_caps {
u32 max_sq_sg; /* 2 */
u32 max_sq_inline; /* 32 */
u32 max_rq_sg; /* 2 */
+ u32 max_extend_sg;
int num_qps; /* 256k */
+ int reserved_qps;
u32 max_wqes; /* 16k */
u32 max_sq_desc_sz; /* 64 */
u32 max_rq_desc_sz; /* 64 */
@@ -738,6 +751,7 @@ struct hns_roce_work {
struct hns_roce_dev *hr_dev;
struct work_struct work;
u32 qpn;
+ u32 cqn;
int event_type;
int sub_type;
};
@@ -764,6 +778,8 @@ struct hns_roce_hw {
struct hns_roce_mr *mr, int flags, u32 pdn,
int mr_access_flags, u64 iova, u64 size,
void *mb_buf);
+ int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr);
+ int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw);
void (*write_cqc)(struct hns_roce_dev *hr_dev,
struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
dma_addr_t dma_handle, int nent, u32 vector);
@@ -863,6 +879,11 @@ static inline struct hns_roce_mr *to_hr_mr(struct ib_mr *ibmr)
return container_of(ibmr, struct hns_roce_mr, ibmr);
}
+static inline struct hns_roce_mw *to_hr_mw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct hns_roce_mw, ibmw);
+}
+
static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct hns_roce_qp, ibqp);
@@ -968,12 +989,20 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length,
u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
struct ib_udata *udata);
+struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg);
+int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
int hns_roce_dereg_mr(struct ib_mr *ibmr);
int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
struct hns_roce_cmd_mailbox *mailbox,
unsigned long mpt_index);
unsigned long key_to_hw_index(u32 key);
+struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type,
+ struct ib_udata *udata);
+int hns_roce_dealloc_mw(struct ib_mw *ibmw);
+
void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
struct hns_roce_buf *buf);
int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 081aa91fc162..ca05810c92dc 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -731,7 +731,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
cq_init_attr.comp_vector = 0;
cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL);
if (IS_ERR(cq)) {
- dev_err(dev, "Create cq for reseved loop qp failed!");
+ dev_err(dev, "Create cq for reserved loop qp failed!");
return -ENOMEM;
}
free_mr->mr_free_cq = to_hr_cq(cq);
@@ -744,7 +744,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL);
if (IS_ERR(pd)) {
- dev_err(dev, "Create pd for reseved loop qp failed!");
+ dev_err(dev, "Create pd for reserved loop qp failed!");
ret = -ENOMEM;
goto alloc_pd_failed;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 0218c0f8c2a7..a4c62ae23a9a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -54,6 +54,59 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
dseg->len = cpu_to_le32(sg->length);
}
+static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ struct hns_roce_wqe_frmr_seg *fseg,
+ const struct ib_reg_wr *wr)
+{
+ struct hns_roce_mr *mr = to_hr_mr(wr->mr);
+
+ /* use ib_access_flags */
+ roce_set_bit(rc_sq_wqe->byte_4,
+ V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
+ wr->access & IB_ACCESS_MW_BIND ? 1 : 0);
+ roce_set_bit(rc_sq_wqe->byte_4,
+ V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S,
+ wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
+ roce_set_bit(rc_sq_wqe->byte_4,
+ V2_RC_FRMR_WQE_BYTE_4_RR_S,
+ wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0);
+ roce_set_bit(rc_sq_wqe->byte_4,
+ V2_RC_FRMR_WQE_BYTE_4_RW_S,
+ wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
+ roce_set_bit(rc_sq_wqe->byte_4,
+ V2_RC_FRMR_WQE_BYTE_4_LW_S,
+ wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
+
+ /* Data structure reuse may lead to confusion */
+ rc_sq_wqe->msg_len = cpu_to_le32(mr->pbl_ba & 0xffffffff);
+ rc_sq_wqe->inv_key = cpu_to_le32(mr->pbl_ba >> 32);
+
+ rc_sq_wqe->byte_16 = cpu_to_le32(wr->mr->length & 0xffffffff);
+ rc_sq_wqe->byte_20 = cpu_to_le32(wr->mr->length >> 32);
+ rc_sq_wqe->rkey = cpu_to_le32(wr->key);
+ rc_sq_wqe->va = cpu_to_le64(wr->mr->iova);
+
+ fseg->pbl_size = cpu_to_le32(mr->pbl_size);
+ roce_set_field(fseg->mode_buf_pg_sz,
+ V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M,
+ V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S,
+ mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+ roce_set_bit(fseg->mode_buf_pg_sz,
+ V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
+}
+
+static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
+ const struct ib_atomic_wr *wr)
+{
+ if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
+ aseg->cmp_data = cpu_to_le64(wr->compare_add);
+ } else {
+ aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
+ aseg->cmp_data = 0;
+ }
+}
+
static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
unsigned int *sge_ind)
{
@@ -121,6 +174,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
}
if (wr->opcode == IB_WR_RDMA_READ) {
+ *bad_wr = wr;
dev_err(hr_dev->dev, "Not support inline data!\n");
return -EINVAL;
}
@@ -179,6 +233,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
struct hns_roce_qp *qp = to_hr_qp(ibqp);
+ struct hns_roce_wqe_frmr_seg *fseg;
struct device *dev = hr_dev->dev;
struct hns_roce_v2_db sq_db;
struct ib_qp_attr attr;
@@ -191,6 +246,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
int attr_mask;
u32 tmp_len;
int ret = 0;
+ u32 hr_op;
u8 *smac;
int nreq;
int i;
@@ -356,6 +412,9 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
V2_UD_SEND_WQE_BYTE_40_PORTN_S,
qp->port);
+ roce_set_bit(ud_sq_wqe->byte_40,
+ V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
+ ah->av.vlan_en ? 1 : 0);
roce_set_field(ud_sq_wqe->byte_48,
V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M,
V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S,
@@ -406,99 +465,100 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
+ wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
switch (wr->opcode) {
case IB_WR_RDMA_READ:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_RDMA_READ);
+ hr_op = HNS_ROCE_V2_WQE_OP_RDMA_READ;
rc_sq_wqe->rkey =
cpu_to_le32(rdma_wr(wr)->rkey);
rc_sq_wqe->va =
cpu_to_le64(rdma_wr(wr)->remote_addr);
break;
case IB_WR_RDMA_WRITE:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_RDMA_WRITE);
+ hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE;
rc_sq_wqe->rkey =
cpu_to_le32(rdma_wr(wr)->rkey);
rc_sq_wqe->va =
cpu_to_le64(rdma_wr(wr)->remote_addr);
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM);
+ hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM;
rc_sq_wqe->rkey =
cpu_to_le32(rdma_wr(wr)->rkey);
rc_sq_wqe->va =
cpu_to_le64(rdma_wr(wr)->remote_addr);
break;
case IB_WR_SEND:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_SEND);
+ hr_op = HNS_ROCE_V2_WQE_OP_SEND;
break;
case IB_WR_SEND_WITH_INV:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_SEND_WITH_INV);
+ hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_INV;
break;
case IB_WR_SEND_WITH_IMM:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM);
+ hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM;
break;
case IB_WR_LOCAL_INV:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_LOCAL_INV);
+ hr_op = HNS_ROCE_V2_WQE_OP_LOCAL_INV;
+ roce_set_bit(rc_sq_wqe->byte_4,
+ V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
+ rc_sq_wqe->inv_key =
+ cpu_to_le32(wr->ex.invalidate_rkey);
+ break;
+ case IB_WR_REG_MR:
+ hr_op = HNS_ROCE_V2_WQE_OP_FAST_REG_PMR;
+ fseg = wqe;
+ set_frmr_seg(rc_sq_wqe, fseg, reg_wr(wr));
break;
case IB_WR_ATOMIC_CMP_AND_SWP:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP);
+ hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP;
+ rc_sq_wqe->rkey =
+ cpu_to_le32(atomic_wr(wr)->rkey);
+ rc_sq_wqe->va =
+ cpu_to_le64(atomic_wr(wr)->remote_addr);
break;
case IB_WR_ATOMIC_FETCH_AND_ADD:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD);
+ hr_op = HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD;
+ rc_sq_wqe->rkey =
+ cpu_to_le32(atomic_wr(wr)->rkey);
+ rc_sq_wqe->va =
+ cpu_to_le64(atomic_wr(wr)->remote_addr);
break;
case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP);
+ hr_op =
+ HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP;
break;
case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD);
+ hr_op =
+ HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD;
break;
default:
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_MASK);
+ hr_op = HNS_ROCE_V2_WQE_OP_MASK;
break;
}
- wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
+ roce_set_field(rc_sq_wqe->byte_4,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
+ V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op);
+
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+ struct hns_roce_v2_wqe_data_seg *dseg;
+
+ dseg = wqe;
+ set_data_seg_v2(dseg, wr->sg_list);
+ wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
+ set_atomic_seg(wqe, atomic_wr(wr));
+ roce_set_field(rc_sq_wqe->byte_16,
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
+ wr->num_sge);
+ } else if (wr->opcode != IB_WR_REG_MR) {
+ ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
+ wqe, &sge_ind, bad_wr);
+ if (ret)
+ goto out;
+ }
- ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe,
- &sge_ind, bad_wr);
- if (ret)
- goto out;
ind++;
} else {
dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
@@ -935,7 +995,24 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev)
resp = (struct hns_roce_query_version *)desc.data;
hr_dev->hw_rev = le32_to_cpu(resp->rocee_hw_version);
- hr_dev->vendor_id = le32_to_cpu(resp->rocee_vendor_id);
+ hr_dev->vendor_id = hr_dev->pci_dev->vendor;
+
+ return 0;
+}
+
+static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_query_fw_info *resp;
+ struct hns_roce_cmq_desc desc;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_QUERY_FW_VER, true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ resp = (struct hns_roce_query_fw_info *)desc.data;
+ hr_dev->caps.fw_ver = (u64)(le32_to_cpu(resp->fw_ver));
return 0;
}
@@ -1158,6 +1235,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
ret = hns_roce_cmq_query_hw_info(hr_dev);
if (ret) {
+ dev_err(hr_dev->dev, "Query hardware version fail, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ ret = hns_roce_query_fw_ver(hr_dev);
+ if (ret) {
dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n",
ret);
return ret;
@@ -1185,14 +1269,16 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
return ret;
}
- hr_dev->vendor_part_id = 0;
- hr_dev->sys_image_guid = 0;
+
+ hr_dev->vendor_part_id = hr_dev->pci_dev->device;
+ hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM;
caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM;
caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM;
caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM;
caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM;
+ caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM;
caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM;
caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
caps->num_uars = HNS_ROCE_V2_UAR_NUM;
@@ -1222,6 +1308,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->reserved_mrws = 1;
caps->reserved_uars = 0;
caps->reserved_cqs = 0;
+ caps->reserved_qps = HNS_ROCE_V2_RSV_QPS;
caps->qpc_ba_pg_sz = 0;
caps->qpc_buf_pg_sz = 0;
@@ -1255,6 +1342,11 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
HNS_ROCE_CAP_FLAG_RQ_INLINE |
HNS_ROCE_CAP_FLAG_RECORD_DB |
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
+
+ if (hr_dev->pci_dev->revision == 0x21)
+ caps->flags |= HNS_ROCE_CAP_FLAG_MW |
+ HNS_ROCE_CAP_FLAG_FRMR;
+
caps->pkey_table_len[0] = 1;
caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM;
@@ -1262,6 +1354,9 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->local_ca_ack_delay = 0;
caps->max_mtu = IB_MTU_4096;
+ if (hr_dev->pci_dev->revision == 0x21)
+ caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC;
+
ret = hns_roce_v2_set_bt(hr_dev);
if (ret)
dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n",
@@ -1690,10 +1785,11 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 0);
+ roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S,
(mr->access & IB_ACCESS_MW_BIND ? 1 : 0));
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, 0);
+ roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S,
+ mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
(mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0));
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
@@ -1817,6 +1913,88 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
return 0;
}
+static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
+{
+ struct hns_roce_v2_mpt_entry *mpt_entry;
+
+ mpt_entry = mb_buf;
+ memset(mpt_entry, 0, sizeof(*mpt_entry));
+
+ roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
+ V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
+ roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
+ V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1);
+ roce_set_field(mpt_entry->byte_4_pd_hop_st,
+ V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
+ V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
+ mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
+ roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
+ V2_MPT_BYTE_4_PD_S, mr->pd);
+
+ roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1);
+ roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
+ roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
+
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1);
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0);
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
+
+ mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
+
+ mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
+ roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
+ V2_MPT_BYTE_48_PBL_BA_H_S,
+ upper_32_bits(mr->pbl_ba >> 3));
+
+ roce_set_field(mpt_entry->byte_64_buf_pa1,
+ V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
+ V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
+ mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+
+ return 0;
+}
+
+static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
+{
+ struct hns_roce_v2_mpt_entry *mpt_entry;
+
+ mpt_entry = mb_buf;
+ memset(mpt_entry, 0, sizeof(*mpt_entry));
+
+ roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
+ V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
+ roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
+ V2_MPT_BYTE_4_PD_S, mw->pdn);
+ roce_set_field(mpt_entry->byte_4_pd_hop_st,
+ V2_MPT_BYTE_4_PBL_HOP_NUM_M,
+ V2_MPT_BYTE_4_PBL_HOP_NUM_S,
+ mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ?
+ 0 : mw->pbl_hop_num);
+ roce_set_field(mpt_entry->byte_4_pd_hop_st,
+ V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
+ V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
+ mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
+
+ roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
+ roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
+
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1);
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
+ roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S,
+ mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1);
+
+ roce_set_field(mpt_entry->byte_64_buf_pa1,
+ V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
+ V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
+ mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+
+ mpt_entry->lkey = cpu_to_le32(mw->rkey);
+
+ return 0;
+}
+
static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
{
return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
@@ -2274,6 +2452,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
wc->src_qp = (u8)roce_get_field(cqe->byte_32,
V2_CQE_BYTE_32_RMT_QPN_M,
V2_CQE_BYTE_32_RMT_QPN_S);
+ wc->slid = 0;
wc->wc_flags |= (roce_get_bit(cqe->byte_32,
V2_CQE_BYTE_32_GRH_S) ?
IB_WC_GRH : 0);
@@ -2287,7 +2466,14 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
wc->smac[5] = roce_get_field(cqe->byte_28,
V2_CQE_BYTE_28_SMAC_5_M,
V2_CQE_BYTE_28_SMAC_5_S);
- wc->vlan_id = 0xffff;
+ if (roce_get_bit(cqe->byte_28, V2_CQE_BYTE_28_VID_VLD_S)) {
+ wc->vlan_id = (u16)roce_get_field(cqe->byte_28,
+ V2_CQE_BYTE_28_VID_M,
+ V2_CQE_BYTE_28_VID_S);
+ } else {
+ wc->vlan_id = 0xffff;
+ }
+
wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
wc->network_hdr_type = roce_get_field(cqe->byte_28,
V2_CQE_BYTE_28_PORT_TYPE_M,
@@ -2589,21 +2775,16 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_TX_ERR_S, 0);
roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_RX_ERR_S, 0);
- roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_MAPID_M,
- V2_QPC_BYTE_60_MAPID_S, 0);
+ roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_TEMPID_M,
+ V2_QPC_BYTE_60_TEMPID_S, 0);
- roce_set_bit(qpc_mask->byte_60_qpst_mapid,
- V2_QPC_BYTE_60_INNER_MAP_IND_S, 0);
- roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_MAP_IND_S,
- 0);
- roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_RQ_MAP_IND_S,
- 0);
- roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_EXT_MAP_IND_S,
- 0);
- roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_RLS_IND_S,
- 0);
- roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_EXT_IND_S,
- 0);
+ roce_set_field(qpc_mask->byte_60_qpst_tempid,
+ V2_QPC_BYTE_60_SCC_TOKEN_M, V2_QPC_BYTE_60_SCC_TOKEN_S,
+ 0);
+ roce_set_bit(qpc_mask->byte_60_qpst_tempid,
+ V2_QPC_BYTE_60_SQ_DB_DOING_S, 0);
+ roce_set_bit(qpc_mask->byte_60_qpst_tempid,
+ V2_QPC_BYTE_60_RQ_DB_DOING_S, 0);
roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0);
roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0);
@@ -2685,7 +2866,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M,
V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0);
- roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RSVD_RAQ_MAP_S, 0);
+ roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S,
+ 0);
roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M,
V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S, 0);
roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_TAIL_M,
@@ -2694,8 +2876,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_144_raq,
V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M,
V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S, 0);
- roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S,
- 0);
roce_set_field(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_CREDIT_M,
V2_QPC_BYTE_144_RAQ_CREDIT_S, 0);
roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RESP_RTY_FLG_S, 0);
@@ -2721,14 +2901,12 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
V2_QPC_BYTE_160_SQ_CONSUMER_IDX_M,
V2_QPC_BYTE_160_SQ_CONSUMER_IDX_S, 0);
- roce_set_field(context->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
- V2_QPC_BYTE_168_SQ_SHIFT_BAK_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(qpc_mask->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
- V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0);
-
+ roce_set_bit(qpc_mask->byte_168_irrl_idx,
+ V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S, 0);
+ roce_set_bit(qpc_mask->byte_168_irrl_idx,
+ V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S, 0);
+ roce_set_bit(qpc_mask->byte_168_irrl_idx,
+ V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S, 0);
roce_set_bit(qpc_mask->byte_168_irrl_idx,
V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S, 0);
roce_set_bit(qpc_mask->byte_168_irrl_idx,
@@ -2746,6 +2924,9 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_MSG_RNR_FLG_S,
0);
+ roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
+ roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 0);
+
roce_set_field(qpc_mask->byte_176_msg_pktn,
V2_QPC_BYTE_176_MSG_USE_PKTN_M,
V2_QPC_BYTE_176_MSG_USE_PKTN_S, 0);
@@ -2790,6 +2971,13 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
V2_QPC_BYTE_232_IRRL_SGE_IDX_M,
V2_QPC_BYTE_232_IRRL_SGE_IDX_S, 0);
+ roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_SO_LP_VLD_S,
+ 0);
+ roce_set_bit(qpc_mask->byte_232_irrl_sge,
+ V2_QPC_BYTE_232_FENCE_LP_VLD_S, 0);
+ roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_IRRL_LP_VLD_S,
+ 0);
+
qpc_mask->irrl_cur_sge_offset = 0;
roce_set_field(qpc_mask->byte_240_irrl_tail,
@@ -2955,13 +3143,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_56_dqpn_err,
V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0);
}
- roce_set_field(context->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
- V2_QPC_BYTE_168_SQ_SHIFT_BAK_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(qpc_mask->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
- V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0);
}
static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
@@ -3271,13 +3452,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
* we should set all bits of the relevant fields in context mask to
* 0 at the same time, else set them to 0x1.
*/
- roce_set_field(context->byte_60_qpst_mapid,
- V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M,
- V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, attr->retry_cnt);
- roce_set_field(qpc_mask->byte_60_qpst_mapid,
- V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M,
- V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, 0);
-
context->sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
roce_set_field(context->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
@@ -3538,6 +3712,17 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN);
}
+ if (is_vlan_dev(gid_attr->ndev)) {
+ roce_set_bit(context->byte_76_srqn_op_en,
+ V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1);
+ roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+ V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0);
+ roce_set_bit(context->byte_168_irrl_idx,
+ V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1);
+ roce_set_bit(qpc_mask->byte_168_irrl_idx,
+ V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0);
+ }
+
roce_set_field(context->byte_24_mtu_tc,
V2_QPC_BYTE_24_VLAN_ID_M,
V2_QPC_BYTE_24_VLAN_ID_S, vlan);
@@ -3584,8 +3769,15 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
V2_QPC_BYTE_24_HOP_LIMIT_M,
V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
- roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
- V2_QPC_BYTE_24_TC_S, grh->traffic_class);
+ if (hr_dev->pci_dev->revision == 0x21 &&
+ gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ roce_set_field(context->byte_24_mtu_tc,
+ V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
+ grh->traffic_class >> 2);
+ else
+ roce_set_field(context->byte_24_mtu_tc,
+ V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
+ grh->traffic_class);
roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
V2_QPC_BYTE_24_TC_S, 0);
roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
@@ -3606,9 +3798,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
/* Every status migrate must change state */
- roce_set_field(context->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M,
+ roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
V2_QPC_BYTE_60_QP_ST_S, new_state);
- roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M,
+ roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
V2_QPC_BYTE_60_QP_ST_S, 0);
/* SW pass context to HW */
@@ -3728,7 +3920,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
goto out;
}
- state = roce_get_field(context->byte_60_qpst_mapid,
+ state = roce_get_field(context->byte_60_qpst_tempid,
V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S);
tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
if (tmp_qp_state == -1) {
@@ -3995,13 +4187,103 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
{
struct hns_roce_work *irq_work =
container_of(work, struct hns_roce_work, work);
+ struct device *dev = irq_work->hr_dev->dev;
u32 qpn = irq_work->qpn;
+ u32 cqn = irq_work->cqn;
switch (irq_work->event_type) {
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG:
+ dev_info(dev, "Path migrated succeeded.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
+ dev_warn(dev, "Path migration failed.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_COMM_EST:
+ dev_info(dev, "Communication established.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+ dev_warn(dev, "Send queue drained.\n");
+ break;
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ dev_err(dev, "Local work queue catastrophic error.\n");
+ hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+ switch (irq_work->sub_type) {
+ case HNS_ROCE_LWQCE_QPC_ERROR:
+ dev_err(dev, "QP %d, QPC error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_MTU_ERROR:
+ dev_err(dev, "QP %d, MTU error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
+ dev_err(dev, "QP %d, WQE BA addr error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
+ dev_err(dev, "QP %d, WQE addr error.\n", qpn);
+ break;
+ case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
+ dev_err(dev, "QP %d, WQE shift error.\n", qpn);
+ break;
+ default:
+ dev_err(dev, "Unhandled sub_event type %d.\n",
+ irq_work->sub_type);
+ break;
+ }
+ break;
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+ dev_err(dev, "Invalid request local work queue error.\n");
+ hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+ break;
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ dev_err(dev, "Local access violation work queue error.\n");
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+ switch (irq_work->sub_type) {
+ case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
+ dev_err(dev, "QP %d, R_key violation.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_LENGTH_ERROR:
+ dev_err(dev, "QP %d, length error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_VA_ERROR:
+ dev_err(dev, "QP %d, VA error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_PD_ERROR:
+ dev_err(dev, "QP %d, PD error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
+ dev_err(dev, "QP %d, rw acc error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
+ dev_err(dev, "QP %d, key state error.\n", qpn);
+ break;
+ case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
+ dev_err(dev, "QP %d, MR operation error.\n", qpn);
+ break;
+ default:
+ dev_err(dev, "Unhandled sub_event type %d.\n",
+ irq_work->sub_type);
+ break;
+ }
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+ dev_warn(dev, "SRQ limit reach.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
+ dev_warn(dev, "SRQ last wqe reach.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ dev_err(dev, "SRQ catas error.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
+ dev_err(dev, "CQ 0x%x access err.\n", cqn);
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
+ dev_warn(dev, "CQ 0x%x overflow\n", cqn);
+ break;
+ case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
+ dev_warn(dev, "DB overflow.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_FLR:
+ dev_warn(dev, "Function level reset.\n");
break;
default:
break;
@@ -4011,7 +4293,8 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
}
static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq, u32 qpn)
+ struct hns_roce_eq *eq,
+ u32 qpn, u32 cqn)
{
struct hns_roce_work *irq_work;
@@ -4022,6 +4305,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle);
irq_work->hr_dev = hr_dev;
irq_work->qpn = qpn;
+ irq_work->cqn = cqn;
irq_work->event_type = eq->event_type;
irq_work->sub_type = eq->sub_type;
queue_work(hr_dev->irq_workq, &(irq_work->work));
@@ -4058,124 +4342,6 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
hns_roce_write64_k(doorbell, eq->doorbell);
}
-static void hns_roce_v2_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- u32 qpn)
-{
- struct device *dev = hr_dev->dev;
- int sub_type;
-
- dev_warn(dev, "Local work queue catastrophic error.\n");
- sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
- HNS_ROCE_V2_AEQE_SUB_TYPE_S);
- switch (sub_type) {
- case HNS_ROCE_LWQCE_QPC_ERROR:
- dev_warn(dev, "QP %d, QPC error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_MTU_ERROR:
- dev_warn(dev, "QP %d, MTU error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
- dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
- dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
- dev_warn(dev, "QP %d, WQE shift error.\n", qpn);
- break;
- default:
- dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
- break;
- }
-}
-
-static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe, u32 qpn)
-{
- struct device *dev = hr_dev->dev;
- int sub_type;
-
- dev_warn(dev, "Local access violation work queue error.\n");
- sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
- HNS_ROCE_V2_AEQE_SUB_TYPE_S);
- switch (sub_type) {
- case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
- dev_warn(dev, "QP %d, R_key violation.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_LENGTH_ERROR:
- dev_warn(dev, "QP %d, length error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_VA_ERROR:
- dev_warn(dev, "QP %d, VA error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_PD_ERROR:
- dev_err(dev, "QP %d, PD error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
- dev_warn(dev, "QP %d, rw acc error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
- dev_warn(dev, "QP %d, key state error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
- dev_warn(dev, "QP %d, MR operation error.\n", qpn);
- break;
- default:
- dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
- break;
- }
-}
-
-static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- int event_type, u32 qpn)
-{
- struct device *dev = hr_dev->dev;
-
- switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_COMM_EST:
- dev_warn(dev, "Communication established.\n");
- break;
- case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
- dev_warn(dev, "Send queue drained.\n");
- break;
- case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
- hns_roce_v2_wq_catas_err_handle(hr_dev, aeqe, qpn);
- break;
- case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
- dev_warn(dev, "Invalid request local work queue error.\n");
- break;
- case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
- hns_roce_v2_local_wq_access_err_handle(hr_dev, aeqe, qpn);
- break;
- default:
- break;
- }
-
- hns_roce_qp_event(hr_dev, qpn, event_type);
-}
-
-static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- int event_type, u32 cqn)
-{
- struct device *dev = hr_dev->dev;
-
- switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
- dev_warn(dev, "CQ 0x%x access err.\n", cqn);
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- dev_warn(dev, "CQ 0x%x overflow\n", cqn);
- break;
- default:
- break;
- }
-
- hns_roce_cq_event(hr_dev, cqn, event_type);
-}
-
static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
{
u32 buf_chk_sz;
@@ -4251,31 +4417,23 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
- dev_warn(dev, "Path migrated succeeded.\n");
- break;
case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
- dev_warn(dev, "Path migration failed.\n");
- break;
case HNS_ROCE_EVENT_TYPE_COMM_EST:
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
- hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type,
- qpn);
+ hns_roce_qp_event(hr_dev, qpn, event_type);
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
- dev_warn(dev, "SRQ not support.\n");
break;
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type,
- cqn);
+ hns_roce_cq_event(hr_dev, cqn, event_type);
break;
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
- dev_warn(dev, "DB overflow.\n");
break;
case HNS_ROCE_EVENT_TYPE_MB:
hns_roce_cmd_event(hr_dev,
@@ -4284,10 +4442,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
le64_to_cpu(aeqe->event.cmd.out_param));
break;
case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
- dev_warn(dev, "CEQ overflow.\n");
break;
case HNS_ROCE_EVENT_TYPE_FLR:
- dev_warn(dev, "Function level reset.\n");
break;
default:
dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n",
@@ -4304,7 +4460,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
dev_warn(dev, "cons_index overflow, set back to 0.\n");
eq->cons_index = 0;
}
- hns_roce_v2_init_irq_work(hr_dev, eq, qpn);
+ hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn);
}
set_eq_cons_index_v2(eq);
@@ -5125,6 +5281,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
create_singlethread_workqueue("hns_roce_irq_workqueue");
if (!hr_dev->irq_workq) {
dev_err(dev, "Create irq workqueue failed!\n");
+ ret = -ENOMEM;
goto err_request_irq_fail;
}
@@ -5195,6 +5352,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.set_mac = hns_roce_v2_set_mac,
.write_mtpt = hns_roce_v2_write_mtpt,
.rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt,
+ .frmr_write_mtpt = hns_roce_v2_frmr_write_mtpt,
+ .mw_write_mtpt = hns_roce_v2_mw_write_mtpt,
.write_cqc = hns_roce_v2_write_cqc,
.set_hem = hns_roce_v2_set_hem,
.clear_hem = hns_roce_v2_clear_hem,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 14aa308befef..8bc820635bbd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -50,6 +50,7 @@
#define HNS_ROCE_V2_MAX_CQE_NUM 0x10000
#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100
#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff
+#define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000
#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20
#define HNS_ROCE_V2_UAR_NUM 256
#define HNS_ROCE_V2_PHY_UAR_NUM 1
@@ -78,6 +79,7 @@
#define HNS_ROCE_INVALID_LKEY 0x100
#define HNS_ROCE_CMQ_TX_TIMEOUT 30000
#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2
+#define HNS_ROCE_V2_RSV_QPS 8
#define HNS_ROCE_CONTEXT_HOP_NUM 1
#define HNS_ROCE_MTT_HOP_NUM 1
@@ -201,6 +203,7 @@ enum {
/* CMQ command */
enum hns_roce_opcode_type {
+ HNS_QUERY_FW_VER = 0x0001,
HNS_ROCE_OPC_QUERY_HW_VER = 0x8000,
HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001,
HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004,
@@ -324,6 +327,7 @@ struct hns_roce_v2_cq_context {
enum{
V2_MPT_ST_VALID = 0x1,
+ V2_MPT_ST_FREE = 0x2,
};
enum hns_roce_v2_qp_state {
@@ -350,7 +354,7 @@ struct hns_roce_v2_qp_context {
__le32 dmac;
__le32 byte_52_udpspn_dmac;
__le32 byte_56_dqpn_err;
- __le32 byte_60_qpst_mapid;
+ __le32 byte_60_qpst_tempid;
__le32 qkey_xrcd;
__le32 byte_68_rq_db;
__le32 rq_db_record_addr;
@@ -492,26 +496,15 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_56_LP_PKTN_INI_S 28
#define V2_QPC_BYTE_56_LP_PKTN_INI_M GENMASK(31, 28)
-#define V2_QPC_BYTE_60_MAPID_S 0
-#define V2_QPC_BYTE_60_MAPID_M GENMASK(12, 0)
+#define V2_QPC_BYTE_60_TEMPID_S 0
+#define V2_QPC_BYTE_60_TEMPID_M GENMASK(7, 0)
-#define V2_QPC_BYTE_60_INNER_MAP_IND_S 13
+#define V2_QPC_BYTE_60_SCC_TOKEN_S 8
+#define V2_QPC_BYTE_60_SCC_TOKEN_M GENMASK(26, 8)
-#define V2_QPC_BYTE_60_SQ_MAP_IND_S 14
+#define V2_QPC_BYTE_60_SQ_DB_DOING_S 27
-#define V2_QPC_BYTE_60_RQ_MAP_IND_S 15
-
-#define V2_QPC_BYTE_60_TEMPID_S 16
-#define V2_QPC_BYTE_60_TEMPID_M GENMASK(22, 16)
-
-#define V2_QPC_BYTE_60_EXT_MAP_IND_S 23
-
-#define V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S 24
-#define V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M GENMASK(26, 24)
-
-#define V2_QPC_BYTE_60_SQ_RLS_IND_S 27
-
-#define V2_QPC_BYTE_60_SQ_EXT_IND_S 28
+#define V2_QPC_BYTE_60_RQ_DB_DOING_S 28
#define V2_QPC_BYTE_60_QP_ST_S 29
#define V2_QPC_BYTE_60_QP_ST_M GENMASK(31, 29)
@@ -534,6 +527,7 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_76_RQIE_S 28
+#define V2_QPC_BYTE_76_RQ_VLAN_EN_S 30
#define V2_QPC_BYTE_80_RX_CQN_S 0
#define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0)
@@ -588,7 +582,7 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_140_RR_MAX_S 12
#define V2_QPC_BYTE_140_RR_MAX_M GENMASK(14, 12)
-#define V2_QPC_BYTE_140_RSVD_RAQ_MAP_S 15
+#define V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S 15
#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S 16
#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M GENMASK(23, 16)
@@ -599,8 +593,6 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S 0
#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M GENMASK(23, 0)
-#define V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S 24
-
#define V2_QPC_BYTE_144_RAQ_CREDIT_S 25
#define V2_QPC_BYTE_144_RAQ_CREDIT_M GENMASK(29, 25)
@@ -637,9 +629,10 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_168_LP_SGEN_INI_S 22
#define V2_QPC_BYTE_168_LP_SGEN_INI_M GENMASK(23, 22)
-#define V2_QPC_BYTE_168_SQ_SHIFT_BAK_S 24
-#define V2_QPC_BYTE_168_SQ_SHIFT_BAK_M GENMASK(27, 24)
-
+#define V2_QPC_BYTE_168_SQ_VLAN_EN_S 24
+#define V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S 25
+#define V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S 26
+#define V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S 27
#define V2_QPC_BYTE_168_IRRL_IDX_LSB_S 28
#define V2_QPC_BYTE_168_IRRL_IDX_LSB_M GENMASK(31, 28)
@@ -725,6 +718,10 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_232_IRRL_SGE_IDX_S 20
#define V2_QPC_BYTE_232_IRRL_SGE_IDX_M GENMASK(28, 20)
+#define V2_QPC_BYTE_232_SO_LP_VLD_S 29
+#define V2_QPC_BYTE_232_FENCE_LP_VLD_S 30
+#define V2_QPC_BYTE_232_IRRL_LP_VLD_S 31
+
#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_S 0
#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_M GENMASK(7, 0)
@@ -743,6 +740,9 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_244_RNR_CNT_S 27
#define V2_QPC_BYTE_244_RNR_CNT_M GENMASK(29, 27)
+#define V2_QPC_BYTE_244_LCL_OP_FLG_S 30
+#define V2_QPC_BYTE_244_IRRL_RD_FLG_S 31
+
#define V2_QPC_BYTE_248_IRRL_PSN_S 0
#define V2_QPC_BYTE_248_IRRL_PSN_M GENMASK(23, 0)
@@ -818,6 +818,11 @@ struct hns_roce_v2_cqe {
#define V2_CQE_BYTE_28_PORT_TYPE_S 16
#define V2_CQE_BYTE_28_PORT_TYPE_M GENMASK(17, 16)
+#define V2_CQE_BYTE_28_VID_S 18
+#define V2_CQE_BYTE_28_VID_M GENMASK(29, 18)
+
+#define V2_CQE_BYTE_28_VID_VLD_S 30
+
#define V2_CQE_BYTE_32_RMT_QPN_S 0
#define V2_CQE_BYTE_32_RMT_QPN_M GENMASK(23, 0)
@@ -878,8 +883,19 @@ struct hns_roce_v2_mpt_entry {
#define V2_MPT_BYTE_8_LW_EN_S 7
+#define V2_MPT_BYTE_8_MW_CNT_S 8
+#define V2_MPT_BYTE_8_MW_CNT_M GENMASK(31, 8)
+
+#define V2_MPT_BYTE_12_FRE_S 0
+
#define V2_MPT_BYTE_12_PA_S 1
+#define V2_MPT_BYTE_12_MR_MW_S 4
+
+#define V2_MPT_BYTE_12_BPD_S 5
+
+#define V2_MPT_BYTE_12_BQP_S 6
+
#define V2_MPT_BYTE_12_INNER_PA_VLD_S 7
#define V2_MPT_BYTE_12_MW_BIND_QPN_S 8
@@ -988,6 +1004,8 @@ struct hns_roce_v2_ud_send_wqe {
#define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24
#define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24)
+#define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30
+
#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31
#define V2_UD_SEND_WQE_DMAC_0_S 0
@@ -1042,6 +1060,16 @@ struct hns_roce_v2_rc_send_wqe {
#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12
+#define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19
+
+#define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20
+
+#define V2_RC_FRMR_WQE_BYTE_4_RR_S 21
+
+#define V2_RC_FRMR_WQE_BYTE_4_RW_S 22
+
+#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23
+
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)
@@ -1051,6 +1079,16 @@ struct hns_roce_v2_rc_send_wqe {
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
+struct hns_roce_wqe_frmr_seg {
+ __le32 pbl_size;
+ __le32 mode_buf_pg_sz;
+};
+
+#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S 4
+#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M GENMASK(7, 4)
+
+#define V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S 8
+
struct hns_roce_v2_wqe_data_seg {
__le32 len;
__le32 lkey;
@@ -1068,6 +1106,11 @@ struct hns_roce_query_version {
__le32 rsv[5];
};
+struct hns_roce_query_fw_info {
+ __le32 fw_ver;
+ __le32 rsv[5];
+};
+
struct hns_roce_cfg_llm_a {
__le32 base_addr_l;
__le32 base_addr_h;
@@ -1564,4 +1607,9 @@ struct hns_roce_eq_context {
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
+struct hns_roce_wqe_atomic_seg {
+ __le64 fetchadd_swap_data;
+ __le64 cmp_data;
+};
+
#endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index c5cae9a38c04..1b3ee514f2ef 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -196,6 +196,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
memset(props, 0, sizeof(*props));
+ props->fw_ver = hr_dev->caps.fw_ver;
props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid);
props->max_mr_size = (u64)(~(0ULL));
props->page_size_cap = hr_dev->caps.page_size_cap;
@@ -215,7 +216,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
props->max_pd = hr_dev->caps.num_pds;
props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma;
props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma;
- props->atomic_cap = IB_ATOMIC_NONE;
+ props->atomic_cap = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_ATOMIC ?
+ IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->max_pkeys = 1;
props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
@@ -344,8 +346,6 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
if (ret)
goto error_fail_uar_alloc;
- INIT_LIST_HEAD(&context->vma_list);
- mutex_init(&context->vma_list_mutex);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
INIT_LIST_HEAD(&context->page_list);
mutex_init(&context->page_mutex);
@@ -376,76 +376,34 @@ static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
return 0;
}
-static void hns_roce_vma_open(struct vm_area_struct *vma)
-{
- vma->vm_ops = NULL;
-}
-
-static void hns_roce_vma_close(struct vm_area_struct *vma)
-{
- struct hns_roce_vma_data *vma_data;
-
- vma_data = (struct hns_roce_vma_data *)vma->vm_private_data;
- vma_data->vma = NULL;
- mutex_lock(vma_data->vma_list_mutex);
- list_del(&vma_data->list);
- mutex_unlock(vma_data->vma_list_mutex);
- kfree(vma_data);
-}
-
-static const struct vm_operations_struct hns_roce_vm_ops = {
- .open = hns_roce_vma_open,
- .close = hns_roce_vma_close,
-};
-
-static int hns_roce_set_vma_data(struct vm_area_struct *vma,
- struct hns_roce_ucontext *context)
-{
- struct list_head *vma_head = &context->vma_list;
- struct hns_roce_vma_data *vma_data;
-
- vma_data = kzalloc(sizeof(*vma_data), GFP_KERNEL);
- if (!vma_data)
- return -ENOMEM;
-
- vma_data->vma = vma;
- vma_data->vma_list_mutex = &context->vma_list_mutex;
- vma->vm_private_data = vma_data;
- vma->vm_ops = &hns_roce_vm_ops;
-
- mutex_lock(&context->vma_list_mutex);
- list_add(&vma_data->list, vma_head);
- mutex_unlock(&context->vma_list_mutex);
-
- return 0;
-}
-
static int hns_roce_mmap(struct ib_ucontext *context,
struct vm_area_struct *vma)
{
struct hns_roce_dev *hr_dev = to_hr_dev(context->device);
- if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0)
- return -EINVAL;
+ switch (vma->vm_pgoff) {
+ case 0:
+ return rdma_user_mmap_io(context, vma,
+ to_hr_ucontext(context)->uar.pfn,
+ PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot));
+
+ /* vm_pgoff: 1 -- TPTR */
+ case 1:
+ if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size)
+ return -EINVAL;
+ /*
+ * FIXME: using io_remap_pfn_range on the dma address returned
+ * by dma_alloc_coherent is totally wrong.
+ */
+ return rdma_user_mmap_io(context, vma,
+ hr_dev->tptr_dma_addr >> PAGE_SHIFT,
+ hr_dev->tptr_size,
+ vma->vm_page_prot);
- if (vma->vm_pgoff == 0) {
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- if (io_remap_pfn_range(vma, vma->vm_start,
- to_hr_ucontext(context)->uar.pfn,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
- } else if (vma->vm_pgoff == 1 && hr_dev->tptr_dma_addr &&
- hr_dev->tptr_size) {
- /* vm_pgoff: 1 -- TPTR */
- if (io_remap_pfn_range(vma, vma->vm_start,
- hr_dev->tptr_dma_addr >> PAGE_SHIFT,
- hr_dev->tptr_size,
- vma->vm_page_prot))
- return -EAGAIN;
- } else
+ default:
return -EINVAL;
-
- return hns_roce_set_vma_data(vma, to_hr_ucontext(context));
+ }
}
static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
@@ -471,21 +429,6 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
- struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
- struct hns_roce_vma_data *vma_data, *n;
- struct vm_area_struct *vma;
-
- mutex_lock(&context->vma_list_mutex);
- list_for_each_entry_safe(vma_data, n, &context->vma_list, list) {
- vma = vma_data->vma;
- zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
-
- vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
- vma->vm_ops = NULL;
- list_del(&vma_data->list);
- kfree(vma_data);
- }
- mutex_unlock(&context->vma_list_mutex);
}
static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
@@ -508,7 +451,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
spin_lock_init(&iboe->lock);
ib_dev = &hr_dev->ib_dev;
- strlcpy(ib_dev->name, "hns_%d", IB_DEVICE_NAME_MAX);
ib_dev->owner = THIS_MODULE;
ib_dev->node_type = RDMA_NODE_IB_CA;
@@ -584,12 +526,27 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR);
}
+ /* MW */
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) {
+ ib_dev->alloc_mw = hns_roce_alloc_mw;
+ ib_dev->dealloc_mw = hns_roce_dealloc_mw;
+ ib_dev->uverbs_cmd_mask |=
+ (1ULL << IB_USER_VERBS_CMD_ALLOC_MW) |
+ (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW);
+ }
+
+ /* FRMR */
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) {
+ ib_dev->alloc_mr = hns_roce_alloc_mr;
+ ib_dev->map_mr_sg = hns_roce_map_mr_sg;
+ }
+
/* OTHERS */
ib_dev->get_port_immutable = hns_roce_port_immutable;
ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext;
ib_dev->driver_id = RDMA_DRIVER_HNS;
- ret = ib_register_device(ib_dev, NULL);
+ ret = ib_register_device(ib_dev, "hns_%d", NULL);
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
return ret;
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index eb26a5f6fc58..521ad2aa3a4e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -329,7 +329,7 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
u64 bt_idx;
u64 size;
- mhop_num = hr_dev->caps.pbl_hop_num;
+ mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
@@ -351,7 +351,7 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
mr->pbl_size = npages;
mr->pbl_ba = mr->pbl_dma_addr;
- mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
+ mr->pbl_hop_num = mhop_num;
mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
return 0;
@@ -511,7 +511,6 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
mr->key = hw_index_to_key(index); /* MR key */
if (size == ~0ull) {
- mr->type = MR_TYPE_DMA;
mr->pbl_buf = NULL;
mr->pbl_dma_addr = 0;
/* PBL multi-hop addressing parameters */
@@ -522,7 +521,6 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
mr->pbl_l1_dma_addr = NULL;
mr->pbl_l0_dma_addr = 0;
} else {
- mr->type = MR_TYPE_MR;
if (!hr_dev->caps.pbl_hop_num) {
mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
&(mr->pbl_dma_addr),
@@ -548,9 +546,9 @@ static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
u32 mhop_num;
u64 bt_idx;
- npages = ib_umem_page_count(mr->umem);
+ npages = mr->pbl_size;
pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
- mhop_num = hr_dev->caps.pbl_hop_num;
+ mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num;
if (mhop_num == HNS_ROCE_HOP_NUM_0)
return;
@@ -636,7 +634,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
}
if (mr->size != ~0ULL) {
- npages = ib_umem_page_count(mr->umem);
+ if (mr->type == MR_TYPE_MR)
+ npages = ib_umem_page_count(mr->umem);
if (!hr_dev->caps.pbl_hop_num)
dma_free_coherent(dev, (unsigned int)(npages * 8),
@@ -674,7 +673,10 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
goto err_table;
}
- ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
+ if (mr->type != MR_TYPE_FRMR)
+ ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
+ else
+ ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
if (ret) {
dev_err(dev, "Write mtpt fail!\n");
goto err_page;
@@ -855,6 +857,8 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
if (mr == NULL)
return ERR_PTR(-ENOMEM);
+ mr->type = MR_TYPE_DMA;
+
/* Allocate memory region key */
ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
~0ULL, acc, 0, mr);
@@ -1031,6 +1035,8 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
}
+ mr->type = MR_TYPE_MR;
+
ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
access_flags, n, mr);
if (ret)
@@ -1201,3 +1207,193 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr)
return ret;
}
+
+struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_mr *mr;
+ u64 length;
+ u32 page_size;
+ int ret;
+
+ page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT);
+ length = max_num_sg * page_size;
+
+ if (mr_type != IB_MR_TYPE_MEM_REG)
+ return ERR_PTR(-EINVAL);
+
+ if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
+ dev_err(dev, "max_num_sg larger than %d\n",
+ HNS_ROCE_FRMR_MAX_PA);
+ return ERR_PTR(-EINVAL);
+ }
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->type = MR_TYPE_FRMR;
+
+ /* Allocate memory region key */
+ ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length,
+ 0, max_num_sg, mr);
+ if (ret)
+ goto err_free;
+
+ ret = hns_roce_mr_enable(hr_dev, mr);
+ if (ret)
+ goto err_mr;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
+ mr->umem = NULL;
+
+ return &mr->ibmr;
+
+err_mr:
+ hns_roce_mr_free(to_hr_dev(pd->device), mr);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(ret);
+}
+
+static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
+
+ mr->pbl_buf[mr->npages++] = cpu_to_le64(addr);
+
+ return 0;
+}
+
+int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
+{
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
+
+ mr->npages = 0;
+
+ return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
+}
+
+static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mw *mw)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ if (mw->enabled) {
+ ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey)
+ & (hr_dev->caps.num_mtpts - 1));
+ if (ret)
+ dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret);
+
+ hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
+ key_to_hw_index(mw->rkey));
+ }
+
+ hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
+ key_to_hw_index(mw->rkey), BITMAP_NO_RR);
+}
+
+static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mw *mw)
+{
+ struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
+ struct hns_roce_cmd_mailbox *mailbox;
+ struct device *dev = hr_dev->dev;
+ unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
+ int ret;
+
+ /* prepare HEM entry memory */
+ ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
+ if (ret)
+ return ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ ret = PTR_ERR(mailbox);
+ goto err_table;
+ }
+
+ ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
+ if (ret) {
+ dev_err(dev, "MW write mtpt fail!\n");
+ goto err_page;
+ }
+
+ ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
+ mtpt_idx & (hr_dev->caps.num_mtpts - 1));
+ if (ret) {
+ dev_err(dev, "MW sw2hw_mpt failed (%d)\n", ret);
+ goto err_page;
+ }
+
+ mw->enabled = 1;
+
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return 0;
+
+err_page:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+err_table:
+ hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
+
+ return ret;
+}
+
+struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
+ struct hns_roce_mw *mw;
+ unsigned long index = 0;
+ int ret;
+
+ mw = kmalloc(sizeof(*mw), GFP_KERNEL);
+ if (!mw)
+ return ERR_PTR(-ENOMEM);
+
+ /* Allocate a key for mw from bitmap */
+ ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
+ if (ret)
+ goto err_bitmap;
+
+ mw->rkey = hw_index_to_key(index);
+
+ mw->ibmw.rkey = mw->rkey;
+ mw->ibmw.type = type;
+ mw->pdn = to_hr_pd(ib_pd)->pdn;
+ mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
+ mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
+ mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
+
+ ret = hns_roce_mw_enable(hr_dev, mw);
+ if (ret)
+ goto err_mw;
+
+ return &mw->ibmw;
+
+err_mw:
+ hns_roce_mw_free(hr_dev, mw);
+
+err_bitmap:
+ kfree(mw);
+
+ return ERR_PTR(ret);
+}
+
+int hns_roce_dealloc_mw(struct ib_mw *ibmw)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
+ struct hns_roce_mw *mw = to_hr_mw(ibmw);
+
+ hns_roce_mw_free(hr_dev, mw);
+ kfree(mw);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index efb7e961ca65..5ebf481a39d9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <linux/pci.h>
#include <linux/platform_device.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
@@ -343,6 +344,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
{
u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
u8 max_sq_stride = ilog2(roundup_sq_stride);
+ u32 ex_sge_num;
u32 page_size;
u32 max_cnt;
@@ -372,7 +374,18 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
if (hr_qp->sq.max_gs > 2)
hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
(hr_qp->sq.max_gs - 2));
+
+ if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision == 0x20)) {
+ if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
+ dev_err(hr_dev->dev,
+ "The extended sge cnt error! sge_cnt=%d\n",
+ hr_qp->sge.sge_cnt);
+ return -EINVAL;
+ }
+ }
+
hr_qp->sge.sge_shift = 4;
+ ex_sge_num = hr_qp->sge.sge_cnt;
/* Get buf size, SQ and RQ are aligned to page_szie */
if (hr_dev->caps.max_sq_sg <= 2) {
@@ -386,6 +399,8 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sq.wqe_shift), PAGE_SIZE);
} else {
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
+ hr_qp->sge.sge_cnt =
+ max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num);
hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
hr_qp->rq.wqe_shift), page_size) +
HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
@@ -394,7 +409,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sq.wqe_shift), page_size);
hr_qp->sq.offset = 0;
- if (hr_qp->sge.sge_cnt) {
+ if (ex_sge_num) {
hr_qp->sge.offset = HNS_ROCE_ALOGN_UP(
(hr_qp->sq.wqe_cnt <<
hr_qp->sq.wqe_shift),
@@ -465,6 +480,14 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sge.sge_shift = 4;
}
+ if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) {
+ if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
+ dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
+ hr_qp->sge.sge_cnt);
+ return -EINVAL;
+ }
+ }
+
/* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
hr_qp->sq.offset = 0;
@@ -472,6 +495,8 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
page_size);
if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) {
+ hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift),
+ (u32)hr_qp->sge.sge_cnt);
hr_qp->sge.offset = size;
size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt <<
hr_qp->sge.sge_shift, page_size);
@@ -952,8 +977,8 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
}
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_ETHERNET)) {
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+ attr_mask)) {
dev_err(dev, "ib_modify_qp_is_ok failed\n");
goto out;
}
@@ -1106,14 +1131,20 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
int reserved_from_top = 0;
+ int reserved_from_bot;
int ret;
spin_lock_init(&qp_table->lock);
INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC);
- /* A port include two SQP, six port total 12 */
+ /* In hw v1, a port include two SQP, six ports total 12 */
+ if (hr_dev->caps.max_sq_sg <= 2)
+ reserved_from_bot = SQP_NUM;
+ else
+ reserved_from_bot = hr_dev->caps.reserved_qps;
+
ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps,
- hr_dev->caps.num_qps - 1, SQP_NUM,
+ hr_dev->caps.num_qps - 1, reserved_from_bot,
reserved_from_top);
if (ret) {
dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n",
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 423818a7d333..771eb6bd0785 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -1689,7 +1689,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev,
unsigned long flags;
rtnl_lock();
- for_each_netdev_rcu(&init_net, ip_dev) {
+ for_each_netdev(&init_net, ip_dev) {
if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) &&
(rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
(ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index e2e6c74a7452..102875872bea 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -2135,10 +2135,10 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
}
/**
- * i40iw_show_rev
+ * hw_rev_show
*/
-static ssize_t i40iw_show_rev(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t hw_rev_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct i40iw_ib_device *iwibdev = container_of(dev,
struct i40iw_ib_device,
@@ -2147,34 +2147,37 @@ static ssize_t i40iw_show_rev(struct device *dev,
return sprintf(buf, "%x\n", hw_rev);
}
+static DEVICE_ATTR_RO(hw_rev);
/**
- * i40iw_show_hca
+ * hca_type_show
*/
-static ssize_t i40iw_show_hca(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t hca_type_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
return sprintf(buf, "I40IW\n");
}
+static DEVICE_ATTR_RO(hca_type);
/**
- * i40iw_show_board
+ * board_id_show
*/
-static ssize_t i40iw_show_board(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%.*s\n", 32, "I40IW Board ID");
}
+static DEVICE_ATTR_RO(board_id);
-static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL);
+static struct attribute *i40iw_dev_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
+};
-static struct device_attribute *i40iw_dev_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id
+static const struct attribute_group i40iw_attr_group = {
+ .attrs = i40iw_dev_attributes,
};
/**
@@ -2752,7 +2755,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
i40iw_pr_err("iwdev == NULL\n");
return NULL;
}
- strlcpy(iwibdev->ibdev.name, "i40iw%d", IB_DEVICE_NAME_MAX);
iwibdev->ibdev.owner = THIS_MODULE;
iwdev->iwibdev = iwibdev;
iwibdev->iwdev = iwdev;
@@ -2851,20 +2853,6 @@ void i40iw_port_ibevent(struct i40iw_device *iwdev)
}
/**
- * i40iw_unregister_rdma_device - unregister of iwarp from IB
- * @iwibdev: rdma device ptr
- */
-static void i40iw_unregister_rdma_device(struct i40iw_ib_device *iwibdev)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i)
- device_remove_file(&iwibdev->ibdev.dev,
- i40iw_dev_attributes[i]);
- ib_unregister_device(&iwibdev->ibdev);
-}
-
-/**
* i40iw_destroy_rdma_device - destroy rdma device and free resources
* @iwibdev: IB device ptr
*/
@@ -2873,7 +2861,7 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
if (!iwibdev)
return;
- i40iw_unregister_rdma_device(iwibdev);
+ ib_unregister_device(&iwibdev->ibdev);
kfree(iwibdev->ibdev.iwcm);
iwibdev->ibdev.iwcm = NULL;
wait_event_timeout(iwibdev->iwdev->close_wq,
@@ -2888,32 +2876,19 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
*/
int i40iw_register_rdma_device(struct i40iw_device *iwdev)
{
- int i, ret;
+ int ret;
struct i40iw_ib_device *iwibdev;
iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
if (!iwdev->iwibdev)
return -ENOMEM;
iwibdev = iwdev->iwibdev;
-
+ rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW;
- ret = ib_register_device(&iwibdev->ibdev, NULL);
+ ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", NULL);
if (ret)
goto error;
- for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i) {
- ret =
- device_create_file(&iwibdev->ibdev.dev,
- i40iw_dev_attributes[i]);
- if (ret) {
- while (i > 0) {
- i--;
- device_remove_file(&iwibdev->ibdev.dev, i40iw_dev_attributes[i]);
- }
- ib_unregister_device(&iwibdev->ibdev);
- goto error;
- }
- }
return 0;
error:
kfree(iwdev->iwibdev->ibdev.iwcm);
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
index db4aa13ebae0..d1de3285fd88 100644
--- a/drivers/infiniband/hw/mlx4/Kconfig
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -1,6 +1,7 @@
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
depends on NETDEVICES && ETHERNET && PCI && INET
+ depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
depends on MAY_USE_DEVLINK
select NET_VENDOR_MELLANOX
select MLX4_CORE
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index e5466d786bb1..8942f5f7f04d 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -807,15 +807,17 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
int err;
struct ib_port_attr pattr;
- if (in_wc && in_wc->qp->qp_num) {
- pr_debug("received MAD: slid:%d sqpn:%d "
- "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n",
- in_wc->slid, in_wc->src_qp,
- in_wc->dlid_path_bits,
- in_wc->qp->qp_num,
- in_wc->wc_flags,
- in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
- be16_to_cpu(in_mad->mad_hdr.attr_id));
+ if (in_wc && in_wc->qp) {
+ pr_debug("received MAD: port:%d slid:%d sqpn:%d "
+ "dlid_bits:%d dqpn:%d wc_flags:0x%x tid:%016llx cls:%x mtd:%x atr:%x\n",
+ port_num,
+ in_wc->slid, in_wc->src_qp,
+ in_wc->dlid_path_bits,
+ in_wc->qp->qp_num,
+ in_wc->wc_flags,
+ be64_to_cpu(in_mad->mad_hdr.tid),
+ in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
+ be16_to_cpu(in_mad->mad_hdr.attr_id));
if (in_wc->wc_flags & IB_WC_GRH) {
pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
be64_to_cpu(in_grh->sgid.global.subnet_prefix),
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0bbeaaae47e0..0def2323459c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1140,144 +1140,50 @@ static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
return 0;
}
-static void mlx4_ib_vma_open(struct vm_area_struct *area)
-{
- /* vma_open is called when a new VMA is created on top of our VMA.
- * This is done through either mremap flow or split_vma (usually due
- * to mlock, madvise, munmap, etc.). We do not support a clone of the
- * vma, as this VMA is strongly hardware related. Therefore we set the
- * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
- * calling us again and trying to do incorrect actions. We assume that
- * the original vma size is exactly a single page that there will be no
- * "splitting" operations on.
- */
- area->vm_ops = NULL;
-}
-
-static void mlx4_ib_vma_close(struct vm_area_struct *area)
-{
- struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data;
-
- /* It's guaranteed that all VMAs opened on a FD are closed before the
- * file itself is closed, therefore no sync is needed with the regular
- * closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync
- * with accessing the vma as part of mlx4_ib_disassociate_ucontext.
- * The close operation is usually called under mm->mmap_sem except when
- * process is exiting. The exiting case is handled explicitly as part
- * of mlx4_ib_disassociate_ucontext.
- */
- mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
- area->vm_private_data;
-
- /* set the vma context pointer to null in the mlx4_ib driver's private
- * data to protect against a race condition in mlx4_ib_dissassociate_ucontext().
- */
- mlx4_ib_vma_priv_data->vma = NULL;
-}
-
-static const struct vm_operations_struct mlx4_ib_vm_ops = {
- .open = mlx4_ib_vma_open,
- .close = mlx4_ib_vma_close
-};
-
static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
- int i;
- struct vm_area_struct *vma;
- struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
-
- /* need to protect from a race on closing the vma as part of
- * mlx4_ib_vma_close().
- */
- for (i = 0; i < HW_BAR_COUNT; i++) {
- vma = context->hw_bar_info[i].vma;
- if (!vma)
- continue;
-
- zap_vma_ptes(context->hw_bar_info[i].vma,
- context->hw_bar_info[i].vma->vm_start, PAGE_SIZE);
-
- context->hw_bar_info[i].vma->vm_flags &=
- ~(VM_SHARED | VM_MAYSHARE);
- /* context going to be destroyed, should not access ops any more */
- context->hw_bar_info[i].vma->vm_ops = NULL;
- }
-}
-
-static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
- struct mlx4_ib_vma_private_data *vma_private_data)
-{
- vma_private_data->vma = vma;
- vma->vm_private_data = vma_private_data;
- vma->vm_ops = &mlx4_ib_vm_ops;
}
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct mlx4_ib_dev *dev = to_mdev(context->device);
- struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
- if (vma->vm_end - vma->vm_start != PAGE_SIZE)
- return -EINVAL;
-
- if (vma->vm_pgoff == 0) {
- /* We prevent double mmaping on same context */
- if (mucontext->hw_bar_info[HW_BAR_DB].vma)
- return -EINVAL;
-
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
- if (io_remap_pfn_range(vma, vma->vm_start,
- to_mucontext(context)->uar.pfn,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
+ switch (vma->vm_pgoff) {
+ case 0:
+ return rdma_user_mmap_io(context, vma,
+ to_mucontext(context)->uar.pfn,
+ PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot));
- } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
- /* We prevent double mmaping on same context */
- if (mucontext->hw_bar_info[HW_BAR_BF].vma)
+ case 1:
+ if (dev->dev->caps.bf_reg_size == 0)
return -EINVAL;
+ return rdma_user_mmap_io(
+ context, vma,
+ to_mucontext(context)->uar.pfn +
+ dev->dev->caps.num_uars,
+ PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot));
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-
- if (io_remap_pfn_range(vma, vma->vm_start,
- to_mucontext(context)->uar.pfn +
- dev->dev->caps.num_uars,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
-
- } else if (vma->vm_pgoff == 3) {
+ case 3: {
struct mlx4_clock_params params;
int ret;
- /* We prevent double mmaping on same context */
- if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
- return -EINVAL;
-
ret = mlx4_get_internal_clock_params(dev->dev, &params);
-
if (ret)
return ret;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- if (io_remap_pfn_range(vma, vma->vm_start,
- (pci_resource_start(dev->dev->persist->pdev,
- params.bar) +
- params.offset)
- >> PAGE_SHIFT,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- mlx4_ib_set_vma_data(vma,
- &mucontext->hw_bar_info[HW_BAR_CLOCK]);
- } else {
- return -EINVAL;
+ return rdma_user_mmap_io(
+ context, vma,
+ (pci_resource_start(dev->dev->persist->pdev,
+ params.bar) +
+ params.offset) >>
+ PAGE_SHIFT,
+ PAGE_SIZE, pgprot_noncached(vma->vm_page_prot));
}
- return 0;
+ default:
+ return -EINVAL;
+ }
}
static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
@@ -2133,39 +2039,43 @@ out:
return err;
}
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "%x\n", dev->dev->rev_id);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
dev->dev->board_id);
}
+static DEVICE_ATTR_RO(board_id);
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static struct attribute *mlx4_class_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
+};
-static struct device_attribute *mlx4_class_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id
+static const struct attribute_group mlx4_attr_group = {
+ .attrs = mlx4_class_attributes,
};
struct diag_counter {
@@ -2636,7 +2546,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->dev = dev;
ibdev->bond_next_port = 0;
- strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
ibdev->ib_dev.owner = THIS_MODULE;
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
@@ -2898,8 +2807,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_alloc_diag_counters(ibdev))
goto err_steer_free_bitmap;
+ rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group);
ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4;
- if (ib_register_device(&ibdev->ib_dev, NULL))
+ if (ib_register_device(&ibdev->ib_dev, "mlx4_%d", NULL))
goto err_diag_counters;
if (mlx4_ib_mad_init(ibdev))
@@ -2922,12 +2832,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_notif;
}
- for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
- if (device_create_file(&ibdev->ib_dev.dev,
- mlx4_class_attributes[j]))
- goto err_notif;
- }
-
ibdev->ib_active = true;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i),
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 81ffc007e0a1..d844831179cf 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -673,7 +673,7 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
if (!list_empty(&group->pending_list))
req = list_first_entry(&group->pending_list,
struct mcast_req, group_list);
- if ((method == IB_MGMT_METHOD_GET_RESP)) {
+ if (method == IB_MGMT_METHOD_GET_RESP) {
if (req) {
send_reply_to_slave(req->func, group, &req->sa_mad, status);
--group->func[req->func].num_pend_reqs;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index e10dccc7958f..8850dfc3826d 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -80,16 +80,11 @@ enum hw_bar_type {
HW_BAR_COUNT
};
-struct mlx4_ib_vma_private_data {
- struct vm_area_struct *vma;
-};
-
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
struct mlx4_uar uar;
struct list_head db_page_list;
struct mutex db_page_mutex;
- struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT];
struct list_head wqn_ranges_list;
struct mutex wqn_ranges_mutex; /* protect wqn_ranges_list */
};
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 6dd3cd2c2f80..0711ca1dfb8f 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2629,7 +2629,6 @@ enum {
static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
- enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
@@ -2639,13 +2638,8 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (cur_state != new_state || cur_state != IB_QPS_RESET) {
- int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- ll = rdma_port_get_link_layer(&dev->ib_dev, port);
- }
-
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
- attr_mask, ll)) {
+ attr_mask)) {
pr_debug("qpn 0x%x: invalid attribute mask specified "
"for transition %d to %d. qp_type %d,"
" attr_mask 0x%x\n",
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index e219093d2764..752bdd536130 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -818,9 +818,7 @@ int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
if (!mlx4_is_master(dev->dev))
return 0;
- dev->iov_parent =
- kobject_create_and_add("iov",
- kobject_get(dev->ib_dev.ports_parent->parent));
+ dev->iov_parent = kobject_create_and_add("iov", &dev->ib_dev.dev.kobj);
if (!dev->iov_parent) {
ret = -ENOMEM;
goto err;
@@ -850,7 +848,6 @@ err_add_entries:
err_ports:
kobject_put(dev->iov_parent);
err:
- kobject_put(dev->ib_dev.ports_parent->parent);
pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret);
return ret;
}
@@ -886,5 +883,4 @@ void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device)
kobject_put(device->ports_parent);
kobject_put(device->iov_parent);
kobject_put(device->iov_parent);
- kobject_put(device->ib_dev.ports_parent->parent);
}
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index c84fef9a8a08..ca060a2e2b36 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -197,3 +197,132 @@ int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT,
0, 0);
}
+
+void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {};
+
+ MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
+ MLX5_SET(destroy_tir_in, in, tirn, tirn);
+ MLX5_SET(destroy_tir_in, in, uid, uid);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0};
+
+ MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, in, tisn, tisn);
+ MLX5_SET(destroy_tis_in, in, uid, uid);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {};
+
+ MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
+ MLX5_SET(destroy_rqt_in, in, uid, uid);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
+ u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0};
+ int err;
+
+ MLX5_SET(alloc_transport_domain_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *tdn = MLX5_GET(alloc_transport_domain_out, out,
+ transport_domain);
+
+ return err;
+}
+
+void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
+ u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0};
+
+ MLX5_SET(dealloc_transport_domain_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+ MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid)
+{
+ u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {};
+
+ MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
+ MLX5_SET(dealloc_pd_in, in, pd, pdn);
+ MLX5_SET(dealloc_pd_in, in, uid, uid);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+ u32 qpn, u16 uid)
+{
+ u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {};
+ void *gid;
+
+ MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
+ MLX5_SET(attach_to_mcg_in, in, qpn, qpn);
+ MLX5_SET(attach_to_mcg_in, in, uid, uid);
+ gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
+ memcpy(gid, mgid, sizeof(*mgid));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+ u32 qpn, u16 uid)
+{
+ u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {};
+ void *gid;
+
+ MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+ MLX5_SET(detach_from_mcg_in, in, qpn, qpn);
+ MLX5_SET(detach_from_mcg_in, in, uid, uid);
+ gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid);
+ memcpy(gid, mgid, sizeof(*mgid));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {};
+ int err;
+
+ MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD);
+ MLX5_SET(alloc_xrcd_in, in, uid, uid);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd);
+ return err;
+}
+
+int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid)
+{
+ u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {};
+
+ MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+ MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn);
+ MLX5_SET(dealloc_xrcd_in, in, uid, uid);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 88cbb1c41703..c03c56455534 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -47,4 +47,18 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
u64 length, u32 alignment);
int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length);
+void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
+void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
+void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
+void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid);
+int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
+ u16 uid);
+void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
+ u16 uid);
+int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+ u32 qpn, u16 uid);
+int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+ u32 qpn, u16 uid);
+int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
+int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index cca1820802b8..7d769b5538b4 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -874,6 +874,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD;
}
+ MLX5_SET(create_cq_in, *cqb, uid, to_mucontext(context)->devx_uid);
return 0;
err_cqb:
@@ -1454,7 +1455,7 @@ ex:
return err;
}
-int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
+int mlx5_ib_get_cqe_size(struct ib_cq *ibcq)
{
struct mlx5_ib_cq *cq;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 66dc337e49a7..61aab7c0c513 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -19,7 +19,7 @@
#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
struct devx_obj {
struct mlx5_core_dev *mdev;
- u32 obj_id;
+ u64 obj_id;
u32 dinlen; /* destroy inbox length */
u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
};
@@ -45,13 +45,14 @@ static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file)
return to_mucontext(ib_uverbs_get_ucontext(file));
}
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev)
{
u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
u64 general_obj_types;
void *hdr;
int err;
+ u16 uid;
hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr);
@@ -60,9 +61,6 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *contex
!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM))
return -EINVAL;
- if (!capable(CAP_NET_RAW))
- return -EPERM;
-
MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX);
@@ -70,19 +68,18 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *contex
if (err)
return err;
- context->devx_uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
- return 0;
+ uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ return uid;
}
-void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
- struct mlx5_ib_ucontext *context)
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
{
u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0};
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, context->devx_uid);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, uid);
mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
@@ -109,150 +106,218 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
}
}
+/*
+ * As the obj_id in the firmware is not globally unique the object type
+ * must be considered upon checking for a valid object id.
+ * For that the opcode of the creator command is encoded as part of the obj_id.
+ */
+static u64 get_enc_obj_id(u16 opcode, u32 obj_id)
+{
+ return ((u64)opcode << 32) | obj_id;
+}
+
static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
- u32 obj_id;
+ u64 obj_id;
switch (opcode) {
case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
- obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_GENERAL_OBJECT,
+ MLX5_GET(general_obj_in_cmd_hdr, in,
+ obj_id));
break;
case MLX5_CMD_OP_QUERY_MKEY:
- obj_id = MLX5_GET(query_mkey_in, in, mkey_index);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_MKEY,
+ MLX5_GET(query_mkey_in, in,
+ mkey_index));
break;
case MLX5_CMD_OP_QUERY_CQ:
- obj_id = MLX5_GET(query_cq_in, in, cqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
+ MLX5_GET(query_cq_in, in, cqn));
break;
case MLX5_CMD_OP_MODIFY_CQ:
- obj_id = MLX5_GET(modify_cq_in, in, cqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
+ MLX5_GET(modify_cq_in, in, cqn));
break;
case MLX5_CMD_OP_QUERY_SQ:
- obj_id = MLX5_GET(query_sq_in, in, sqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
+ MLX5_GET(query_sq_in, in, sqn));
break;
case MLX5_CMD_OP_MODIFY_SQ:
- obj_id = MLX5_GET(modify_sq_in, in, sqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
+ MLX5_GET(modify_sq_in, in, sqn));
break;
case MLX5_CMD_OP_QUERY_RQ:
- obj_id = MLX5_GET(query_rq_in, in, rqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ MLX5_GET(query_rq_in, in, rqn));
break;
case MLX5_CMD_OP_MODIFY_RQ:
- obj_id = MLX5_GET(modify_rq_in, in, rqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ MLX5_GET(modify_rq_in, in, rqn));
break;
case MLX5_CMD_OP_QUERY_RMP:
- obj_id = MLX5_GET(query_rmp_in, in, rmpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
+ MLX5_GET(query_rmp_in, in, rmpn));
break;
case MLX5_CMD_OP_MODIFY_RMP:
- obj_id = MLX5_GET(modify_rmp_in, in, rmpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
+ MLX5_GET(modify_rmp_in, in, rmpn));
break;
case MLX5_CMD_OP_QUERY_RQT:
- obj_id = MLX5_GET(query_rqt_in, in, rqtn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
+ MLX5_GET(query_rqt_in, in, rqtn));
break;
case MLX5_CMD_OP_MODIFY_RQT:
- obj_id = MLX5_GET(modify_rqt_in, in, rqtn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
+ MLX5_GET(modify_rqt_in, in, rqtn));
break;
case MLX5_CMD_OP_QUERY_TIR:
- obj_id = MLX5_GET(query_tir_in, in, tirn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
+ MLX5_GET(query_tir_in, in, tirn));
break;
case MLX5_CMD_OP_MODIFY_TIR:
- obj_id = MLX5_GET(modify_tir_in, in, tirn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
+ MLX5_GET(modify_tir_in, in, tirn));
break;
case MLX5_CMD_OP_QUERY_TIS:
- obj_id = MLX5_GET(query_tis_in, in, tisn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
+ MLX5_GET(query_tis_in, in, tisn));
break;
case MLX5_CMD_OP_MODIFY_TIS:
- obj_id = MLX5_GET(modify_tis_in, in, tisn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
+ MLX5_GET(modify_tis_in, in, tisn));
break;
case MLX5_CMD_OP_QUERY_FLOW_TABLE:
- obj_id = MLX5_GET(query_flow_table_in, in, table_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
+ MLX5_GET(query_flow_table_in, in,
+ table_id));
break;
case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
- obj_id = MLX5_GET(modify_flow_table_in, in, table_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
+ MLX5_GET(modify_flow_table_in, in,
+ table_id));
break;
case MLX5_CMD_OP_QUERY_FLOW_GROUP:
- obj_id = MLX5_GET(query_flow_group_in, in, group_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_GROUP,
+ MLX5_GET(query_flow_group_in, in,
+ group_id));
break;
case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
- obj_id = MLX5_GET(query_fte_in, in, flow_index);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
+ MLX5_GET(query_fte_in, in,
+ flow_index));
break;
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
- obj_id = MLX5_GET(set_fte_in, in, flow_index);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
+ MLX5_GET(set_fte_in, in, flow_index));
break;
case MLX5_CMD_OP_QUERY_Q_COUNTER:
- obj_id = MLX5_GET(query_q_counter_in, in, counter_set_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_Q_COUNTER,
+ MLX5_GET(query_q_counter_in, in,
+ counter_set_id));
break;
case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
- obj_id = MLX5_GET(query_flow_counter_in, in, flow_counter_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_FLOW_COUNTER,
+ MLX5_GET(query_flow_counter_in, in,
+ flow_counter_id));
break;
case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
- obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT,
+ MLX5_GET(general_obj_in_cmd_hdr, in,
+ obj_id));
break;
case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
- obj_id = MLX5_GET(query_scheduling_element_in, in,
- scheduling_element_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
+ MLX5_GET(query_scheduling_element_in,
+ in, scheduling_element_id));
break;
case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
- obj_id = MLX5_GET(modify_scheduling_element_in, in,
- scheduling_element_id);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
+ MLX5_GET(modify_scheduling_element_in,
+ in, scheduling_element_id));
break;
case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
- obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT,
+ MLX5_GET(add_vxlan_udp_dport_in, in,
+ vxlan_udp_port));
break;
case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
- obj_id = MLX5_GET(query_l2_table_entry_in, in, table_index);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
+ MLX5_GET(query_l2_table_entry_in, in,
+ table_index));
break;
case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
- obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
+ MLX5_GET(set_l2_table_entry_in, in,
+ table_index));
break;
case MLX5_CMD_OP_QUERY_QP:
- obj_id = MLX5_GET(query_qp_in, in, qpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(query_qp_in, in, qpn));
break;
case MLX5_CMD_OP_RST2INIT_QP:
- obj_id = MLX5_GET(rst2init_qp_in, in, qpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(rst2init_qp_in, in, qpn));
break;
case MLX5_CMD_OP_INIT2RTR_QP:
- obj_id = MLX5_GET(init2rtr_qp_in, in, qpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(init2rtr_qp_in, in, qpn));
break;
case MLX5_CMD_OP_RTR2RTS_QP:
- obj_id = MLX5_GET(rtr2rts_qp_in, in, qpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(rtr2rts_qp_in, in, qpn));
break;
case MLX5_CMD_OP_RTS2RTS_QP:
- obj_id = MLX5_GET(rts2rts_qp_in, in, qpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(rts2rts_qp_in, in, qpn));
break;
case MLX5_CMD_OP_SQERR2RTS_QP:
- obj_id = MLX5_GET(sqerr2rts_qp_in, in, qpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(sqerr2rts_qp_in, in, qpn));
break;
case MLX5_CMD_OP_2ERR_QP:
- obj_id = MLX5_GET(qp_2err_in, in, qpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(qp_2err_in, in, qpn));
break;
case MLX5_CMD_OP_2RST_QP:
- obj_id = MLX5_GET(qp_2rst_in, in, qpn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(qp_2rst_in, in, qpn));
break;
case MLX5_CMD_OP_QUERY_DCT:
- obj_id = MLX5_GET(query_dct_in, in, dctn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
+ MLX5_GET(query_dct_in, in, dctn));
break;
case MLX5_CMD_OP_QUERY_XRQ:
- obj_id = MLX5_GET(query_xrq_in, in, xrqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
+ MLX5_GET(query_xrq_in, in, xrqn));
break;
case MLX5_CMD_OP_QUERY_XRC_SRQ:
- obj_id = MLX5_GET(query_xrc_srq_in, in, xrc_srqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
+ MLX5_GET(query_xrc_srq_in, in,
+ xrc_srqn));
break;
case MLX5_CMD_OP_ARM_XRC_SRQ:
- obj_id = MLX5_GET(arm_xrc_srq_in, in, xrc_srqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
+ MLX5_GET(arm_xrc_srq_in, in, xrc_srqn));
break;
case MLX5_CMD_OP_QUERY_SRQ:
- obj_id = MLX5_GET(query_srq_in, in, srqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SRQ,
+ MLX5_GET(query_srq_in, in, srqn));
break;
case MLX5_CMD_OP_ARM_RQ:
- obj_id = MLX5_GET(arm_rq_in, in, srq_number);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ MLX5_GET(arm_rq_in, in, srq_number));
break;
case MLX5_CMD_OP_DRAIN_DCT:
case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
- obj_id = MLX5_GET(drain_dct_in, in, dctn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
+ MLX5_GET(drain_dct_in, in, dctn));
break;
case MLX5_CMD_OP_ARM_XRQ:
- obj_id = MLX5_GET(arm_xrq_in, in, xrqn);
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
+ MLX5_GET(arm_xrq_in, in, xrqn));
break;
default:
return false;
@@ -264,11 +329,102 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
return false;
}
-static bool devx_is_obj_create_cmd(const void *in)
+static void devx_set_umem_valid(const void *in)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
switch (opcode) {
+ case MLX5_CMD_OP_CREATE_MKEY:
+ MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
+ break;
+ case MLX5_CMD_OP_CREATE_CQ:
+ {
+ void *cqc;
+
+ MLX5_SET(create_cq_in, in, cq_umem_valid, 1);
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+ MLX5_SET(cqc, cqc, dbr_umem_valid, 1);
+ break;
+ }
+ case MLX5_CMD_OP_CREATE_QP:
+ {
+ void *qpc;
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, dbr_umem_valid, 1);
+ MLX5_SET(create_qp_in, in, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_CREATE_RQ:
+ {
+ void *rqc, *wq;
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ MLX5_SET(wq, wq, dbr_umem_valid, 1);
+ MLX5_SET(wq, wq, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_CREATE_SQ:
+ {
+ void *sqc, *wq;
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ MLX5_SET(wq, wq, dbr_umem_valid, 1);
+ MLX5_SET(wq, wq, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_MODIFY_CQ:
+ MLX5_SET(modify_cq_in, in, cq_umem_valid, 1);
+ break;
+
+ case MLX5_CMD_OP_CREATE_RMP:
+ {
+ void *rmpc, *wq;
+
+ rmpc = MLX5_ADDR_OF(create_rmp_in, in, ctx);
+ wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+ MLX5_SET(wq, wq, dbr_umem_valid, 1);
+ MLX5_SET(wq, wq, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_CREATE_XRQ:
+ {
+ void *xrqc, *wq;
+
+ xrqc = MLX5_ADDR_OF(create_xrq_in, in, xrq_context);
+ wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
+ MLX5_SET(wq, wq, dbr_umem_valid, 1);
+ MLX5_SET(wq, wq, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ {
+ void *xrc_srqc;
+
+ MLX5_SET(create_xrc_srq_in, in, xrc_srq_umem_valid, 1);
+ xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, in,
+ xrc_srq_context_entry);
+ MLX5_SET(xrc_srqc, xrc_srqc, dbr_umem_valid, 1);
+ break;
+ }
+
+ default:
+ return;
+ }
+}
+
+static bool devx_is_obj_create_cmd(const void *in, u16 *opcode)
+{
+ *opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (*opcode) {
case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
case MLX5_CMD_OP_CREATE_MKEY:
case MLX5_CMD_OP_CREATE_CQ:
@@ -385,12 +541,49 @@ static bool devx_is_obj_query_cmd(const void *in)
}
}
+static bool devx_is_whitelist_cmd(void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_QUERY_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
+{
+ if (devx_is_whitelist_cmd(cmd_in)) {
+ struct mlx5_ib_dev *dev;
+
+ if (c->devx_uid)
+ return c->devx_uid;
+
+ dev = to_mdev(c->ibucontext.device);
+ if (dev->devx_whitelist_uid)
+ return dev->devx_whitelist_uid;
+
+ return -EOPNOTSUPP;
+ }
+
+ if (!c->devx_uid)
+ return -EINVAL;
+
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
+
+ return c->devx_uid;
+}
static bool devx_is_general_cmd(void *in)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
switch (opcode) {
case MLX5_CMD_OP_QUERY_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
case MLX5_CMD_OP_QUERY_VPORT_STATE:
case MLX5_CMD_OP_QUERY_ADAPTER:
case MLX5_CMD_OP_QUERY_ISSI:
@@ -498,14 +691,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
void *cmd_out;
int err;
+ int uid;
c = devx_ufile2uctx(file);
if (IS_ERR(c))
return PTR_ERR(c);
dev = to_mdev(c->ibucontext.device);
- if (!c->devx_uid)
- return -EPERM;
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
/* Only white list of some general HCA commands are allowed for this method. */
if (!devx_is_general_cmd(cmd_in))
@@ -515,7 +710,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
if (IS_ERR(cmd_out))
return PTR_ERR(cmd_out);
- MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
err = mlx5_cmd_exec(dev->mdev, cmd_in,
uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
cmd_out, cmd_out_len);
@@ -726,11 +921,15 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
struct devx_obj *obj;
int err;
+ int uid;
+ u32 obj_id;
+ u16 opcode;
- if (!c->devx_uid)
- return -EPERM;
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
- if (!devx_is_obj_create_cmd(cmd_in))
+ if (!devx_is_obj_create_cmd(cmd_in, &opcode))
return -EINVAL;
cmd_out = uverbs_zalloc(attrs, cmd_out_len);
@@ -741,7 +940,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
if (!obj)
return -ENOMEM;
- MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+ devx_set_umem_valid(cmd_in);
+
err = mlx5_cmd_exec(dev->mdev, cmd_in,
uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN),
cmd_out, cmd_out_len);
@@ -750,13 +951,15 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
uobj->object = obj;
obj->mdev = dev->mdev;
- devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, &obj->obj_id);
+ devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen,
+ &obj_id);
WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
if (err)
goto obj_destroy;
+ obj->obj_id = get_enc_obj_id(opcode, obj_id);
return 0;
obj_destroy:
@@ -778,9 +981,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
struct devx_obj *obj = uobj->object;
void *cmd_out;
int err;
+ int uid;
- if (!c->devx_uid)
- return -EPERM;
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
if (!devx_is_obj_modify_cmd(cmd_in))
return -EINVAL;
@@ -792,7 +997,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
if (IS_ERR(cmd_out))
return PTR_ERR(cmd_out);
- MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+ devx_set_umem_valid(cmd_in);
+
err = mlx5_cmd_exec(obj->mdev, cmd_in,
uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
cmd_out, cmd_out_len);
@@ -815,9 +1022,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
struct devx_obj *obj = uobj->object;
void *cmd_out;
int err;
+ int uid;
- if (!c->devx_uid)
- return -EPERM;
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
if (!devx_is_obj_query_cmd(cmd_in))
return -EINVAL;
@@ -829,7 +1038,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
if (IS_ERR(cmd_out))
return PTR_ERR(cmd_out);
- MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
err = mlx5_cmd_exec(obj->mdev, cmd_in,
uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
cmd_out, cmd_out_len);
@@ -928,6 +1137,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
int err;
if (!c->devx_uid)
+ return -EINVAL;
+
+ if (!capable(CAP_NET_RAW))
return -EPERM;
obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index 1a29f47f836e..f86cdcafdafc 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -7,7 +7,9 @@
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_types.h>
#include <rdma/uverbs_ioctl.h>
+#include <rdma/uverbs_std_types.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/ib_umem.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
@@ -16,6 +18,24 @@
#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>
+static int
+mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
+ enum mlx5_flow_namespace_type *namespace)
+{
+ switch (table_type) {
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_BYPASS;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_EGRESS;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
[MLX5_IB_FLOW_TYPE_NORMAL] = {
.type = UVERBS_ATTR_TYPE_PTR_IN,
@@ -38,11 +58,15 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
},
};
+#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
{
+ struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
struct mlx5_ib_flow_handler *flow_handler;
struct mlx5_ib_flow_matcher *fs_matcher;
+ struct ib_uobject **arr_flow_actions;
+ struct ib_uflow_resources *uflow_res;
void *devx_obj;
int dest_id, dest_type;
void *cmd_in;
@@ -52,6 +76,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
struct ib_uobject *uobj =
uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
+ int len, ret, i;
if (!capable(CAP_NET_RAW))
return -EPERM;
@@ -61,7 +86,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
dest_qp = uverbs_attr_is_valid(attrs,
MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
- if ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))
+ fs_matcher = uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS &&
+ ((dest_devx && dest_qp) || (!dest_devx && !dest_qp)))
+ return -EINVAL;
+
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS &&
+ (dest_devx || dest_qp))
return -EINVAL;
if (dest_devx) {
@@ -75,7 +107,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
*/
if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type))
return -EINVAL;
- } else {
+ } else if (dest_qp) {
struct mlx5_ib_qp *mqp;
qp = uverbs_attr_get_obj(attrs,
@@ -92,6 +124,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
else
dest_id = mqp->raw_packet_qp.rq.tirn;
dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ } else {
+ dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
}
if (dev->rep)
@@ -101,16 +135,48 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
inlen = uverbs_attr_get_len(attrs,
MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
- fs_matcher = uverbs_attr_get_obj(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
- flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, cmd_in, inlen,
+
+ uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
+ if (!uflow_res)
+ return -ENOMEM;
+
+ len = uverbs_attr_get_uobjs_arr(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
+ for (i = 0; i < len; i++) {
+ struct mlx5_ib_flow_action *maction =
+ to_mflow_act(arr_flow_actions[i]->object);
+
+ ret = parse_flow_flow_action(maction, false, &flow_act);
+ if (ret)
+ goto err_out;
+ flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
+ arr_flow_actions[i]->object);
+ }
+
+ ret = uverbs_copy_from(&flow_act.flow_tag, attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_TAG);
+ if (!ret) {
+ if (flow_act.flow_tag >= BIT(24)) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+ flow_act.flags |= FLOW_ACT_HAS_TAG;
+ }
+
+ flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act,
+ cmd_in, inlen,
dest_id, dest_type);
- if (IS_ERR(flow_handler))
- return PTR_ERR(flow_handler);
+ if (IS_ERR(flow_handler)) {
+ ret = PTR_ERR(flow_handler);
+ goto err_out;
+ }
- ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev);
+ ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
return 0;
+err_out:
+ ib_uverbs_flow_resources_free(uflow_res);
+ return ret;
}
static int flow_matcher_cleanup(struct ib_uobject *uobject,
@@ -134,12 +200,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
struct mlx5_ib_flow_matcher *obj;
+ u32 flags;
int err;
obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
if (!obj)
return -ENOMEM;
+ obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
obj->mask_len = uverbs_attr_get_len(
attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
err = uverbs_copy_from(&obj->matcher_mask,
@@ -165,6 +233,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
if (err)
goto end;
+ err = uverbs_get_flags32(&flags, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ IB_FLOW_ATTR_FLAGS_EGRESS);
+ if (err)
+ goto end;
+
+ if (flags) {
+ err = mlx5_ib_ft_type_to_namespace(
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, &obj->ns_type);
+ if (err)
+ goto end;
+ }
+
uobj->object = obj;
obj->mdev = dev->mdev;
atomic_set(&obj->usecnt, 0);
@@ -175,6 +256,248 @@ end:
return err;
}
+void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
+{
+ switch (maction->flow_action_raw.sub_type) {
+ case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
+ mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
+ maction->flow_action_raw.action_id);
+ break;
+ case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
+ mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
+ maction->flow_action_raw.action_id);
+ break;
+ case MLX5_IB_FLOW_ACTION_DECAP:
+ break;
+ default:
+ break;
+ }
+}
+
+static struct ib_flow_action *
+mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_uapi_flow_table_type ft_type,
+ u8 num_actions, void *in)
+{
+ enum mlx5_flow_namespace_type namespace;
+ struct mlx5_ib_flow_action *maction;
+ int ret;
+
+ ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
+ if (ret)
+ return ERR_PTR(-EINVAL);
+
+ maction = kzalloc(sizeof(*maction), GFP_KERNEL);
+ if (!maction)
+ return ERR_PTR(-ENOMEM);
+
+ ret = mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in,
+ &maction->flow_action_raw.action_id);
+
+ if (ret) {
+ kfree(maction);
+ return ERR_PTR(ret);
+ }
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
+ maction->flow_action_raw.dev = dev;
+
+ return &maction->ib_action;
+}
+
+static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
+{
+ return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ max_modify_header_actions) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, max_modify_header_actions);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
+ struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
+ enum mlx5_ib_uapi_flow_table_type ft_type;
+ struct ib_flow_action *action;
+ size_t num_actions;
+ void *in;
+ int len;
+ int ret;
+
+ if (!mlx5_ib_modify_header_supported(mdev))
+ return -EOPNOTSUPP;
+
+ in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
+ len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
+
+ if (len % MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto))
+ return -EINVAL;
+
+ ret = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
+ if (ret)
+ return ret;
+
+ num_actions = len / MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto),
+ action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
+ if (IS_ERR(action))
+ return PTR_ERR(action);
+
+ uverbs_flow_action_fill_action(action, uobj, uobj->context->device,
+ IB_FLOW_ACTION_UNSPECIFIED);
+
+ return 0;
+}
+
+static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
+ u8 packet_reformat_type,
+ u8 ft_type)
+{
+ switch (packet_reformat_type) {
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
+ return MLX5_CAP_FLOWTABLE(ibdev->mdev,
+ encap_general_header);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
+ return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
+ reformat_l2_to_l3_tunnel);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
+ return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
+ reformat_l3_tunnel_to_l2);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
+ return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
+{
+ switch (dv_prt) {
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx5_ib_flow_action_create_packet_reformat_ctx(
+ struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_action *maction,
+ u8 ft_type, u8 dv_prt,
+ void *in, size_t len)
+{
+ enum mlx5_flow_namespace_type namespace;
+ u8 prm_prt;
+ int ret;
+
+ ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
+ if (ret)
+ return ret;
+
+ ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
+ if (ret)
+ return ret;
+
+ ret = mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
+ in, namespace,
+ &maction->flow_action_raw.action_id);
+ if (ret)
+ return ret;
+
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
+ maction->flow_action_raw.dev = dev;
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
+ struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
+ enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
+ enum mlx5_ib_uapi_flow_table_type ft_type;
+ struct mlx5_ib_flow_action *maction;
+ int ret;
+
+ ret = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&dv_prt, attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
+ if (ret)
+ return ret;
+
+ if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
+ return -EOPNOTSUPP;
+
+ maction = kzalloc(sizeof(*maction), GFP_KERNEL);
+ if (!maction)
+ return -ENOMEM;
+
+ if (dv_prt ==
+ MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_DECAP;
+ maction->flow_action_raw.dev = mdev;
+ } else {
+ void *in;
+ int len;
+
+ in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
+ if (IS_ERR(in)) {
+ ret = PTR_ERR(in);
+ goto free_maction;
+ }
+
+ len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
+
+ ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
+ maction, ft_type, dv_prt, in, len);
+ if (ret)
+ goto free_maction;
+ }
+
+ uverbs_flow_action_fill_action(&maction->ib_action, uobj,
+ uobj->context->device,
+ IB_FLOW_ACTION_UNSPECIFIED);
+ return 0;
+
+free_maction:
+ kfree(maction);
+ return ret;
+}
+
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_CREATE_FLOW,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
@@ -195,7 +518,15 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ACCESS_READ),
UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
MLX5_IB_OBJECT_DEVX_OBJ,
- UVERBS_ACCESS_READ));
+ UVERBS_ACCESS_READ),
+ UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_READ, 1,
+ MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
MLX5_IB_METHOD_DESTROY_FLOW,
@@ -210,6 +541,44 @@ ADD_UVERBS_METHODS(mlx5_ib_fs,
&UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
+ set_action_in_add_action_in_auto)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
+ UVERBS_ATTR_MIN_SIZE(1),
+ UA_ALLOC_AND_COPY,
+ UA_OPTIONAL),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
+ enum mlx5_ib_uapi_flow_action_packet_reformat_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(
+ mlx5_ib_flow_actions,
+ UVERBS_OBJECT_FLOW_ACTION,
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
+
+DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
MLX5_IB_OBJECT_FLOW_MATCHER,
@@ -224,7 +593,10 @@ DECLARE_UVERBS_NAMED_METHOD(
UA_MANDATORY),
UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
UVERBS_ATTR_TYPE(u8),
- UA_MANDATORY));
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ enum ib_flow_flags,
+ UA_OPTIONAL));
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
@@ -247,6 +619,7 @@ int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
root[i++] = &flow_objects;
root[i++] = &mlx5_ib_fs;
+ root[i++] = &mlx5_ib_flow_actions;
return i;
}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 35a0e04c38f2..584ff2ea7810 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -39,9 +39,6 @@ static const struct mlx5_ib_profile rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
- STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
- mlx5_ib_stage_class_attr_init,
- NULL),
};
static int
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index af32899bb72a..e9c428071df3 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1571,14 +1571,57 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
}
-static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
+int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
+{
+ int err = 0;
+
+ mutex_lock(&dev->lb.mutex);
+ if (td)
+ dev->lb.user_td++;
+ if (qp)
+ dev->lb.qps++;
+
+ if (dev->lb.user_td == 2 ||
+ dev->lb.qps == 1) {
+ if (!dev->lb.enabled) {
+ err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
+ dev->lb.enabled = true;
+ }
+ }
+
+ mutex_unlock(&dev->lb.mutex);
+
+ return err;
+}
+
+void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
+{
+ mutex_lock(&dev->lb.mutex);
+ if (td)
+ dev->lb.user_td--;
+ if (qp)
+ dev->lb.qps--;
+
+ if (dev->lb.user_td == 1 &&
+ dev->lb.qps == 0) {
+ if (dev->lb.enabled) {
+ mlx5_nic_vport_update_local_lb(dev->mdev, false);
+ dev->lb.enabled = false;
+ }
+ }
+
+ mutex_unlock(&dev->lb.mutex);
+}
+
+static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn,
+ u16 uid)
{
int err;
if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
return 0;
- err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
+ err = mlx5_cmd_alloc_transport_domain(dev->mdev, tdn, uid);
if (err)
return err;
@@ -1587,35 +1630,23 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
!MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
return err;
- mutex_lock(&dev->lb_mutex);
- dev->user_td++;
-
- if (dev->user_td == 2)
- err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
-
- mutex_unlock(&dev->lb_mutex);
- return err;
+ return mlx5_ib_enable_lb(dev, true, false);
}
-static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
+static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
+ u16 uid)
{
if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
return;
- mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
+ mlx5_cmd_dealloc_transport_domain(dev->mdev, tdn, uid);
if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
(!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
!MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
return;
- mutex_lock(&dev->lb_mutex);
- dev->user_td--;
-
- if (dev->user_td < 2)
- mlx5_nic_vport_update_local_lb(dev->mdev, false);
-
- mutex_unlock(&dev->lb_mutex);
+ mlx5_ib_disable_lb(dev, true, false);
}
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
@@ -1727,30 +1758,24 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
- err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
- if (err)
- goto out_uars;
-
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
- /* Block DEVX on Infiniband as of SELinux */
- if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) {
- err = -EPERM;
- goto out_td;
- }
-
- err = mlx5_ib_devx_create(dev, context);
- if (err)
- goto out_td;
+ err = mlx5_ib_devx_create(dev);
+ if (err < 0)
+ goto out_uars;
+ context->devx_uid = err;
}
+ err = mlx5_ib_alloc_transport_domain(dev, &context->tdn,
+ context->devx_uid);
+ if (err)
+ goto out_devx;
+
if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
if (err)
goto out_mdev;
}
- INIT_LIST_HEAD(&context->vma_private_list);
- mutex_init(&context->vma_private_list_mutex);
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
@@ -1826,13 +1851,21 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
context->lib_caps = req.lib_caps;
print_lib_caps(dev, context->lib_caps);
+ if (mlx5_lag_is_active(dev->mdev)) {
+ u8 port = mlx5_core_native_port_num(dev->mdev);
+
+ atomic_set(&context->tx_port_affinity,
+ atomic_add_return(
+ 1, &dev->roce[port].tx_port_affinity));
+ }
+
return &context->ibucontext;
out_mdev:
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
+out_devx:
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
- mlx5_ib_devx_destroy(dev, context);
-out_td:
- mlx5_ib_dealloc_transport_domain(dev, context->tdn);
+ mlx5_ib_devx_destroy(dev, context->devx_uid);
out_uars:
deallocate_uars(dev, context);
@@ -1855,11 +1888,18 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
struct mlx5_bfreg_info *bfregi;
- if (context->devx_uid)
- mlx5_ib_devx_destroy(dev, context);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ /* All umem's must be destroyed before destroying the ucontext. */
+ mutex_lock(&ibcontext->per_mm_list_lock);
+ WARN_ON(!list_empty(&ibcontext->per_mm_list));
+ mutex_unlock(&ibcontext->per_mm_list_lock);
+#endif
bfregi = &context->bfregi;
- mlx5_ib_dealloc_transport_domain(dev, context->tdn);
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
+
+ if (context->devx_uid)
+ mlx5_ib_devx_destroy(dev, context->devx_uid);
deallocate_uars(dev, context);
kfree(bfregi->sys_pages);
@@ -1900,94 +1940,9 @@ static int get_extended_index(unsigned long offset)
return get_arg(offset) | ((offset >> 16) & 0xff) << 8;
}
-static void mlx5_ib_vma_open(struct vm_area_struct *area)
-{
- /* vma_open is called when a new VMA is created on top of our VMA. This
- * is done through either mremap flow or split_vma (usually due to
- * mlock, madvise, munmap, etc.) We do not support a clone of the VMA,
- * as this VMA is strongly hardware related. Therefore we set the
- * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
- * calling us again and trying to do incorrect actions. We assume that
- * the original VMA size is exactly a single page, and therefore all
- * "splitting" operation will not happen to it.
- */
- area->vm_ops = NULL;
-}
-
-static void mlx5_ib_vma_close(struct vm_area_struct *area)
-{
- struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;
-
- /* It's guaranteed that all VMAs opened on a FD are closed before the
- * file itself is closed, therefore no sync is needed with the regular
- * closing flow. (e.g. mlx5 ib_dealloc_ucontext)
- * However need a sync with accessing the vma as part of
- * mlx5_ib_disassociate_ucontext.
- * The close operation is usually called under mm->mmap_sem except when
- * process is exiting.
- * The exiting case is handled explicitly as part of
- * mlx5_ib_disassociate_ucontext.
- */
- mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;
-
- /* setting the vma context pointer to null in the mlx5_ib driver's
- * private data, to protect a race condition in
- * mlx5_ib_disassociate_ucontext().
- */
- mlx5_ib_vma_priv_data->vma = NULL;
- mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
- list_del(&mlx5_ib_vma_priv_data->list);
- mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
- kfree(mlx5_ib_vma_priv_data);
-}
-
-static const struct vm_operations_struct mlx5_ib_vm_ops = {
- .open = mlx5_ib_vma_open,
- .close = mlx5_ib_vma_close
-};
-
-static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
- struct mlx5_ib_ucontext *ctx)
-{
- struct mlx5_ib_vma_private_data *vma_prv;
- struct list_head *vma_head = &ctx->vma_private_list;
-
- vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
- if (!vma_prv)
- return -ENOMEM;
-
- vma_prv->vma = vma;
- vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex;
- vma->vm_private_data = vma_prv;
- vma->vm_ops = &mlx5_ib_vm_ops;
-
- mutex_lock(&ctx->vma_private_list_mutex);
- list_add(&vma_prv->list, vma_head);
- mutex_unlock(&ctx->vma_private_list_mutex);
-
- return 0;
-}
static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
- struct vm_area_struct *vma;
- struct mlx5_ib_vma_private_data *vma_private, *n;
- struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
-
- mutex_lock(&context->vma_private_list_mutex);
- list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
- list) {
- vma = vma_private->vma;
- zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
- /* context going to be destroyed, should
- * not access ops any more.
- */
- vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
- vma->vm_ops = NULL;
- list_del(&vma_private->list);
- kfree(vma_private);
- }
- mutex_unlock(&context->vma_private_list_mutex);
}
static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
@@ -2010,9 +1965,6 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
struct vm_area_struct *vma,
struct mlx5_ib_ucontext *context)
{
- phys_addr_t pfn;
- int err;
-
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
@@ -2025,13 +1977,8 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
if (!dev->mdev->clock_info_page)
return -EOPNOTSUPP;
- pfn = page_to_pfn(dev->mdev->clock_info_page);
- err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
- vma->vm_page_prot);
- if (err)
- return err;
-
- return mlx5_ib_set_vma_data(vma, context);
+ return rdma_user_mmap_page(&context->ibucontext, vma,
+ dev->mdev->clock_info_page, PAGE_SIZE);
}
static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
@@ -2121,21 +2068,15 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
pfn = uar_index2pfn(dev, uar_index);
mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
- vma->vm_page_prot = prot;
- err = io_remap_pfn_range(vma, vma->vm_start, pfn,
- PAGE_SIZE, vma->vm_page_prot);
+ err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE,
+ prot);
if (err) {
mlx5_ib_err(dev,
- "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n",
+ "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n",
err, mmap_cmd2str(cmd));
- err = -EAGAIN;
goto err;
}
- err = mlx5_ib_set_vma_data(vma, context);
- if (err)
- goto err;
-
if (dyn_uar)
bfregi->sys_pages[idx] = uar_index;
return 0;
@@ -2160,7 +2101,6 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
size_t map_size = vma->vm_end - vma->vm_start;
u32 npages = map_size >> PAGE_SHIFT;
phys_addr_t pfn;
- pgprot_t prot;
if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
page_idx + npages)
@@ -2170,14 +2110,8 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
PAGE_SHIFT) +
page_idx;
- prot = pgprot_writecombine(vma->vm_page_prot);
- vma->vm_page_prot = prot;
-
- if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size,
- vma->vm_page_prot))
- return -EAGAIN;
-
- return mlx5_ib_set_vma_data(vma, mctx);
+ return rdma_user_mmap_io(context, vma, pfn, map_size,
+ pgprot_writecombine(vma->vm_page_prot));
}
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
@@ -2318,21 +2252,30 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
struct mlx5_ib_alloc_pd_resp resp;
struct mlx5_ib_pd *pd;
int err;
+ u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {};
+ u16 uid = 0;
pd = kmalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
- err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
+ uid = context ? to_mucontext(context)->devx_uid : 0;
+ MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
+ MLX5_SET(alloc_pd_in, in, uid, uid);
+ err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in),
+ out, sizeof(out));
if (err) {
kfree(pd);
return ERR_PTR(err);
}
+ pd->pdn = MLX5_GET(alloc_pd_out, out, pd);
+ pd->uid = uid;
if (context) {
resp.pdn = pd->pdn;
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
- mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
+ mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid);
kfree(pd);
return ERR_PTR(-EFAULT);
}
@@ -2346,7 +2289,7 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
struct mlx5_ib_dev *mdev = to_mdev(pd->device);
struct mlx5_ib_pd *mpd = to_mpd(pd);
- mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
+ mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
kfree(mpd);
return 0;
@@ -2452,20 +2395,50 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
offsetof(typeof(filter), field) -\
sizeof(filter.field))
-static int parse_flow_flow_action(const union ib_flow_spec *ib_spec,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_act *action)
+int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
+ bool is_egress,
+ struct mlx5_flow_act *action)
{
- struct mlx5_ib_flow_action *maction = to_mflow_act(ib_spec->action.act);
switch (maction->ib_action.type) {
case IB_FLOW_ACTION_ESP:
+ if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
+ return -EINVAL;
/* Currently only AES_GCM keymat is supported by the driver */
action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
- action->action |= flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS ?
+ action->action |= is_egress ?
MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
return 0;
+ case IB_FLOW_ACTION_UNSPECIFIED:
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ return -EINVAL;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ action->modify_id = maction->flow_action_raw.action_id;
+ return 0;
+ }
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_DECAP) {
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ return -EINVAL;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ return 0;
+ }
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
+ if (action->action &
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
+ return -EINVAL;
+ action->action |=
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ action->reformat_id =
+ maction->flow_action_raw.action_id;
+ return 0;
+ }
+ /* fall through */
default:
return -EOPNOTSUPP;
}
@@ -2802,7 +2775,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
break;
case IB_FLOW_SPEC_ACTION_HANDLE:
- ret = parse_flow_flow_action(ib_spec, flow_attr, action);
+ ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
+ flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
if (ret)
return ret;
break;
@@ -2883,7 +2857,7 @@ is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
* rules would be supported, always return VALID_SPEC_NA.
*/
if (!is_crypto)
- return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA;
+ return VALID_SPEC_NA;
return is_crypto && is_ipsec &&
(!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ?
@@ -3026,14 +3000,15 @@ enum flow_table_type {
static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
struct mlx5_ib_flow_prio *prio,
int priority,
- int num_entries, int num_groups)
+ int num_entries, int num_groups,
+ u32 flags)
{
struct mlx5_flow_table *ft;
ft = mlx5_create_auto_grouped_flow_table(ns, priority,
num_entries,
num_groups,
- 0, 0);
+ 0, flags);
if (IS_ERR(ft))
return ERR_CAST(ft);
@@ -3053,26 +3028,43 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
int max_table_size;
int num_entries;
int num_groups;
+ u32 flags = 0;
int priority;
max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
log_max_ft_size));
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- if (ft_type == MLX5_IB_FT_TX)
- priority = 0;
- else if (flow_is_multicast_only(flow_attr) &&
- !dont_trap)
+ enum mlx5_flow_namespace_type fn_type;
+
+ if (flow_is_multicast_only(flow_attr) &&
+ !dont_trap)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
priority = ib_prio_to_core_prio(flow_attr->priority,
dont_trap);
- ns = mlx5_get_flow_namespace(dev->mdev,
- ft_type == MLX5_IB_FT_TX ?
- MLX5_FLOW_NAMESPACE_EGRESS :
- MLX5_FLOW_NAMESPACE_BYPASS);
+ if (ft_type == MLX5_IB_FT_RX) {
+ fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
+ prio = &dev->flow_db->prios[priority];
+ if (!dev->rep &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (!dev->rep &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ reformat_l3_tunnel_to_l2))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else {
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
+ log_max_ft_size));
+ fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
+ prio = &dev->flow_db->egress_prios[priority];
+ if (!dev->rep &&
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ }
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
- prio = &dev->flow_db->prios[priority];
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
ns = mlx5_get_flow_namespace(dev->mdev,
@@ -3104,7 +3096,8 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
ft = prio->flow_table;
if (!ft)
- return _get_prio(ns, prio, priority, num_entries, num_groups);
+ return _get_prio(ns, prio, priority, num_entries, num_groups,
+ flags);
return prio;
}
@@ -3271,6 +3264,9 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
if (!is_valid_attr(dev->mdev, flow_attr))
return ERR_PTR(-EINVAL);
+ if (dev->rep && is_egress)
+ return ERR_PTR(-EINVAL);
+
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
handler = kzalloc(sizeof(*handler), GFP_KERNEL);
if (!handler || !spec) {
@@ -3661,34 +3657,54 @@ free_ucmd:
return ERR_PTR(err);
}
-static struct mlx5_ib_flow_prio *_get_flow_table(struct mlx5_ib_dev *dev,
- int priority, bool mcast)
+static struct mlx5_ib_flow_prio *
+_get_flow_table(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_matcher *fs_matcher,
+ bool mcast)
{
- int max_table_size;
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio;
+ int max_table_size;
+ u32 flags = 0;
+ int priority;
+
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ log_max_ft_size));
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ reformat_l3_tunnel_to_l2))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else { /* Can only be MLX5_FLOW_NAMESPACE_EGRESS */
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
+ log_max_ft_size));
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ }
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- log_max_ft_size));
if (max_table_size < MLX5_FS_MAX_ENTRIES)
return ERR_PTR(-ENOMEM);
if (mcast)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
- priority = ib_prio_to_core_prio(priority, false);
+ priority = ib_prio_to_core_prio(fs_matcher->priority, false);
- ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS);
+ ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
if (!ns)
return ERR_PTR(-ENOTSUPP);
- prio = &dev->flow_db->prios[priority];
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
+ prio = &dev->flow_db->prios[priority];
+ else
+ prio = &dev->flow_db->egress_prios[priority];
if (prio->flow_table)
return prio;
return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES,
- MLX5_FS_MAX_TYPES);
+ MLX5_FS_MAX_TYPES, flags);
}
static struct mlx5_ib_flow_handler *
@@ -3696,10 +3712,10 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
struct mlx5_flow_destination *dst,
struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_act *flow_act,
void *cmd_in, int inlen)
{
struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
struct mlx5_flow_spec *spec;
struct mlx5_flow_table *ft = ft_prio->flow_table;
int err = 0;
@@ -3718,9 +3734,8 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
fs_matcher->mask_len);
spec->match_criteria_enable = fs_matcher->match_criteria_enable;
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
handler->rule = mlx5_add_flow_rules(ft, spec,
- &flow_act, dst, 1);
+ flow_act, dst, 1);
if (IS_ERR(handler->rule)) {
err = PTR_ERR(handler->rule);
@@ -3782,12 +3797,12 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
struct mlx5_ib_flow_handler *
mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_act *flow_act,
void *cmd_in, int inlen, int dest_id,
int dest_type)
{
struct mlx5_flow_destination *dst;
struct mlx5_ib_flow_prio *ft_prio;
- int priority = fs_matcher->priority;
struct mlx5_ib_flow_handler *handler;
bool mcast;
int err;
@@ -3805,7 +3820,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
mutex_lock(&dev->flow_db->lock);
- ft_prio = _get_flow_table(dev, priority, mcast);
+ ft_prio = _get_flow_table(dev, fs_matcher, mcast);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto unlock;
@@ -3814,13 +3829,18 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
dst->type = dest_type;
dst->tir_num = dest_id;
- } else {
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
dst->ft_num = dest_id;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ } else {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
}
- handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in,
- inlen);
+ handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act,
+ cmd_in, inlen);
if (IS_ERR(handler)) {
err = PTR_ERR(handler);
@@ -3998,6 +4018,9 @@ static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
*/
mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
break;
+ case IB_FLOW_ACTION_UNSPECIFIED:
+ mlx5_ib_destroy_flow_action_raw(maction);
+ break;
default:
WARN_ON(true);
break;
@@ -4012,13 +4035,17 @@ static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *mqp = to_mqp(ibqp);
int err;
+ u16 uid;
+
+ uid = ibqp->pd ?
+ to_mpd(ibqp->pd)->uid : 0;
if (mqp->flags & MLX5_IB_QP_UNDERLAY) {
mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n");
return -EOPNOTSUPP;
}
- err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
+ err = mlx5_cmd_attach_mcg(dev->mdev, gid, ibqp->qp_num, uid);
if (err)
mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
ibqp->qp_num, gid->raw);
@@ -4030,8 +4057,11 @@ static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
int err;
+ u16 uid;
- err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
+ uid = ibqp->pd ?
+ to_mpd(ibqp->pd)->uid : 0;
+ err = mlx5_cmd_detach_mcg(dev->mdev, gid, ibqp->qp_num, uid);
if (err)
mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
ibqp->qp_num, gid->raw);
@@ -4052,16 +4082,17 @@ static int init_node_data(struct mlx5_ib_dev *dev)
return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
}
-static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t fw_pages_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
}
+static DEVICE_ATTR_RO(fw_pages);
-static ssize_t show_reg_pages(struct device *device,
+static ssize_t reg_pages_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
@@ -4069,44 +4100,47 @@ static ssize_t show_reg_pages(struct device *device,
return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}
+static DEVICE_ATTR_RO(reg_pages);
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%x\n", dev->mdev->rev_id);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
dev->mdev->board_id);
}
+static DEVICE_ATTR_RO(board_id);
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
-static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
+static struct attribute *mlx5_class_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ &dev_attr_fw_pages.attr,
+ &dev_attr_reg_pages.attr,
+ NULL,
+};
-static struct device_attribute *mlx5_class_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id,
- &dev_attr_fw_pages,
- &dev_attr_reg_pages,
+static const struct attribute_group mlx5_attr_group = {
+ .attrs = mlx5_class_attributes,
};
static void pkey_change_handler(struct work_struct *work)
@@ -5631,7 +5665,6 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
- const char *name;
int err;
int i;
@@ -5664,12 +5697,6 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
- if (!mlx5_lag_is_active(mdev))
- name = "mlx5_%d";
- else
- name = "mlx5_bond_%d";
-
- strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX);
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
@@ -5876,7 +5903,7 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
(MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) ||
MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
- mutex_init(&dev->lb_mutex);
+ mutex_init(&dev->lb.mutex);
return 0;
}
@@ -6083,7 +6110,14 @@ static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev)
int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
- return ib_register_device(&dev->ib_dev, NULL);
+ const char *name;
+
+ rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group);
+ if (!mlx5_lag_is_active(dev->mdev))
+ name = "mlx5_%d";
+ else
+ name = "mlx5_bond_%d";
+ return ib_register_device(&dev->ib_dev, name, NULL);
}
void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
@@ -6113,21 +6147,6 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
cancel_delay_drop(dev);
}
-int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
-{
- int err;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
- err = device_create_file(&dev->ib_dev.dev,
- mlx5_class_attributes[i]);
- if (err)
- return err;
- }
-
- return 0;
-}
-
static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
{
mlx5_ib_register_vport_reps(dev);
@@ -6151,6 +6170,8 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
profile->stage[stage].cleanup(dev);
}
+ if (dev->devx_whitelist_uid)
+ mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
ib_dealloc_device((struct ib_device *)dev);
}
@@ -6159,8 +6180,7 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
{
int err;
int i;
-
- printk_once(KERN_INFO "%s", mlx5_version);
+ int uid;
for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
if (profile->stage[i].init) {
@@ -6170,6 +6190,10 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
}
}
+ uid = mlx5_ib_devx_create(dev);
+ if (uid > 0)
+ dev->devx_whitelist_uid = uid;
+
dev->profile = profile;
dev->ib_active = true;
@@ -6230,9 +6254,6 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
mlx5_ib_stage_delay_drop_init,
mlx5_ib_stage_delay_drop_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
- mlx5_ib_stage_class_attr_init,
- NULL),
};
static const struct mlx5_ib_profile nic_rep_profile = {
@@ -6275,9 +6296,6 @@ static const struct mlx5_ib_profile nic_rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
- STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
- mlx5_ib_stage_class_attr_init,
- NULL),
STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
mlx5_ib_stage_rep_reg_init,
mlx5_ib_stage_rep_reg_cleanup),
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index f3dbd75a0a96..549234988bb4 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -57,7 +57,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
int entry;
unsigned long page_shift = umem->page_shift;
- if (umem->odp_data) {
+ if (umem->is_odp) {
*ncont = ib_umem_page_count(umem);
*count = *ncont << (page_shift - PAGE_SHIFT);
*shift = page_shift;
@@ -152,14 +152,13 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
struct scatterlist *sg;
int entry;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- const bool odp = umem->odp_data != NULL;
-
- if (odp) {
+ if (umem->is_odp) {
WARN_ON(shift != 0);
WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
for (i = 0; i < num_pages; ++i) {
- dma_addr_t pa = umem->odp_data->dma_list[offset + i];
+ dma_addr_t pa =
+ to_ib_umem_odp(umem)->dma_list[offset + i];
pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 289c18db2611..b651a7a6fde9 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -39,8 +39,10 @@
#include <rdma/ib_smi.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cq.h>
+#include <linux/mlx5/fs.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/srq.h>
+#include <linux/mlx5/fs.h>
#include <linux/types.h>
#include <linux/mlx5/transobj.h>
#include <rdma/ib_user_verbs.h>
@@ -48,17 +50,17 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
-#define mlx5_ib_dbg(dev, format, arg...) \
-pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
- __LINE__, current->pid, ##arg)
+#define mlx5_ib_dbg(_dev, format, arg...) \
+ dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
+ __LINE__, current->pid, ##arg)
-#define mlx5_ib_err(dev, format, arg...) \
-pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
- __LINE__, current->pid, ##arg)
+#define mlx5_ib_err(_dev, format, arg...) \
+ dev_err(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
+ __LINE__, current->pid, ##arg)
-#define mlx5_ib_warn(dev, format, arg...) \
-pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
- __LINE__, current->pid, ##arg)
+#define mlx5_ib_warn(_dev, format, arg...) \
+ dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
+ __LINE__, current->pid, ##arg)
#define field_avail(type, fld, sz) (offsetof(type, fld) + \
sizeof(((type *)0)->fld) <= (sz))
@@ -114,13 +116,6 @@ enum {
MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN,
};
-struct mlx5_ib_vma_private_data {
- struct list_head list;
- struct vm_area_struct *vma;
- /* protect vma_private_list add/del */
- struct mutex *vma_private_list_mutex;
-};
-
struct mlx5_ib_ucontext {
struct ib_ucontext ibucontext;
struct list_head db_page_list;
@@ -132,13 +127,12 @@ struct mlx5_ib_ucontext {
u8 cqe_version;
/* Transport Domain number */
u32 tdn;
- struct list_head vma_private_list;
- /* protect vma_private_list add/del */
- struct mutex vma_private_list_mutex;
u64 lib_caps;
DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
u16 devx_uid;
+ /* For RoCE LAG TX affinity */
+ atomic_t tx_port_affinity;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -149,6 +143,13 @@ static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibuconte
struct mlx5_ib_pd {
struct ib_pd ibpd;
u32 pdn;
+ u16 uid;
+};
+
+enum {
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER,
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT,
+ MLX5_IB_FLOW_ACTION_DECAP,
};
#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1)
@@ -180,6 +181,7 @@ struct mlx5_ib_flow_matcher {
struct mlx5_ib_match_params matcher_mask;
int mask_len;
enum mlx5_ib_flow_type flow_type;
+ enum mlx5_flow_namespace_type ns_type;
u16 priority;
struct mlx5_core_dev *mdev;
atomic_t usecnt;
@@ -188,6 +190,7 @@ struct mlx5_ib_flow_matcher {
struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT];
+ struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS];
struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS];
struct mlx5_flow_table *lag_demux_ft;
@@ -322,6 +325,7 @@ enum {
struct mlx5_ib_rwq_ind_table {
struct ib_rwq_ind_table ib_rwq_ind_tbl;
u32 rqtn;
+ u16 uid;
};
struct mlx5_ib_ubuffer {
@@ -428,7 +432,7 @@ struct mlx5_ib_qp {
struct list_head cq_send_list;
struct mlx5_rate_limit rl;
u32 underlay_qpn;
- bool tunnel_offload_en;
+ u32 flags_en;
/* storage for qp sub type when core qp type is IB_QPT_DRIVER */
enum ib_qp_type qp_sub_type;
};
@@ -536,6 +540,7 @@ struct mlx5_ib_srq {
struct mlx5_ib_xrcd {
struct ib_xrcd ibxrcd;
u32 xrcdn;
+ u16 uid;
};
enum mlx5_ib_mtt_access_flags {
@@ -700,7 +705,7 @@ struct mlx5_roce {
rwlock_t netdev_lock;
struct net_device *netdev;
struct notifier_block nb;
- atomic_t next_port;
+ atomic_t tx_port_affinity;
enum ib_port_state last_port_state;
struct mlx5_ib_dev *dev;
u8 native_port_num;
@@ -815,6 +820,11 @@ struct mlx5_ib_flow_action {
u64 ib_flags;
struct mlx5_accel_esp_xfrm *ctx;
} esp_aes_gcm;
+ struct {
+ struct mlx5_ib_dev *dev;
+ u32 sub_type;
+ u32 action_id;
+ } flow_action_raw;
};
};
@@ -859,9 +869,20 @@ to_mcounters(struct ib_counters *ibcntrs)
return container_of(ibcntrs, struct mlx5_ib_mcounters, ibcntrs);
}
+int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
+ bool is_egress,
+ struct mlx5_flow_act *action);
+struct mlx5_ib_lb_state {
+ /* protect the user_td */
+ struct mutex mutex;
+ u32 user_td;
+ int qps;
+ bool enabled;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
- const struct uverbs_object_tree_def *driver_trees[6];
+ const struct uverbs_object_tree_def *driver_trees[7];
struct mlx5_core_dev *mdev;
struct mlx5_roce roce[MLX5_MAX_PORTS];
int num_ports;
@@ -900,13 +921,12 @@ struct mlx5_ib_dev {
const struct mlx5_ib_profile *profile;
struct mlx5_eswitch_rep *rep;
- /* protect the user_td */
- struct mutex lb_mutex;
- u32 user_td;
+ struct mlx5_ib_lb_state lb;
u8 umr_fence;
struct list_head ib_dev_list;
u64 sys_image_guid;
struct mlx5_memic memic;
+ u16 devx_whitelist_uid;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1017,6 +1037,8 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
int mlx5_ib_destroy_srq(struct ib_srq *srq);
int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
+int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
+void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
@@ -1106,7 +1128,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
int page_shift, __be64 *pas, int access_flags);
void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
-int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
+int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
@@ -1141,7 +1163,7 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
-void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
+void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
unsigned long end);
void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
@@ -1180,7 +1202,6 @@ void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev);
int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev);
void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev);
int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev);
-int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev);
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
int stage);
@@ -1229,22 +1250,20 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
u8 port_num);
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
- struct mlx5_ib_ucontext *context);
-void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
- struct mlx5_ib_ucontext *context);
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev);
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
- void *cmd_in, int inlen, int dest_id, int dest_type);
+ struct mlx5_flow_act *flow_act, void *cmd_in, int inlen,
+ int dest_id, int dest_type);
bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root);
+void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
#else
static inline int
-mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
- struct mlx5_ib_ucontext *context) { return -EOPNOTSUPP; };
-static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
- struct mlx5_ib_ucontext *context) {}
+mlx5_ib_devx_create(struct mlx5_ib_dev *dev) { return -EOPNOTSUPP; };
+static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
static inline const struct uverbs_object_tree_def *
mlx5_ib_get_devx_tree(void) { return NULL; }
static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
@@ -1257,6 +1276,11 @@ mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
{
return 0;
}
+static inline void
+mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
+{
+ return;
+};
#endif
static inline void init_query_mad(struct ib_smp *mad)
{
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index e22314837645..9b195d65a13e 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -98,7 +98,7 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static void update_odp_mr(struct mlx5_ib_mr *mr)
{
- if (mr->umem->odp_data) {
+ if (mr->umem->is_odp) {
/*
* This barrier prevents the compiler from moving the
* setting of umem->odp_data->private to point to our
@@ -107,7 +107,7 @@ static void update_odp_mr(struct mlx5_ib_mr *mr)
* handle invalidations.
*/
smp_wmb();
- mr->umem->odp_data->private = mr;
+ to_ib_umem_odp(mr->umem)->private = mr;
/*
* Make sure we will see the new
* umem->odp_data->private value in the invalidation
@@ -691,7 +691,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
init_completion(&ent->compl);
INIT_WORK(&ent->work, cache_work_func);
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
- queue_work(cache->wq, &ent->work);
if (i > MR_CACHE_LAST_STD_ENTRY) {
mlx5_odp_init_mr_cache_entry(ent);
@@ -711,6 +710,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
ent->limit = dev->mdev->profile->mr_cache[i].limit;
else
ent->limit = 0;
+ queue_work(cache->wq, &ent->work);
}
err = mlx5_mr_cache_debugfs_init(dev);
@@ -1627,14 +1627,16 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
struct ib_umem *umem = mr->umem;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (umem && umem->odp_data) {
+ if (umem && umem->is_odp) {
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem);
+
/* Prevent new page faults from succeeding */
mr->live = 0;
/* Wait for all running page-fault handlers to finish. */
synchronize_srcu(&dev->mr_srcu);
/* Destroy all page mappings */
- if (umem->odp_data->page_list)
- mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
+ if (umem_odp->page_list)
+ mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem),
ib_umem_end(umem));
else
mlx5_ib_free_implicit_mr(mr);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index d216e0d2921d..b04eb6775326 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -61,13 +61,21 @@ static int check_parent(struct ib_umem_odp *odp,
return mr && mr->parent == parent && !odp->dying;
}
+struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr)
+{
+ if (WARN_ON(!mr || !mr->umem || !mr->umem->is_odp))
+ return NULL;
+
+ return to_ib_umem_odp(mr->umem)->per_mm;
+}
+
static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
{
struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent;
- struct ib_ucontext *ctx = odp->umem->context;
+ struct ib_ucontext_per_mm *per_mm = odp->per_mm;
struct rb_node *rb;
- down_read(&ctx->umem_rwsem);
+ down_read(&per_mm->umem_rwsem);
while (1) {
rb = rb_next(&odp->interval_tree.rb);
if (!rb)
@@ -79,19 +87,19 @@ static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
not_found:
odp = NULL;
end:
- up_read(&ctx->umem_rwsem);
+ up_read(&per_mm->umem_rwsem);
return odp;
}
-static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx,
- u64 start, u64 length,
+static struct ib_umem_odp *odp_lookup(u64 start, u64 length,
struct mlx5_ib_mr *parent)
{
+ struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(parent);
struct ib_umem_odp *odp;
struct rb_node *rb;
- down_read(&ctx->umem_rwsem);
- odp = rbt_ib_umem_lookup(&ctx->umem_tree, start, length);
+ down_read(&per_mm->umem_rwsem);
+ odp = rbt_ib_umem_lookup(&per_mm->umem_tree, start, length);
if (!odp)
goto end;
@@ -102,13 +110,13 @@ static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx,
if (!rb)
goto not_found;
odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
- if (ib_umem_start(odp->umem) > start + length)
+ if (ib_umem_start(&odp->umem) > start + length)
goto not_found;
}
not_found:
odp = NULL;
end:
- up_read(&ctx->umem_rwsem);
+ up_read(&per_mm->umem_rwsem);
return odp;
}
@@ -116,7 +124,6 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
size_t nentries, struct mlx5_ib_mr *mr, int flags)
{
struct ib_pd *pd = mr->ibmr.pd;
- struct ib_ucontext *ctx = pd->uobject->context;
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct ib_umem_odp *odp;
unsigned long va;
@@ -131,13 +138,13 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
return;
}
- odp = odp_lookup(ctx, offset * MLX5_IMR_MTT_SIZE,
- nentries * MLX5_IMR_MTT_SIZE, mr);
+ odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE,
+ nentries * MLX5_IMR_MTT_SIZE, mr);
for (i = 0; i < nentries; i++, pklm++) {
pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
va = (offset + i) * MLX5_IMR_MTT_SIZE;
- if (odp && odp->umem->address == va) {
+ if (odp && odp->umem.address == va) {
struct mlx5_ib_mr *mtt = odp->private;
pklm->key = cpu_to_be32(mtt->ibmr.lkey);
@@ -153,13 +160,13 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
static void mr_leaf_free_action(struct work_struct *work)
{
struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
- int idx = ib_umem_start(odp->umem) >> MLX5_IMR_MTT_SHIFT;
+ int idx = ib_umem_start(&odp->umem) >> MLX5_IMR_MTT_SHIFT;
struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
mr->parent = NULL;
synchronize_srcu(&mr->dev->mr_srcu);
- ib_umem_release(odp->umem);
+ ib_umem_release(&odp->umem);
if (imr->live)
mlx5_ib_update_xlt(imr, idx, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT |
@@ -170,22 +177,24 @@ static void mr_leaf_free_action(struct work_struct *work)
wake_up(&imr->q_leaf_free);
}
-void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
+void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
unsigned long end)
{
struct mlx5_ib_mr *mr;
const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT /
sizeof(struct mlx5_mtt)) - 1;
u64 idx = 0, blk_start_idx = 0;
+ struct ib_umem *umem;
int in_block = 0;
u64 addr;
- if (!umem || !umem->odp_data) {
+ if (!umem_odp) {
pr_err("invalidation called on NULL umem or non-ODP umem\n");
return;
}
+ umem = &umem_odp->umem;
- mr = umem->odp_data->private;
+ mr = umem_odp->private;
if (!mr || !mr->ibmr.pd)
return;
@@ -208,7 +217,7 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
* estimate the cost of another UMR vs. the cost of bigger
* UMR.
*/
- if (umem->odp_data->dma_list[idx] &
+ if (umem_odp->dma_list[idx] &
(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) {
if (!in_block) {
blk_start_idx = idx;
@@ -237,13 +246,13 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
* needed.
*/
- ib_umem_odp_unmap_dma_pages(umem, start, end);
+ ib_umem_odp_unmap_dma_pages(umem_odp, start, end);
if (unlikely(!umem->npages && mr->parent &&
- !umem->odp_data->dying)) {
- WRITE_ONCE(umem->odp_data->dying, 1);
+ !umem_odp->dying)) {
+ WRITE_ONCE(umem_odp->dying, 1);
atomic_inc(&mr->parent->num_leaf_free);
- schedule_work(&umem->odp_data->work);
+ schedule_work(&umem_odp->work);
}
}
@@ -366,16 +375,15 @@ fail:
static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
u64 io_virt, size_t bcnt)
{
- struct ib_ucontext *ctx = mr->ibmr.pd->uobject->context;
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
struct ib_umem_odp *odp, *result = NULL;
+ struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
u64 addr = io_virt & MLX5_IMR_MTT_MASK;
int nentries = 0, start_idx = 0, ret;
struct mlx5_ib_mr *mtt;
- struct ib_umem *umem;
- mutex_lock(&mr->umem->odp_data->umem_mutex);
- odp = odp_lookup(ctx, addr, 1, mr);
+ mutex_lock(&odp_mr->umem_mutex);
+ odp = odp_lookup(addr, 1, mr);
mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
io_virt, bcnt, addr, odp);
@@ -385,22 +393,23 @@ next_mr:
if (nentries)
nentries++;
} else {
- umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE);
- if (IS_ERR(umem)) {
- mutex_unlock(&mr->umem->odp_data->umem_mutex);
- return ERR_CAST(umem);
+ odp = ib_alloc_odp_umem(odp_mr->per_mm, addr,
+ MLX5_IMR_MTT_SIZE);
+ if (IS_ERR(odp)) {
+ mutex_unlock(&odp_mr->umem_mutex);
+ return ERR_CAST(odp);
}
- mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags);
+ mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0,
+ mr->access_flags);
if (IS_ERR(mtt)) {
- mutex_unlock(&mr->umem->odp_data->umem_mutex);
- ib_umem_release(umem);
+ mutex_unlock(&odp_mr->umem_mutex);
+ ib_umem_release(&odp->umem);
return ERR_CAST(mtt);
}
- odp = umem->odp_data;
odp->private = mtt;
- mtt->umem = umem;
+ mtt->umem = &odp->umem;
mtt->mmkey.iova = addr;
mtt->parent = mr;
INIT_WORK(&odp->work, mr_leaf_free_action);
@@ -417,7 +426,7 @@ next_mr:
addr += MLX5_IMR_MTT_SIZE;
if (unlikely(addr < io_virt + bcnt)) {
odp = odp_next(odp);
- if (odp && odp->umem->address != addr)
+ if (odp && odp->umem.address != addr)
odp = NULL;
goto next_mr;
}
@@ -432,7 +441,7 @@ next_mr:
}
}
- mutex_unlock(&mr->umem->odp_data->umem_mutex);
+ mutex_unlock(&odp_mr->umem_mutex);
return result;
}
@@ -460,36 +469,36 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
return imr;
}
-static int mr_leaf_free(struct ib_umem *umem, u64 start,
- u64 end, void *cookie)
+static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end,
+ void *cookie)
{
- struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie;
+ struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie;
+ struct ib_umem *umem = &umem_odp->umem;
if (mr->parent != imr)
return 0;
- ib_umem_odp_unmap_dma_pages(umem,
- ib_umem_start(umem),
+ ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
ib_umem_end(umem));
- if (umem->odp_data->dying)
+ if (umem_odp->dying)
return 0;
- WRITE_ONCE(umem->odp_data->dying, 1);
+ WRITE_ONCE(umem_odp->dying, 1);
atomic_inc(&imr->num_leaf_free);
- schedule_work(&umem->odp_data->work);
+ schedule_work(&umem_odp->work);
return 0;
}
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
{
- struct ib_ucontext *ctx = imr->ibmr.pd->uobject->context;
+ struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr);
- down_read(&ctx->umem_rwsem);
- rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX,
+ down_read(&per_mm->umem_rwsem);
+ rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0, ULLONG_MAX,
mr_leaf_free, true, imr);
- up_read(&ctx->umem_rwsem);
+ up_read(&per_mm->umem_rwsem);
wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
}
@@ -497,6 +506,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
u64 io_virt, size_t bcnt, u32 *bytes_mapped)
{
+ struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
u64 access_mask = ODP_READ_ALLOWED_BIT;
int npages = 0, page_shift, np;
u64 start_idx, page_mask;
@@ -505,7 +515,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
size_t size;
int ret;
- if (!mr->umem->odp_data->page_list) {
+ if (!odp_mr->page_list) {
odp = implicit_mr_get_data(mr, io_virt, bcnt);
if (IS_ERR(odp))
@@ -513,11 +523,11 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
mr = odp->private;
} else {
- odp = mr->umem->odp_data;
+ odp = odp_mr;
}
next_mr:
- size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
+ size = min_t(size_t, bcnt, ib_umem_end(&odp->umem) - io_virt);
page_shift = mr->umem->page_shift;
page_mask = ~(BIT(page_shift) - 1);
@@ -533,7 +543,7 @@ next_mr:
*/
smp_rmb();
- ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
+ ret = ib_umem_odp_map_dma_pages(to_ib_umem_odp(mr->umem), io_virt, size,
access_mask, current_seq);
if (ret < 0)
@@ -542,7 +552,8 @@ next_mr:
np = ret;
mutex_lock(&odp->umem_mutex);
- if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+ if (!ib_umem_mmu_notifier_retry(to_ib_umem_odp(mr->umem),
+ current_seq)) {
/*
* No need to check whether the MTTs really belong to
* this MR, since ib_umem_odp_map_dma_pages already
@@ -575,7 +586,7 @@ next_mr:
io_virt += size;
next = odp_next(odp);
- if (unlikely(!next || next->umem->address != io_virt)) {
+ if (unlikely(!next || next->umem.address != io_virt)) {
mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
io_virt, next);
return -EAGAIN;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index daf1eb84cd31..6841c0f9237f 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -37,6 +37,7 @@
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
#include "ib_rep.h"
+#include "cmd.h"
/* not supported currently */
static int wq_signature;
@@ -850,6 +851,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
goto err_umem;
}
+ MLX5_SET(create_qp_in, *in, uid, to_mpd(pd)->uid);
pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
if (ubuffer->umem)
mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
@@ -1051,7 +1053,8 @@ static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
static int is_connected(enum ib_qp_type qp_type)
{
- if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC)
+ if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC ||
+ qp_type == MLX5_IB_QPT_DCI)
return 1;
return 0;
@@ -1059,11 +1062,13 @@ static int is_connected(enum ib_qp_type qp_type)
static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
struct mlx5_ib_qp *qp,
- struct mlx5_ib_sq *sq, u32 tdn)
+ struct mlx5_ib_sq *sq, u32 tdn,
+ struct ib_pd *pd)
{
u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+ MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid);
MLX5_SET(tisc, tisc, transport_domain, tdn);
if (qp->flags & MLX5_IB_QP_UNDERLAY)
MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
@@ -1072,9 +1077,9 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
}
static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
- struct mlx5_ib_sq *sq)
+ struct mlx5_ib_sq *sq, struct ib_pd *pd)
{
- mlx5_core_destroy_tis(dev->mdev, sq->tisn);
+ mlx5_cmd_destroy_tis(dev->mdev, sq->tisn, to_mpd(pd)->uid);
}
static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
@@ -1114,6 +1119,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
goto err_umem;
}
+ MLX5_SET(create_sq_in, in, uid, to_mpd(pd)->uid);
sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
MLX5_SET(sqc, sqc, flush_in_error_en, 1);
if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe))
@@ -1188,7 +1194,7 @@ static size_t get_rq_pas_size(void *qpc)
static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq, void *qpin,
- size_t qpinlen)
+ size_t qpinlen, struct ib_pd *pd)
{
struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
__be64 *pas;
@@ -1209,6 +1215,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
if (!in)
return -ENOMEM;
+ MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING))
MLX5_SET(rqc, rqc, vsd, 1);
@@ -1256,10 +1263,23 @@ static bool tunnel_offload_supported(struct mlx5_core_dev *dev)
MLX5_CAP_ETH(dev, tunnel_stateless_geneve_rx));
}
+static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq,
+ u32 qp_flags_en,
+ struct ib_pd *pd)
+{
+ if (qp_flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
+ mlx5_ib_disable_lb(dev, false, true);
+ mlx5_cmd_destroy_tir(dev->mdev, rq->tirn, to_mpd(pd)->uid);
+}
+
static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq, u32 tdn,
- bool tunnel_offload_en)
+ u32 *qp_flags_en,
+ struct ib_pd *pd)
{
+ u8 lb_flag = 0;
u32 *in;
void *tirc;
int inlen;
@@ -1270,33 +1290,45 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
if (!in)
return -ENOMEM;
+ MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
MLX5_SET(tirc, tirc, transport_domain, tdn);
- if (tunnel_offload_en)
+ if (*qp_flags_en & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
- if (dev->rep)
- MLX5_SET(tirc, tirc, self_lb_block,
- MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
+ if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC)
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+
+ if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
+
+ if (dev->rep) {
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+ *qp_flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
+ }
+
+ MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
+ if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
+ err = mlx5_ib_enable_lb(dev, false, true);
+
+ if (err)
+ destroy_raw_packet_qp_tir(dev, rq, 0, pd);
+ }
kvfree(in);
return err;
}
-static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
- struct mlx5_ib_rq *rq)
-{
- mlx5_core_destroy_tir(dev->mdev, rq->tirn);
-}
-
static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
u32 *in, size_t inlen,
- struct ib_pd *pd)
+ struct ib_pd *pd,
+ struct ib_udata *udata,
+ struct mlx5_ib_create_qp_resp *resp)
{
struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
@@ -1306,9 +1338,10 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
int err;
u32 tdn = mucontext->tdn;
+ u16 uid = to_mpd(pd)->uid;
if (qp->sq.wqe_cnt) {
- err = create_raw_packet_qp_tis(dev, qp, sq, tdn);
+ err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd);
if (err)
return err;
@@ -1316,6 +1349,13 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (err)
goto err_destroy_tis;
+ if (uid) {
+ resp->tisn = sq->tisn;
+ resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TISN;
+ resp->sqn = sq->base.mqp.qpn;
+ resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_SQN;
+ }
+
sq->base.container_mibqp = qp;
sq->base.mqp.event = mlx5_ib_qp_event;
}
@@ -1327,22 +1367,32 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
if (qp->flags & MLX5_IB_QP_PCI_WRITE_END_PADDING)
rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
- err = create_raw_packet_qp_rq(dev, rq, in, inlen);
+ err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd);
if (err)
goto err_destroy_sq;
-
- err = create_raw_packet_qp_tir(dev, rq, tdn,
- qp->tunnel_offload_en);
+ err = create_raw_packet_qp_tir(dev, rq, tdn, &qp->flags_en, pd);
if (err)
goto err_destroy_rq;
+
+ if (uid) {
+ resp->rqn = rq->base.mqp.qpn;
+ resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN;
+ resp->tirn = rq->tirn;
+ resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
+ }
}
qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
rq->base.mqp.qpn;
+ err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp)));
+ if (err)
+ goto err_destroy_tir;
return 0;
+err_destroy_tir:
+ destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, pd);
err_destroy_rq:
destroy_raw_packet_qp_rq(dev, rq);
err_destroy_sq:
@@ -1350,7 +1400,7 @@ err_destroy_sq:
return err;
destroy_raw_packet_qp_sq(dev, sq);
err_destroy_tis:
- destroy_raw_packet_qp_tis(dev, sq);
+ destroy_raw_packet_qp_tis(dev, sq, pd);
return err;
}
@@ -1363,13 +1413,13 @@ static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
if (qp->rq.wqe_cnt) {
- destroy_raw_packet_qp_tir(dev, rq);
+ destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, qp->ibqp.pd);
destroy_raw_packet_qp_rq(dev, rq);
}
if (qp->sq.wqe_cnt) {
destroy_raw_packet_qp_sq(dev, sq);
- destroy_raw_packet_qp_tis(dev, sq);
+ destroy_raw_packet_qp_tis(dev, sq, qp->ibqp.pd);
}
}
@@ -1387,7 +1437,11 @@ static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
- mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn);
+ if (qp->flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
+ mlx5_ib_disable_lb(dev, false, true);
+ mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn,
+ to_mpd(qp->ibqp.pd)->uid);
}
static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
@@ -1410,6 +1464,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
u32 tdn = mucontext->tdn;
struct mlx5_ib_create_qp_rss ucmd = {};
size_t required_cmd_sz;
+ u8 lb_flag = 0;
if (init_attr->qp_type != IB_QPT_RAW_PACKET)
return -EOPNOTSUPP;
@@ -1444,7 +1499,9 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
return -EOPNOTSUPP;
}
- if (ucmd.flags & ~MLX5_QP_FLAG_TUNNEL_OFFLOADS) {
+ if (ucmd.flags & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS |
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)) {
mlx5_ib_dbg(dev, "invalid flags\n");
return -EOPNOTSUPP;
}
@@ -1461,6 +1518,16 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
return -EOPNOTSUPP;
}
+ if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->rep) {
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+ qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
+ }
+
+ if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) {
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
+ qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
+ }
+
err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
if (err) {
mlx5_ib_dbg(dev, "copy failed\n");
@@ -1472,6 +1539,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (!in)
return -ENOMEM;
+ MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
MLX5_SET(tirc, tirc, disp_type,
MLX5_TIRC_DISP_TYPE_INDIRECT);
@@ -1484,6 +1552,8 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
+ MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
+
if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER)
hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner);
else
@@ -1580,26 +1650,141 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
create_tir:
- if (dev->rep)
- MLX5_SET(tirc, tirc, self_lb_block,
- MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
-
err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
+ if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
+ err = mlx5_ib_enable_lb(dev, false, true);
+
+ if (err)
+ mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn,
+ to_mpd(pd)->uid);
+ }
+
if (err)
goto err;
+ if (mucontext->devx_uid) {
+ resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
+ resp.tirn = qp->rss_qp.tirn;
+ }
+
+ err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
+ if (err)
+ goto err_copy;
+
kvfree(in);
/* qpn is reserved for that QP */
qp->trans_qp.base.mqp.qpn = 0;
qp->flags |= MLX5_IB_QP_RSS;
return 0;
+err_copy:
+ mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn, mucontext->devx_uid);
err:
kvfree(in);
return err;
}
+static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr,
+ void *qpc)
+{
+ int rcqe_sz;
+
+ if (init_attr->qp_type == MLX5_IB_QPT_DCI)
+ return;
+
+ rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);
+
+ if (rcqe_sz == 128) {
+ MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
+ return;
+ }
+
+ if (init_attr->qp_type != MLX5_IB_QPT_DCT)
+ MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
+}
+
+static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
+ struct ib_qp_init_attr *init_attr,
+ struct mlx5_ib_create_qp *ucmd,
+ void *qpc)
+{
+ enum ib_qp_type qpt = init_attr->qp_type;
+ int scqe_sz;
+ bool allow_scat_cqe = 0;
+
+ if (qpt == IB_QPT_UC || qpt == IB_QPT_UD)
+ return;
+
+ if (ucmd)
+ allow_scat_cqe = ucmd->flags & MLX5_QP_FLAG_ALLOW_SCATTER_CQE;
+
+ if (!allow_scat_cqe && init_attr->sq_sig_type != IB_SIGNAL_ALL_WR)
+ return;
+
+ scqe_sz = mlx5_ib_get_cqe_size(init_attr->send_cq);
+ if (scqe_sz == 128) {
+ MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE);
+ return;
+ }
+
+ if (init_attr->qp_type != MLX5_IB_QPT_DCI ||
+ MLX5_CAP_GEN(dev->mdev, dc_req_scat_data_cqe))
+ MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
+}
+
+static int atomic_size_to_mode(int size_mask)
+{
+ /* driver does not support atomic_size > 256B
+ * and does not know how to translate bigger sizes
+ */
+ int supported_size_mask = size_mask & 0x1ff;
+ int log_max_size;
+
+ if (!supported_size_mask)
+ return -EOPNOTSUPP;
+
+ log_max_size = __fls(supported_size_mask);
+
+ if (log_max_size > 3)
+ return log_max_size;
+
+ return MLX5_ATOMIC_MODE_8B;
+}
+
+static int get_atomic_mode(struct mlx5_ib_dev *dev,
+ enum ib_qp_type qp_type)
+{
+ u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
+ u8 atomic = MLX5_CAP_GEN(dev->mdev, atomic);
+ int atomic_mode = -EOPNOTSUPP;
+ int atomic_size_mask;
+
+ if (!atomic)
+ return -EOPNOTSUPP;
+
+ if (qp_type == MLX5_IB_QPT_DCT)
+ atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
+ else
+ atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
+
+ if ((atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_CMP_SWAP) ||
+ (atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_FETCH_ADD))
+ atomic_mode = atomic_size_to_mode(atomic_size_mask);
+
+ if (atomic_mode <= 0 &&
+ (atomic_operations & MLX5_ATOMIC_OPS_CMP_SWAP &&
+ atomic_operations & MLX5_ATOMIC_OPS_FETCH_ADD))
+ atomic_mode = MLX5_ATOMIC_MODE_IB_COMP;
+
+ return atomic_mode;
+}
+
+static inline bool check_flags_mask(uint64_t input, uint64_t supported)
+{
+ return (input & ~supported) == 0;
+}
+
static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, struct mlx5_ib_qp *qp)
@@ -1697,20 +1882,47 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
return -EFAULT;
}
+ if (!check_flags_mask(ucmd.flags,
+ MLX5_QP_FLAG_SIGNATURE |
+ MLX5_QP_FLAG_SCATTER_CQE |
+ MLX5_QP_FLAG_TUNNEL_OFFLOADS |
+ MLX5_QP_FLAG_BFREG_INDEX |
+ MLX5_QP_FLAG_TYPE_DCT |
+ MLX5_QP_FLAG_TYPE_DCI |
+ MLX5_QP_FLAG_ALLOW_SCATTER_CQE))
+ return -EINVAL;
+
err = get_qp_user_index(to_mucontext(pd->uobject->context),
&ucmd, udata->inlen, &uidx);
if (err)
return err;
qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
- qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
+ if (MLX5_CAP_GEN(dev->mdev, sctr_data_cqe))
+ qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) {
if (init_attr->qp_type != IB_QPT_RAW_PACKET ||
!tunnel_offload_supported(mdev)) {
mlx5_ib_dbg(dev, "Tunnel offload isn't supported\n");
return -EOPNOTSUPP;
}
- qp->tunnel_offload_en = true;
+ qp->flags_en |= MLX5_QP_FLAG_TUNNEL_OFFLOADS;
+ }
+
+ if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) {
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+ mlx5_ib_dbg(dev, "Self-LB UC isn't supported\n");
+ return -EOPNOTSUPP;
+ }
+ qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
+ }
+
+ if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) {
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+ mlx5_ib_dbg(dev, "Self-LB UM isn't supported\n");
+ return -EOPNOTSUPP;
+ }
+ qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
}
if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) {
@@ -1811,23 +2023,10 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_SET(qpc, qpc, cd_slave_receive, 1);
if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
- int rcqe_sz;
- int scqe_sz;
-
- rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq);
- scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
-
- if (rcqe_sz == 128)
- MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
- else
- MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
-
- if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) {
- if (scqe_sz == 128)
- MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE);
- else
- MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
- }
+ configure_responder_scat_cqe(init_attr, qpc);
+ configure_requester_scat_cqe(dev, init_attr,
+ (pd && pd->uobject) ? &ucmd : NULL,
+ qpc);
}
if (qp->rq.wqe_cnt) {
@@ -1911,7 +2110,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->flags & MLX5_IB_QP_UNDERLAY) {
qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
- err = create_raw_packet_qp(dev, qp, in, inlen, pd);
+ err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata,
+ &resp);
} else {
err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
}
@@ -2192,6 +2392,7 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
goto err_free;
}
+ MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid);
dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
qp->qp_sub_type = MLX5_IB_QPT_DCT;
MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn);
@@ -2200,6 +2401,9 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key);
MLX5_SET(dctc, dctc, user_index, uidx);
+ if (ucmd->flags & MLX5_QP_FLAG_SCATTER_CQE)
+ configure_responder_scat_cqe(attr, dctc);
+
qp->state = IB_QPS_RESET;
return &qp->ibqp;
@@ -2405,13 +2609,15 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp)
return 0;
}
-static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr,
- int attr_mask)
+static int to_mlx5_access_flags(struct mlx5_ib_qp *qp,
+ const struct ib_qp_attr *attr,
+ int attr_mask, __be32 *hw_access_flags)
{
- u32 hw_access_flags = 0;
u8 dest_rd_atomic;
u32 access_flags;
+ struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
+
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
dest_rd_atomic = attr->max_dest_rd_atomic;
else
@@ -2426,13 +2632,25 @@ static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_att
access_flags &= IB_ACCESS_REMOTE_WRITE;
if (access_flags & IB_ACCESS_REMOTE_READ)
- hw_access_flags |= MLX5_QP_BIT_RRE;
- if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
- hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX);
+ *hw_access_flags |= MLX5_QP_BIT_RRE;
+ if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+ qp->ibqp.qp_type == IB_QPT_RC) {
+ int atomic_mode;
+
+ atomic_mode = get_atomic_mode(dev, qp->ibqp.qp_type);
+ if (atomic_mode < 0)
+ return -EOPNOTSUPP;
+
+ *hw_access_flags |= MLX5_QP_BIT_RAE;
+ *hw_access_flags |= atomic_mode << MLX5_ATOMIC_MODE_OFFSET;
+ }
+
if (access_flags & IB_ACCESS_REMOTE_WRITE)
- hw_access_flags |= MLX5_QP_BIT_RWE;
+ *hw_access_flags |= MLX5_QP_BIT_RWE;
+
+ *hw_access_flags = cpu_to_be32(*hw_access_flags);
- return cpu_to_be32(hw_access_flags);
+ return 0;
}
enum {
@@ -2458,7 +2676,8 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
}
static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
- struct mlx5_ib_sq *sq, u8 sl)
+ struct mlx5_ib_sq *sq, u8 sl,
+ struct ib_pd *pd)
{
void *in;
void *tisc;
@@ -2471,6 +2690,7 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
return -ENOMEM;
MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
+ MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);
tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
@@ -2483,7 +2703,8 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
}
static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
- struct mlx5_ib_sq *sq, u8 tx_affinity)
+ struct mlx5_ib_sq *sq, u8 tx_affinity,
+ struct ib_pd *pd)
{
void *in;
void *tisc;
@@ -2496,6 +2717,7 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
return -ENOMEM;
MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1);
+ MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);
tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity);
@@ -2580,7 +2802,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
return modify_raw_packet_eth_prio(dev->mdev,
&qp->raw_packet_qp.sq,
- sl & 0xf);
+ sl & 0xf, qp->ibqp.pd);
return 0;
}
@@ -2728,9 +2950,9 @@ static int ib_mask_to_mlx5_opt(int ib_mask)
return result;
}
-static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
- struct mlx5_ib_rq *rq, int new_state,
- const struct mlx5_modify_raw_qp_param *raw_qp_param)
+static int modify_raw_packet_qp_rq(
+ struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, int new_state,
+ const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
{
void *in;
void *rqc;
@@ -2743,6 +2965,7 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
return -ENOMEM;
MLX5_SET(modify_rq_in, in, rq_state, rq->state);
+ MLX5_SET(modify_rq_in, in, uid, to_mpd(pd)->uid);
rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
MLX5_SET(rqc, rqc, state, new_state);
@@ -2753,8 +2976,9 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
} else
- pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n",
- dev->ib_dev.name);
+ dev_info_once(
+ &dev->ib_dev.dev,
+ "RAW PACKET QP counters are not supported on current FW\n");
}
err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in, inlen);
@@ -2768,10 +2992,9 @@ out:
return err;
}
-static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
- struct mlx5_ib_sq *sq,
- int new_state,
- const struct mlx5_modify_raw_qp_param *raw_qp_param)
+static int modify_raw_packet_qp_sq(
+ struct mlx5_core_dev *dev, struct mlx5_ib_sq *sq, int new_state,
+ const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
{
struct mlx5_ib_qp *ibqp = sq->base.container_mibqp;
struct mlx5_rate_limit old_rl = ibqp->rl;
@@ -2788,6 +3011,7 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
if (!in)
return -ENOMEM;
+ MLX5_SET(modify_sq_in, in, uid, to_mpd(pd)->uid);
MLX5_SET(modify_sq_in, in, sq_state, sq->state);
sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
@@ -2890,7 +3114,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
}
if (modify_rq) {
- err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param);
+ err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param,
+ qp->ibqp.pd);
if (err)
return err;
}
@@ -2898,17 +3123,50 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (modify_sq) {
if (tx_affinity) {
err = modify_raw_packet_tx_affinity(dev->mdev, sq,
- tx_affinity);
+ tx_affinity,
+ qp->ibqp.pd);
if (err)
return err;
}
- return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, raw_qp_param);
+ return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state,
+ raw_qp_param, qp->ibqp.pd);
}
return 0;
}
+static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_pd *pd,
+ struct mlx5_ib_qp_base *qp_base,
+ u8 port_num)
+{
+ struct mlx5_ib_ucontext *ucontext = NULL;
+ unsigned int tx_port_affinity;
+
+ if (pd && pd->ibpd.uobject && pd->ibpd.uobject->context)
+ ucontext = to_mucontext(pd->ibpd.uobject->context);
+
+ if (ucontext) {
+ tx_port_affinity = (unsigned int)atomic_add_return(
+ 1, &ucontext->tx_port_affinity) %
+ MLX5_MAX_PORTS +
+ 1;
+ mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n",
+ tx_port_affinity, qp_base->mqp.qpn, ucontext);
+ } else {
+ tx_port_affinity =
+ (unsigned int)atomic_add_return(
+ 1, &dev->roce[port_num].tx_port_affinity) %
+ MLX5_MAX_PORTS +
+ 1;
+ mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n",
+ tx_port_affinity, qp_base->mqp.qpn);
+ }
+
+ return tx_port_affinity;
+}
+
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state,
@@ -2974,6 +3232,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (!context)
return -ENOMEM;
+ pd = get_pd(qp);
context->flags = cpu_to_be32(mlx5_st << 16);
if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
@@ -3002,9 +3261,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
(ibqp->qp_type == IB_QPT_XRC_TGT)) {
if (mlx5_lag_is_active(dev->mdev)) {
u8 p = mlx5_core_native_port_num(dev->mdev);
- tx_affinity = (unsigned int)atomic_add_return(1,
- &dev->roce[p].next_port) %
- MLX5_MAX_PORTS + 1;
+ tx_affinity = get_tx_affinity(dev, pd, base, p);
context->flags |= cpu_to_be32(tx_affinity << 24);
}
}
@@ -3062,7 +3319,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
goto out;
}
- pd = get_pd(qp);
get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
&send_cq, &recv_cq);
@@ -3092,8 +3348,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
}
- if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
- context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask);
+ if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
+ __be32 access_flags = 0;
+
+ err = to_mlx5_access_flags(qp, attr, attr_mask, &access_flags);
+ if (err)
+ goto out;
+
+ context->params2 |= access_flags;
+ }
if (attr_mask & IB_QP_MIN_RNR_TIMER)
context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
@@ -3243,7 +3506,9 @@ static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new
int req = IB_QP_STATE;
int opt = 0;
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ if (new_state == IB_QPS_RESET) {
+ return is_valid_mask(attr_mask, req, opt);
+ } else if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
req |= IB_QP_PKEY_INDEX | IB_QP_PORT;
return is_valid_mask(attr_mask, req, opt);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
@@ -3307,10 +3572,14 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
MLX5_SET(dctc, dctc, rwe, 1);
if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
- if (!mlx5_ib_dc_atomic_is_supported(dev))
+ int atomic_mode;
+
+ atomic_mode = get_atomic_mode(dev, MLX5_IB_QPT_DCT);
+ if (atomic_mode < 0)
return -EOPNOTSUPP;
+
+ MLX5_SET(dctc, dctc, atomic_mode, atomic_mode);
MLX5_SET(dctc, dctc, rae, 1);
- MLX5_SET(dctc, dctc, atomic_mode, MLX5_ATOMIC_MODE_DCT_CX);
}
MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
MLX5_SET(dctc, dctc, port, attr->port_num);
@@ -3367,7 +3636,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
size_t required_cmd_sz;
int err = -EINVAL;
int port;
- enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
if (ibqp->rwq_ind_tbl)
return -ENOSYS;
@@ -3413,7 +3681,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
}
if (qp->flags & MLX5_IB_QP_UNDERLAY) {
@@ -3424,7 +3691,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
} else if (qp_type != MLX5_IB_QPT_REG_UMR &&
qp_type != MLX5_IB_QPT_DCI &&
- !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) {
+ !ib_modify_qp_is_ok(cur_state, new_state, qp_type,
+ attr_mask)) {
mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
cur_state, new_state, ibqp->qp_type, attr_mask);
goto out;
@@ -4371,6 +4639,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
u8 next_fence = 0;
u8 fence;
+ if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
+ !drain)) {
+ *bad_wr = wr;
+ return -EIO;
+ }
+
if (unlikely(ibqp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
@@ -4380,13 +4654,6 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
spin_lock_irqsave(&qp->sq.lock, flags);
- if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
- err = -EIO;
- *bad_wr = wr;
- nreq = 0;
- goto out;
- }
-
for (nreq = 0; wr; nreq++, wr = wr->next) {
if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
mlx5_ib_warn(dev, "\n");
@@ -4700,18 +4967,17 @@ static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
int ind;
int i;
+ if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
+ !drain)) {
+ *bad_wr = wr;
+ return -EIO;
+ }
+
if (unlikely(ibqp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
spin_lock_irqsave(&qp->rq.lock, flags);
- if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
- err = -EIO;
- *bad_wr = wr;
- nreq = 0;
- goto out;
- }
-
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
for (nreq = 0; wr; nreq++, wr = wr->next) {
@@ -5175,6 +5441,7 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_xrcd *xrcd;
int err;
+ u16 uid;
if (!MLX5_CAP_GEN(dev->mdev, xrc))
return ERR_PTR(-ENOSYS);
@@ -5183,12 +5450,14 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
if (!xrcd)
return ERR_PTR(-ENOMEM);
- err = mlx5_core_xrcd_alloc(dev->mdev, &xrcd->xrcdn);
+ uid = context ? to_mucontext(context)->devx_uid : 0;
+ err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, uid);
if (err) {
kfree(xrcd);
return ERR_PTR(-ENOMEM);
}
+ xrcd->uid = uid;
return &xrcd->ibxrcd;
}
@@ -5196,9 +5465,10 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
+ u16 uid = to_mxrcd(xrcd)->uid;
int err;
- err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn);
+ err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, uid);
if (err)
mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
@@ -5268,6 +5538,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
if (!in)
return -ENOMEM;
+ MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
MLX5_SET(rqc, rqc, mem_rq_type,
MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
@@ -5443,8 +5714,7 @@ static int prepare_user_rq(struct ib_pd *pd,
err = create_user_rq(dev, pd, rwq, &ucmd);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
- if (err)
- return err;
+ return err;
}
rwq->user_index = ucmd.user_index;
@@ -5573,6 +5843,9 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
for (i = 0; i < sz; i++)
MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num);
+ rwq_ind_tbl->uid = to_mpd(init_attr->ind_tbl[0]->pd)->uid;
+ MLX5_SET(create_rqt_in, in, uid, rwq_ind_tbl->uid);
+
err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
kvfree(in);
@@ -5591,7 +5864,7 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
return &rwq_ind_tbl->ib_rwq_ind_tbl;
err_copy:
- mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
+ mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
err:
kfree(rwq_ind_tbl);
return ERR_PTR(err);
@@ -5602,7 +5875,7 @@ int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);
- mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
+ mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
kfree(rwq_ind_tbl);
return 0;
@@ -5653,6 +5926,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
if (wq_state == IB_WQS_ERR)
wq_state = MLX5_RQC_STATE_ERR;
MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
+ MLX5_SET(modify_rq_in, in, uid, to_mpd(wq->pd)->uid);
MLX5_SET(rqc, rqc, state, wq_state);
if (wq_attr_mask & IB_WQ_FLAGS) {
@@ -5684,8 +5958,9 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
MLX5_SET(rqc, rqc, counter_set_id,
dev->port->cnts.set_id);
} else
- pr_info_once("%s: Receive WQ counters are not supported on current FW\n",
- dev->ib_dev.name);
+ dev_info_once(
+ &dev->ib_dev.dev,
+ "Receive WQ counters are not supported on current FW\n");
}
err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index d359fecf7a5b..d012e7dbcc38 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -144,6 +144,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
in->page_offset = offset;
+ in->uid = to_mpd(pd)->uid;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
in->type != IB_SRQT_BASIC)
in->user_index = uidx;
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 093f7755c843..2e5dc0a67cfc 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -58,8 +58,9 @@ static int mthca_update_rate(struct mthca_dev *dev, u8 port_num)
ret = ib_query_port(&dev->ib_dev, port_num, tprops);
if (ret) {
- printk(KERN_WARNING "ib_query_port failed (%d) for %s port %d\n",
- ret, dev->ib_dev.name, port_num);
+ dev_warn(&dev->ib_dev.dev,
+ "ib_query_port failed (%d) forport %d\n", ret,
+ port_num);
goto out;
}
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index f3e80dec1334..92c49bff22bc 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -986,7 +986,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
goto err_free_dev;
}
- if (mthca_cmd_init(mdev)) {
+ err = mthca_cmd_init(mdev);
+ if (err) {
mthca_err(mdev, "Failed to init command interface, aborting.\n");
goto err_free_dev;
}
@@ -1014,8 +1015,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
err = mthca_setup_hca(mdev);
if (err == -EBUSY && (mdev->mthca_flags & MTHCA_FLAG_MSI_X)) {
- if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
- pci_free_irq_vectors(pdev);
+ pci_free_irq_vectors(pdev);
mdev->mthca_flags &= ~MTHCA_FLAG_MSI_X;
err = mthca_setup_hca(mdev);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 0d3473b4596e..691c6f048938 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1076,16 +1076,17 @@ static int mthca_unmap_fmr(struct list_head *fmr_list)
return err;
}
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mthca_dev *dev =
container_of(device, struct mthca_dev, ib_dev.dev);
return sprintf(buf, "%x\n", dev->rev_id);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mthca_dev *dev =
container_of(device, struct mthca_dev, ib_dev.dev);
@@ -1103,23 +1104,26 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
return sprintf(buf, "unknown\n");
}
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mthca_dev *dev =
container_of(device, struct mthca_dev, ib_dev.dev);
return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
}
+static DEVICE_ATTR_RO(board_id);
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static struct attribute *mthca_dev_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
+};
-static struct device_attribute *mthca_dev_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id
+static const struct attribute_group mthca_attr_group = {
+ .attrs = mthca_dev_attributes,
};
static int mthca_init_node_data(struct mthca_dev *dev)
@@ -1192,13 +1196,11 @@ static void get_dev_fw_str(struct ib_device *device, char *str)
int mthca_register_device(struct mthca_dev *dev)
{
int ret;
- int i;
ret = mthca_init_node_data(dev);
if (ret)
return ret;
- strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION;
@@ -1296,20 +1298,12 @@ int mthca_register_device(struct mthca_dev *dev)
mutex_init(&dev->cap_mask_mutex);
+ rdma_set_device_sysfs_group(&dev->ib_dev, &mthca_attr_group);
dev->ib_dev.driver_id = RDMA_DRIVER_MTHCA;
- ret = ib_register_device(&dev->ib_dev, NULL);
+ ret = ib_register_device(&dev->ib_dev, "mthca%d", NULL);
if (ret)
return ret;
- for (i = 0; i < ARRAY_SIZE(mthca_dev_attributes); ++i) {
- ret = device_create_file(&dev->ib_dev.dev,
- mthca_dev_attributes[i]);
- if (ret) {
- ib_unregister_device(&dev->ib_dev);
- return ret;
- }
- }
-
mthca_start_catas_poll(dev);
return 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 3d37f2373d63..9d178ee3c96a 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -872,8 +872,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_UNSPECIFIED)) {
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+ attr_mask)) {
mthca_dbg(dev, "Bad QP transition (transport %d) "
"%d->%d with attr 0x%08x\n",
qp->transport, cur_state, new_state,
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 42b68aa999fc..e00add6d78ec 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -456,9 +456,6 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
void __iomem *mmio_regs = NULL;
u8 hw_rev;
- assert(pcidev != NULL);
- assert(ent != NULL);
-
printk(KERN_INFO PFX "NetEffect RNIC driver v%s loading. (%s)\n",
DRV_VERSION, pci_name(pcidev));
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index bedaa02749fb..a895fe980d10 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -149,18 +149,9 @@ do { \
printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \
} while (0)
-#define assert(expr) \
-do { \
- if (!(expr)) { \
- printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \
- #expr, __FILE__, __func__, __LINE__); \
- } \
-} while (0)
-
#define NES_EVENT_TIMEOUT 1200000
#else
#define nes_debug(level, fmt, args...) no_printk(fmt, ##args)
-#define assert(expr) do {} while (0)
#define NES_EVENT_TIMEOUT 100000
#endif
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index bd0675d8f298..5517e392bc01 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -1443,7 +1443,7 @@ static int nes_init_2025_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_inde
mdelay(1);
nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- } while ((temp_phy_data2 == temp_phy_data));
+ } while (temp_phy_data2 == temp_phy_data);
/* wait for tracking */
counter = 0;
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 61014e251555..16f33454c198 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -146,8 +146,6 @@ static int nes_netdev_open(struct net_device *netdev)
struct list_head *list_pos, *list_temp;
unsigned long flags;
- assert(nesdev != NULL);
-
if (nesvnic->netdev_open == 1)
return 0;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 6940c7215961..92d1cadd4cfd 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -687,7 +687,7 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev,
}
nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n",
- nespd, nesvnic->nesibdev->ibdev.name);
+ nespd, dev_name(&nesvnic->nesibdev->ibdev.dev));
nespd->pd_id = (pd_num << (PAGE_SHIFT-12)) + nesadapter->base_pd;
@@ -2556,8 +2556,8 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
/**
* show_rev
*/
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct nes_ib_device *nesibdev =
container_of(dev, struct nes_ib_device, ibdev.dev);
@@ -2566,40 +2566,40 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
nes_debug(NES_DBG_INIT, "\n");
return sprintf(buf, "%x\n", nesvnic->nesdev->nesadapter->hw_rev);
}
-
+static DEVICE_ATTR_RO(hw_rev);
/**
* show_hca
*/
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
nes_debug(NES_DBG_INIT, "\n");
return sprintf(buf, "NES020\n");
}
-
+static DEVICE_ATTR_RO(hca_type);
/**
* show_board
*/
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
nes_debug(NES_DBG_INIT, "\n");
return sprintf(buf, "%.*s\n", 32, "NES020 Board ID");
}
+static DEVICE_ATTR_RO(board_id);
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-
-static struct device_attribute *nes_dev_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id
+static struct attribute *nes_dev_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
};
+static const struct attribute_group nes_attr_group = {
+ .attrs = nes_dev_attributes,
+};
/**
* nes_query_qp
@@ -3640,7 +3640,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
if (nesibdev == NULL) {
return NULL;
}
- strlcpy(nesibdev->ibdev.name, "nes%d", IB_DEVICE_NAME_MAX);
nesibdev->ibdev.owner = THIS_MODULE;
nesibdev->ibdev.node_type = RDMA_NODE_RNIC;
@@ -3795,10 +3794,11 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
struct nes_vnic *nesvnic = nesibdev->nesvnic;
struct nes_device *nesdev = nesvnic->nesdev;
struct nes_adapter *nesadapter = nesdev->nesadapter;
- int i, ret;
+ int ret;
+ rdma_set_device_sysfs_group(&nesvnic->nesibdev->ibdev, &nes_attr_group);
nesvnic->nesibdev->ibdev.driver_id = RDMA_DRIVER_NES;
- ret = ib_register_device(&nesvnic->nesibdev->ibdev, NULL);
+ ret = ib_register_device(&nesvnic->nesibdev->ibdev, "nes%d", NULL);
if (ret) {
return ret;
}
@@ -3809,19 +3809,6 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
nesibdev->max_qp = (nesadapter->max_qp-NES_FIRST_QPN) / nesadapter->port_count;
nesibdev->max_pd = nesadapter->max_pd / nesadapter->port_count;
- for (i = 0; i < ARRAY_SIZE(nes_dev_attributes); ++i) {
- ret = device_create_file(&nesibdev->ibdev.dev, nes_dev_attributes[i]);
- if (ret) {
- while (i > 0) {
- i--;
- device_remove_file(&nesibdev->ibdev.dev,
- nes_dev_attributes[i]);
- }
- ib_unregister_device(&nesibdev->ibdev);
- return ret;
- }
- }
-
nesvnic->of_device_registered = 1;
return 0;
@@ -3834,15 +3821,9 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev)
{
struct nes_vnic *nesvnic = nesibdev->nesvnic;
- int i;
- for (i = 0; i < ARRAY_SIZE(nes_dev_attributes); ++i) {
- device_remove_file(&nesibdev->ibdev.dev, nes_dev_attributes[i]);
- }
-
- if (nesvnic->of_device_registered) {
+ if (nesvnic->of_device_registered)
ib_unregister_device(&nesibdev->ibdev);
- }
nesvnic->of_device_registered = 0;
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index e578281471af..241a57a07485 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -792,7 +792,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
qp->srq->ibsrq.
srq_context);
} else if (dev_event) {
- pr_err("%s: Fatal event received\n", dev->ibdev.name);
+ dev_err(&dev->ibdev.dev, "Fatal event received\n");
ib_dispatch_event(&ib_evt);
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 7832ee3e0c84..873cc7f6fe61 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -114,9 +114,37 @@ static void get_dev_fw_str(struct ib_device *device, char *str)
snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", &dev->attr.fw_ver[0]);
}
+/* OCRDMA sysfs interface */
+static ssize_t hw_rev_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
+}
+static DEVICE_ATTR_RO(hw_rev);
+
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]);
+}
+static DEVICE_ATTR_RO(hca_type);
+
+static struct attribute *ocrdma_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ NULL
+};
+
+static const struct attribute_group ocrdma_attr_group = {
+ .attrs = ocrdma_attributes,
+};
+
static int ocrdma_register_device(struct ocrdma_dev *dev)
{
- strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX);
ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid);
BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC,
@@ -213,8 +241,9 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.destroy_srq = ocrdma_destroy_srq;
dev->ibdev.post_srq_recv = ocrdma_post_srq_recv;
}
+ rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group);
dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA;
- return ib_register_device(&dev->ibdev, NULL);
+ return ib_register_device(&dev->ibdev, "ocrdma%d", NULL);
}
static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
@@ -260,42 +289,9 @@ static void ocrdma_free_resources(struct ocrdma_dev *dev)
kfree(dev->cq_tbl);
}
-/* OCRDMA sysfs interface */
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct ocrdma_dev *dev = dev_get_drvdata(device);
-
- return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
-}
-
-static ssize_t show_hca_type(struct device *device,
- struct device_attribute *attr, char *buf)
-{
- struct ocrdma_dev *dev = dev_get_drvdata(device);
-
- return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]);
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
-
-static struct device_attribute *ocrdma_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type
-};
-
-static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
- device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
-}
-
static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
{
- int status = 0, i;
+ int status = 0;
u8 lstate = 0;
struct ocrdma_dev *dev;
@@ -331,9 +327,6 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
if (!status)
ocrdma_update_link_state(dev, lstate);
- for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
- if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
- goto sysfs_err;
/* Init stats */
ocrdma_add_port_stats(dev);
/* Interrupt Moderation */
@@ -348,8 +341,6 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
dev_name(&dev->nic_info.pdev->dev), dev->id);
return dev;
-sysfs_err:
- ocrdma_remove_sysfiles(dev);
alloc_err:
ocrdma_free_resources(dev);
ocrdma_cleanup_hw(dev);
@@ -376,7 +367,6 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
* of the registered clients.
*/
cancel_delayed_work_sync(&dev->eqd_work);
- ocrdma_remove_sysfiles(dev);
ib_unregister_device(&dev->ibdev);
ocrdma_rem_port_stats(dev);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 24d20a4aa262..290d776edf48 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -764,7 +764,8 @@ void ocrdma_add_port_stats(struct ocrdma_dev *dev)
return;
/* Create post stats base dir */
- dev->dir = debugfs_create_dir(dev->ibdev.name, ocrdma_dbgfs_dir);
+ dev->dir =
+ debugfs_create_dir(dev_name(&dev->ibdev.dev), ocrdma_dbgfs_dir);
if (!dev->dir)
goto err;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index c158ca9fde6d..06d2a7f3304c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -1480,8 +1480,7 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_qps = old_qps;
spin_unlock_irqrestore(&qp->q_lock, flags);
- if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_ETHERNET)) {
+ if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
"qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
__func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index a0af6d424aed..8d6ff9df49fe 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -133,6 +133,33 @@ static int qedr_iw_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
+/* QEDR sysfs interface */
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct qedr_dev *dev = dev_get_drvdata(device);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor);
+}
+static DEVICE_ATTR_RO(hw_rev);
+
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET");
+}
+static DEVICE_ATTR_RO(hca_type);
+
+static struct attribute *qedr_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ NULL
+};
+
+static const struct attribute_group qedr_attr_group = {
+ .attrs = qedr_attributes,
+};
+
static int qedr_iw_register_device(struct qedr_dev *dev)
{
dev->ibdev.node_type = RDMA_NODE_RNIC;
@@ -170,8 +197,6 @@ static int qedr_register_device(struct qedr_dev *dev)
{
int rc;
- strlcpy(dev->ibdev.name, "qedr%d", IB_DEVICE_NAME_MAX);
-
dev->ibdev.node_guid = dev->attr.node_guid;
memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC));
dev->ibdev.owner = THIS_MODULE;
@@ -262,9 +287,9 @@ static int qedr_register_device(struct qedr_dev *dev)
dev->ibdev.get_link_layer = qedr_link_layer;
dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str;
-
+ rdma_set_device_sysfs_group(&dev->ibdev, &qedr_attr_group);
dev->ibdev.driver_id = RDMA_DRIVER_QEDR;
- return ib_register_device(&dev->ibdev, NULL);
+ return ib_register_device(&dev->ibdev, "qedr%d", NULL);
}
/* This function allocates fast-path status block memory */
@@ -404,37 +429,6 @@ err1:
return rc;
}
-/* QEDR sysfs interface */
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct qedr_dev *dev = dev_get_drvdata(device);
-
- return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor);
-}
-
-static ssize_t show_hca_type(struct device *device,
- struct device_attribute *attr, char *buf)
-{
- return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET");
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
-
-static struct device_attribute *qedr_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type
-};
-
-static void qedr_remove_sysfiles(struct qedr_dev *dev)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++)
- device_remove_file(&dev->ibdev.dev, qedr_attributes[i]);
-}
-
static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev *pdev)
{
int rc = pci_enable_atomic_ops_to_root(pdev,
@@ -855,7 +849,7 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
{
struct qed_dev_rdma_info dev_info;
struct qedr_dev *dev;
- int rc = 0, i;
+ int rc = 0;
dev = (struct qedr_dev *)ib_alloc_device(sizeof(*dev));
if (!dev) {
@@ -914,18 +908,12 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
goto reg_err;
}
- for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++)
- if (device_create_file(&dev->ibdev.dev, qedr_attributes[i]))
- goto sysfs_err;
-
if (!test_and_set_bit(QEDR_ENET_STATE_BIT, &dev->enet_state))
qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ACTIVE);
DP_DEBUG(dev, QEDR_MSG_INIT, "qedr driver loaded successfully\n");
return dev;
-sysfs_err:
- ib_unregister_device(&dev->ibdev);
reg_err:
qedr_sync_free_irqs(dev);
irq_err:
@@ -944,7 +932,6 @@ static void qedr_remove(struct qedr_dev *dev)
/* First unregister with stack to stop all the active traffic
* of the registered clients.
*/
- qedr_remove_sysfiles(dev);
ib_unregister_device(&dev->ibdev);
qedr_stop_hw(dev);
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index a2d708dceb8d..53bbe6b4e6e6 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -43,7 +43,7 @@
#include "qedr_hsi_rdma.h"
#define QEDR_NODE_DESC "QLogic 579xx RoCE HCA"
-#define DP_NAME(dev) ((dev)->ibdev.name)
+#define DP_NAME(_dev) dev_name(&(_dev)->ibdev.dev)
#define IS_IWARP(_dev) ((_dev)->rdma_type == QED_RDMA_TYPE_IWARP)
#define IS_ROCE(_dev) ((_dev)->rdma_type == QED_RDMA_TYPE_ROCE)
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
index 85578887421b..e1ac2fd60bb1 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
@@ -519,9 +519,9 @@ static inline int qedr_gsi_build_packet(struct qedr_dev *dev,
}
if (ether_addr_equal(udh.eth.smac_h, udh.eth.dmac_h))
- packet->tx_dest = QED_ROCE_LL2_TX_DEST_LB;
+ packet->tx_dest = QED_LL2_TX_DEST_LB;
else
- packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW;
+ packet->tx_dest = QED_LL2_TX_DEST_NW;
packet->roce_mode = roce_mode;
memcpy(packet->header.vaddr, ud_header_buffer, header_size);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 8cc3df24e04e..82ee4b4a7084 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -1447,7 +1447,6 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
u64 pbl_base_addr, phy_prod_pair_addr;
struct ib_ucontext *ib_ctx = NULL;
struct qedr_srq_hwq_info *hw_srq;
- struct qedr_ucontext *ctx = NULL;
u32 page_cnt, page_size;
struct qedr_srq *srq;
int rc = 0;
@@ -1473,7 +1472,6 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
if (udata && ibpd->uobject && ibpd->uobject->context) {
ib_ctx = ibpd->uobject->context;
- ctx = get_qedr_ucontext(ib_ctx);
if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
DP_ERR(dev,
@@ -2240,8 +2238,7 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (rdma_protocol_roce(&dev->ibdev, 1)) {
if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
- ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_ETHERNET)) {
+ ibqp->qp_type, attr_mask)) {
DP_ERR(dev,
"modify qp: invalid attribute mask=0x%x specified for\n"
"qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 3461df002f81..83d2349188db 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1390,13 +1390,13 @@ static inline u32 qib_get_hdrqtail(const struct qib_ctxtdata *rcd)
*/
extern const char ib_qib_version[];
+extern const struct attribute_group qib_attr_group;
int qib_device_create(struct qib_devdata *);
void qib_device_remove(struct qib_devdata *);
int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
struct kobject *kobj);
-int qib_verbs_register_sysfs(struct qib_devdata *);
void qib_verbs_unregister_sysfs(struct qib_devdata *);
/* Hook for sysfs read of QSFP */
extern int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len);
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 344e401915f7..a81905df2d0f 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -378,25 +378,22 @@ void qib_flush_qp_waiters(struct rvt_qp *qp)
* qib_check_send_wqe - validate wr/wqe
* @qp - The qp
* @wqe - The built wqe
+ * @call_send - Determine if the send should be posted or scheduled
*
- * validate wr/wqe. This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
- *
- * Returns 1 to force direct progress, 0 otherwise, -EINVAL on failure
+ * Returns 0 on success, -EINVAL on failure
*/
int qib_check_send_wqe(struct rvt_qp *qp,
- struct rvt_swqe *wqe)
+ struct rvt_swqe *wqe, bool *call_send)
{
struct rvt_ah *ah;
- int ret = 0;
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
+ if (wqe->length > qp->pmtu)
+ *call_send = false;
break;
case IB_QPT_SMI:
case IB_QPT_GSI:
@@ -405,12 +402,12 @@ int qib_check_send_wqe(struct rvt_qp *qp,
if (wqe->length > (1 << ah->log_pmtu))
return -EINVAL;
/* progress hint */
- ret = 1;
+ *call_send = true;
break;
default:
break;
}
- return ret;
+ return 0;
}
#ifdef CONFIG_DEBUG_FS
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index f35fdeb14347..6fa002940451 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -254,7 +254,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
goto bail;
}
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
+ rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
/* will get called again */
goto done;
@@ -838,7 +838,7 @@ void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
qib_migrate_qp(qp);
qp->s_retry = qp->s_retry_cnt;
} else if (qp->s_last == qp->s_acked) {
- qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
return;
} else /* XXX need to handle delayed completion */
@@ -1221,7 +1221,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
ibp->rvp.n_other_naks++;
class_b:
if (qp->s_last == qp->s_acked) {
- qib_send_complete(qp, wqe, status);
+ rvt_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
break;
@@ -1425,7 +1425,8 @@ read_middle:
qp->s_rdma_read_len -= pmtu;
update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags);
- qib_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0);
+ rvt_copy_sge(qp, &qp->s_rdma_read_sge,
+ data, pmtu, false, false);
goto bail;
case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1471,7 +1472,8 @@ read_last:
if (unlikely(tlen != qp->s_rdma_read_len))
goto ack_len_err;
aeth = be32_to_cpu(ohdr->u.aeth);
- qib_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0);
+ rvt_copy_sge(qp, &qp->s_rdma_read_sge,
+ data, tlen, false, false);
WARN_ON(qp->s_rdma_read_sge.num_sge);
(void) do_rc_ack(qp, aeth, psn,
OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
@@ -1490,7 +1492,7 @@ ack_len_err:
status = IB_WC_LOC_LEN_ERR;
ack_err:
if (qp->s_last == qp->s_acked) {
- qib_send_complete(qp, wqe, status);
+ rvt_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
ack_done:
@@ -1844,7 +1846,7 @@ send_middle:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto nack_inv;
- qib_copy_sge(&qp->r_sge, data, pmtu, 1);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -1890,7 +1892,7 @@ send_last:
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
- qib_copy_sge(&qp->r_sge, data, tlen, 1);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
qp->r_msn++;
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index f8a7de795beb..1fa21938f310 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -171,307 +171,6 @@ err:
}
/**
- * qib_ruc_loopback - handle UC and RC lookback requests
- * @sqp: the sending QP
- *
- * This is called from qib_do_send() to
- * forward a WQE addressed to the same HCA.
- * Note that although we are single threaded due to the tasklet, we still
- * have to protect against post_send(). We don't have to worry about
- * receive interrupts since this is a connected protocol and all packets
- * will pass through here.
- */
-static void qib_ruc_loopback(struct rvt_qp *sqp)
-{
- struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
- struct qib_pportdata *ppd = ppd_from_ibp(ibp);
- struct qib_devdata *dd = ppd->dd;
- struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
- struct rvt_qp *qp;
- struct rvt_swqe *wqe;
- struct rvt_sge *sge;
- unsigned long flags;
- struct ib_wc wc;
- u64 sdata;
- atomic64_t *maddr;
- enum ib_wc_status send_status;
- int release;
- int ret;
-
- rcu_read_lock();
- /*
- * Note that we check the responder QP state after
- * checking the requester's state.
- */
- qp = rvt_lookup_qpn(rdi, &ibp->rvp, sqp->remote_qpn);
- if (!qp)
- goto done;
-
- spin_lock_irqsave(&sqp->s_lock, flags);
-
- /* Return if we are already busy processing a work request. */
- if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
- !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
- goto unlock;
-
- sqp->s_flags |= RVT_S_BUSY;
-
-again:
- if (sqp->s_last == READ_ONCE(sqp->s_head))
- goto clr_busy;
- wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
-
- /* Return if it is not OK to start a new work reqeust. */
- if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
- if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
- goto clr_busy;
- /* We are in the error state, flush the work request. */
- send_status = IB_WC_WR_FLUSH_ERR;
- goto flush_send;
- }
-
- /*
- * We can rely on the entry not changing without the s_lock
- * being held until we update s_last.
- * We increment s_cur to indicate s_last is in progress.
- */
- if (sqp->s_last == sqp->s_cur) {
- if (++sqp->s_cur >= sqp->s_size)
- sqp->s_cur = 0;
- }
- spin_unlock_irqrestore(&sqp->s_lock, flags);
-
- if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
- qp->ibqp.qp_type != sqp->ibqp.qp_type) {
- ibp->rvp.n_pkt_drops++;
- /*
- * For RC, the requester would timeout and retry so
- * shortcut the timeouts and just signal too many retries.
- */
- if (sqp->ibqp.qp_type == IB_QPT_RC)
- send_status = IB_WC_RETRY_EXC_ERR;
- else
- send_status = IB_WC_SUCCESS;
- goto serr;
- }
-
- memset(&wc, 0, sizeof(wc));
- send_status = IB_WC_SUCCESS;
-
- release = 1;
- sqp->s_sge.sge = wqe->sg_list[0];
- sqp->s_sge.sg_list = wqe->sg_list + 1;
- sqp->s_sge.num_sge = wqe->wr.num_sge;
- sqp->s_len = wqe->length;
- switch (wqe->wr.opcode) {
- case IB_WR_SEND_WITH_IMM:
- wc.wc_flags = IB_WC_WITH_IMM;
- wc.ex.imm_data = wqe->wr.ex.imm_data;
- /* FALLTHROUGH */
- case IB_WR_SEND:
- ret = rvt_get_rwqe(qp, false);
- if (ret < 0)
- goto op_err;
- if (!ret)
- goto rnr_nak;
- break;
-
- case IB_WR_RDMA_WRITE_WITH_IMM:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
- goto inv_err;
- wc.wc_flags = IB_WC_WITH_IMM;
- wc.ex.imm_data = wqe->wr.ex.imm_data;
- ret = rvt_get_rwqe(qp, true);
- if (ret < 0)
- goto op_err;
- if (!ret)
- goto rnr_nak;
- /* FALLTHROUGH */
- case IB_WR_RDMA_WRITE:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
- goto inv_err;
- if (wqe->length == 0)
- break;
- if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
- wqe->rdma_wr.remote_addr,
- wqe->rdma_wr.rkey,
- IB_ACCESS_REMOTE_WRITE)))
- goto acc_err;
- qp->r_sge.sg_list = NULL;
- qp->r_sge.num_sge = 1;
- qp->r_sge.total_len = wqe->length;
- break;
-
- case IB_WR_RDMA_READ:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
- goto inv_err;
- if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
- wqe->rdma_wr.remote_addr,
- wqe->rdma_wr.rkey,
- IB_ACCESS_REMOTE_READ)))
- goto acc_err;
- release = 0;
- sqp->s_sge.sg_list = NULL;
- sqp->s_sge.num_sge = 1;
- qp->r_sge.sge = wqe->sg_list[0];
- qp->r_sge.sg_list = wqe->sg_list + 1;
- qp->r_sge.num_sge = wqe->wr.num_sge;
- qp->r_sge.total_len = wqe->length;
- break;
-
- case IB_WR_ATOMIC_CMP_AND_SWP:
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
- goto inv_err;
- if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
- wqe->atomic_wr.remote_addr,
- wqe->atomic_wr.rkey,
- IB_ACCESS_REMOTE_ATOMIC)))
- goto acc_err;
- /* Perform atomic OP and save result. */
- maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
- sdata = wqe->atomic_wr.compare_add;
- *(u64 *) sqp->s_sge.sge.vaddr =
- (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
- (u64) atomic64_add_return(sdata, maddr) - sdata :
- (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
- sdata, wqe->atomic_wr.swap);
- rvt_put_mr(qp->r_sge.sge.mr);
- qp->r_sge.num_sge = 0;
- goto send_comp;
-
- default:
- send_status = IB_WC_LOC_QP_OP_ERR;
- goto serr;
- }
-
- sge = &sqp->s_sge.sge;
- while (sqp->s_len) {
- u32 len = sqp->s_len;
-
- if (len > sge->length)
- len = sge->length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- BUG_ON(len == 0);
- qib_copy_sge(&qp->r_sge, sge->vaddr, len, release);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (!release)
- rvt_put_mr(sge->mr);
- if (--sqp->s_sge.num_sge)
- *sge = *sqp->s_sge.sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
- sqp->s_len -= len;
- }
- if (release)
- rvt_put_ss(&qp->r_sge);
-
- if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
- goto send_comp;
-
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
- wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
- else
- wc.opcode = IB_WC_RECV;
- wc.wr_id = qp->r_wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- wc.src_qp = qp->remote_qpn;
- wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
- wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
- wc.port_num = 1;
- /* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- wqe->wr.send_flags & IB_SEND_SOLICITED);
-
-send_comp:
- spin_lock_irqsave(&sqp->s_lock, flags);
- ibp->rvp.n_loop_pkts++;
-flush_send:
- sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
- qib_send_complete(sqp, wqe, send_status);
- goto again;
-
-rnr_nak:
- /* Handle RNR NAK */
- if (qp->ibqp.qp_type == IB_QPT_UC)
- goto send_comp;
- ibp->rvp.n_rnr_naks++;
- /*
- * Note: we don't need the s_lock held since the BUSY flag
- * makes this single threaded.
- */
- if (sqp->s_rnr_retry == 0) {
- send_status = IB_WC_RNR_RETRY_EXC_ERR;
- goto serr;
- }
- if (sqp->s_rnr_retry_cnt < 7)
- sqp->s_rnr_retry--;
- spin_lock_irqsave(&sqp->s_lock, flags);
- if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
- goto clr_busy;
- rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
- IB_AETH_CREDIT_SHIFT);
- goto clr_busy;
-
-op_err:
- send_status = IB_WC_REM_OP_ERR;
- wc.status = IB_WC_LOC_QP_OP_ERR;
- goto err;
-
-inv_err:
- send_status = IB_WC_REM_INV_REQ_ERR;
- wc.status = IB_WC_LOC_QP_OP_ERR;
- goto err;
-
-acc_err:
- send_status = IB_WC_REM_ACCESS_ERR;
- wc.status = IB_WC_LOC_PROT_ERR;
-err:
- /* responder goes to error state */
- rvt_rc_error(qp, wc.status);
-
-serr:
- spin_lock_irqsave(&sqp->s_lock, flags);
- qib_send_complete(sqp, wqe, send_status);
- if (sqp->ibqp.qp_type == IB_QPT_RC) {
- int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
-
- sqp->s_flags &= ~RVT_S_BUSY;
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- if (lastwqe) {
- struct ib_event ev;
-
- ev.device = sqp->ibqp.device;
- ev.element.qp = &sqp->ibqp;
- ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
- sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
- }
- goto done;
- }
-clr_busy:
- sqp->s_flags &= ~RVT_S_BUSY;
-unlock:
- spin_unlock_irqrestore(&sqp->s_lock, flags);
-done:
- rcu_read_unlock();
-}
-
-/**
* qib_make_grh - construct a GRH header
* @ibp: a pointer to the IB port
* @hdr: a pointer to the GRH header being constructed
@@ -573,7 +272,7 @@ void qib_do_send(struct rvt_qp *qp)
qp->ibqp.qp_type == IB_QPT_UC) &&
(rdma_ah_get_dlid(&qp->remote_ah_attr) &
~((1 << ppd->lmc) - 1)) == ppd->lid) {
- qib_ruc_loopback(qp);
+ rvt_ruc_loopback(qp);
return;
}
@@ -613,42 +312,3 @@ void qib_do_send(struct rvt_qp *qp)
spin_unlock_irqrestore(&qp->s_lock, flags);
}
-
-/*
- * This should be called with s_lock held.
- */
-void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
- enum ib_wc_status status)
-{
- u32 old_last, last;
-
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
- return;
-
- last = qp->s_last;
- old_last = last;
- if (++last >= qp->s_size)
- last = 0;
- qp->s_last = last;
- /* See post_send() */
- barrier();
- rvt_put_swqe(wqe);
- if (qp->ibqp.qp_type == IB_QPT_UD ||
- qp->ibqp.qp_type == IB_QPT_SMI ||
- qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
-
- rvt_qp_swqe_complete(qp,
- wqe,
- ib_qib_wc_opcode[wqe->wr.opcode],
- status);
-
- if (qp->s_acked == old_last)
- qp->s_acked = last;
- if (qp->s_cur == old_last)
- qp->s_cur = last;
- if (qp->s_tail == old_last)
- qp->s_tail = last;
- if (qp->state == IB_QPS_SQD && last == qp->s_cur)
- qp->s_draining = 0;
-}
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index d0723d4aef5c..757d4c9d713d 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -651,7 +651,7 @@ unmap:
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)
rvt_error_qp(qp, IB_WC_GENERAL_ERR);
} else if (qp->s_wqe)
- qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
+ rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
spin_unlock(&qp->s_lock);
spin_unlock(&qp->r_lock);
/* return zero to process the next send work request */
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index ca2638d8f35e..1cf4ca3f23e3 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -551,17 +551,18 @@ static struct kobj_type qib_diagc_ktype = {
* Start of per-unit (or driver, in some cases, but replicated
* per unit) functions (these get a device *)
*/
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+ char *buf)
{
struct qib_ibdev *dev =
container_of(device, struct qib_ibdev, rdi.ibdev.dev);
return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
container_of(device, struct qib_ibdev, rdi.ibdev.dev);
@@ -574,15 +575,18 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
return ret;
}
+static DEVICE_ATTR_RO(hca_type);
+static DEVICE_ATTR(board_id, 0444, hca_type_show, NULL);
-static ssize_t show_version(struct device *device,
+static ssize_t version_show(struct device *device,
struct device_attribute *attr, char *buf)
{
/* The string printed here is already newline-terminated. */
return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version);
}
+static DEVICE_ATTR_RO(version);
-static ssize_t show_boardversion(struct device *device,
+static ssize_t boardversion_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
@@ -592,9 +596,9 @@ static ssize_t show_boardversion(struct device *device,
/* The string printed here is already newline-terminated. */
return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
}
+static DEVICE_ATTR_RO(boardversion);
-
-static ssize_t show_localbus_info(struct device *device,
+static ssize_t localbus_info_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
@@ -604,9 +608,9 @@ static ssize_t show_localbus_info(struct device *device,
/* The string printed here is already newline-terminated. */
return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info);
}
+static DEVICE_ATTR_RO(localbus_info);
-
-static ssize_t show_nctxts(struct device *device,
+static ssize_t nctxts_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
@@ -620,9 +624,10 @@ static ssize_t show_nctxts(struct device *device,
(dd->first_user_ctxt > dd->cfgctxts) ? 0 :
(dd->cfgctxts - dd->first_user_ctxt));
}
+static DEVICE_ATTR_RO(nctxts);
-static ssize_t show_nfreectxts(struct device *device,
- struct device_attribute *attr, char *buf)
+static ssize_t nfreectxts_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
container_of(device, struct qib_ibdev, rdi.ibdev.dev);
@@ -631,8 +636,9 @@ static ssize_t show_nfreectxts(struct device *device,
/* Return the number of free user ports (contexts) available. */
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
}
+static DEVICE_ATTR_RO(nfreectxts);
-static ssize_t show_serial(struct device *device,
+static ssize_t serial_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
@@ -644,8 +650,9 @@ static ssize_t show_serial(struct device *device,
strcat(buf, "\n");
return strlen(buf);
}
+static DEVICE_ATTR_RO(serial);
-static ssize_t store_chip_reset(struct device *device,
+static ssize_t chip_reset_store(struct device *device,
struct device_attribute *attr, const char *buf,
size_t count)
{
@@ -663,11 +670,12 @@ static ssize_t store_chip_reset(struct device *device,
bail:
return ret < 0 ? ret : count;
}
+static DEVICE_ATTR_WO(chip_reset);
/*
* Dump tempsense regs. in decimal, to ease shell-scripts.
*/
-static ssize_t show_tempsense(struct device *device,
+static ssize_t tempsense_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct qib_ibdev *dev =
@@ -695,6 +703,7 @@ static ssize_t show_tempsense(struct device *device,
*(signed char *)(regvals + 7));
return ret;
}
+static DEVICE_ATTR_RO(tempsense);
/*
* end of per-unit (or driver, in some cases, but replicated
@@ -702,30 +711,23 @@ static ssize_t show_tempsense(struct device *device,
*/
/* start of per-unit file structures and support code */
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(version, S_IRUGO, show_version, NULL);
-static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL);
-static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL);
-static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
-static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
-static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
-static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL);
-static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
-
-static struct device_attribute *qib_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id,
- &dev_attr_version,
- &dev_attr_nctxts,
- &dev_attr_nfreectxts,
- &dev_attr_serial,
- &dev_attr_boardversion,
- &dev_attr_tempsense,
- &dev_attr_localbus_info,
- &dev_attr_chip_reset,
+static struct attribute *qib_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ &dev_attr_version.attr,
+ &dev_attr_nctxts.attr,
+ &dev_attr_nfreectxts.attr,
+ &dev_attr_serial.attr,
+ &dev_attr_boardversion.attr,
+ &dev_attr_tempsense.attr,
+ &dev_attr_localbus_info.attr,
+ &dev_attr_chip_reset.attr,
+ NULL,
+};
+
+const struct attribute_group qib_attr_group = {
+ .attrs = qib_attributes,
};
int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
@@ -827,27 +829,6 @@ bail:
}
/*
- * Register and create our files in /sys/class/infiniband.
- */
-int qib_verbs_register_sysfs(struct qib_devdata *dd)
-{
- struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
- int i, ret;
-
- for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) {
- ret = device_create_file(&dev->dev, qib_attributes[i]);
- if (ret)
- goto bail;
- }
-
- return 0;
-bail:
- for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i)
- device_remove_file(&dev->dev, qib_attributes[i]);
- return ret;
-}
-
-/*
* Unregister and remove our files in /sys/class/infiniband.
*/
void qib_verbs_unregister_sysfs(struct qib_devdata *dd)
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 3e54bc11e0ae..30c70ad0f4bf 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -68,7 +68,7 @@ int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags)
goto bail;
}
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
}
@@ -359,7 +359,7 @@ send_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto rewind;
- qib_copy_sge(&qp->r_sge, data, pmtu, 0);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
break;
case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -385,7 +385,7 @@ send_last:
if (unlikely(wc.byte_len > qp->r_len))
goto rewind;
wc.opcode = IB_WC_RECV;
- qib_copy_sge(&qp->r_sge, data, tlen, 0);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
rvt_put_ss(&qp->s_rdma_read_sge);
last_imm:
wc.wr_id = qp->r_wr_id;
@@ -449,7 +449,7 @@ rdma_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto drop;
- qib_copy_sge(&qp->r_sge, data, pmtu, 1);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -479,7 +479,7 @@ rdma_last_imm:
}
wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
- qib_copy_sge(&qp->r_sge, data, tlen, 1);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
goto last_imm;
@@ -495,7 +495,7 @@ rdma_last:
tlen -= (hdrsize + pad + 4);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
- qib_copy_sge(&qp->r_sge, data, tlen, 1);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
break;
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index f8d029a2390f..4d4c31ea4e2d 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -162,8 +162,8 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
const struct ib_global_route *grd = rdma_ah_read_grh(ah_attr);
qib_make_grh(ibp, &grh, grd, 0, 0);
- qib_copy_sge(&qp->r_sge, &grh,
- sizeof(grh), 1);
+ rvt_copy_sge(qp, &qp->r_sge, &grh,
+ sizeof(grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else
rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
@@ -179,7 +179,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (len > sge->sge_length)
len = sge->sge_length;
BUG_ON(len == 0);
- qib_copy_sge(&qp->r_sge, sge->vaddr, len, 1);
+ rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
@@ -260,7 +260,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
goto bail;
}
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
}
@@ -304,7 +304,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
qib_ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, tflags);
*flags = tflags;
- qib_send_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done;
}
}
@@ -551,12 +551,13 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
goto drop;
}
if (has_grh) {
- qib_copy_sge(&qp->r_sge, &hdr->u.l.grh,
- sizeof(struct ib_grh), 1);
+ rvt_copy_sge(qp, &qp->r_sge, &hdr->u.l.grh,
+ sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else
rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
- qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
+ rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
+ true, false);
rvt_put_ss(&qp->r_sge);
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
return;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 41babbc0db58..4b0f5761a646 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -131,27 +131,6 @@ const enum ib_wc_opcode ib_qib_wc_opcode[] = {
*/
__be64 ib_qib_sys_image_guid;
-/**
- * qib_copy_sge - copy data to SGE memory
- * @ss: the SGE state
- * @data: the data to copy
- * @length: the length of the data
- */
-void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release)
-{
- struct rvt_sge *sge = &ss->sge;
-
- while (length) {
- u32 len = rvt_get_sge_length(sge, length);
-
- WARN_ON_ONCE(len == 0);
- memcpy(sge->vaddr, data, len);
- rvt_update_sge(ss, len, release);
- data += len;
- length -= len;
- }
-}
-
/*
* Count the number of DMA descriptors needed to send length bytes of data.
* Don't modify the qib_sge_state to get the count.
@@ -752,7 +731,7 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
spin_lock(&qp->s_lock);
if (tx->wqe)
- qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
+ rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
else if (qp->ibqp.qp_type == IB_QPT_RC) {
struct ib_header *hdr;
@@ -1025,7 +1004,7 @@ done:
}
if (qp->s_wqe) {
spin_lock_irqsave(&qp->s_lock, flags);
- qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
+ rvt_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
spin_lock_irqsave(&qp->s_lock, flags);
@@ -1512,6 +1491,9 @@ static void qib_fill_device_attr(struct qib_devdata *dd)
rdi->dparms.props.max_mcast_grp;
/* post send table */
dd->verbs_dev.rdi.post_parms = qib_post_parms;
+
+ /* opcode translation table */
+ dd->verbs_dev.rdi.wc_opcode = ib_qib_wc_opcode;
}
/**
@@ -1588,7 +1570,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files;
dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
- dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe;
+ dd->verbs_dev.rdi.driver_f.setup_wqe = qib_check_send_wqe;
dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
@@ -1631,6 +1613,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id;
dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB;
dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE;
+ dd->verbs_dev.rdi.dparms.sge_copy_mode = RVT_SGE_COPY_MEMCPY;
qib_fill_device_attr(dd);
@@ -1642,19 +1625,14 @@ int qib_register_ib_device(struct qib_devdata *dd)
i,
dd->rcd[ctxt]->pkeys);
}
+ rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &qib_attr_group);
ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB);
if (ret)
goto err_tx;
- ret = qib_verbs_register_sysfs(dd);
- if (ret)
- goto err_class;
-
return ret;
-err_class:
- rvt_unregister_device(&dd->verbs_dev.rdi);
err_tx:
while (!list_empty(&dev->txreq_free)) {
struct list_head *l = dev->txreq_free.next;
@@ -1716,14 +1694,14 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
* It is only used in post send, which doesn't hold
* the s_lock.
*/
-void _qib_schedule_send(struct rvt_qp *qp)
+bool _qib_schedule_send(struct rvt_qp *qp)
{
struct qib_ibport *ibp =
to_iport(qp->ibqp.device, qp->port_num);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
struct qib_qp_priv *priv = qp->priv;
- queue_work(ppd->qib_wq, &priv->s_work);
+ return queue_work(ppd->qib_wq, &priv->s_work);
}
/**
@@ -1733,8 +1711,9 @@ void _qib_schedule_send(struct rvt_qp *qp)
* This schedules qp progress. The s_lock
* should be held.
*/
-void qib_schedule_send(struct rvt_qp *qp)
+bool qib_schedule_send(struct rvt_qp *qp)
{
if (qib_send_ok(qp))
- _qib_schedule_send(qp);
+ return _qib_schedule_send(qp);
+ return false;
}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 666613eef88f..a4426c24b0d1 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012 - 2018 Intel Corporation. All rights reserved.
* Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -223,8 +223,8 @@ static inline int qib_send_ok(struct rvt_qp *qp)
!(qp->s_flags & RVT_S_ANY_WAIT_SEND));
}
-void _qib_schedule_send(struct rvt_qp *qp);
-void qib_schedule_send(struct rvt_qp *qp);
+bool _qib_schedule_send(struct rvt_qp *qp);
+bool qib_schedule_send(struct rvt_qp *qp);
static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
{
@@ -292,9 +292,6 @@ void qib_put_txreq(struct qib_verbs_txreq *tx);
int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
u32 hdrwords, struct rvt_sge_state *ss, u32 len);
-void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
- int release);
-
void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
@@ -303,7 +300,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
-int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ bool *call_send);
struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
@@ -333,9 +331,6 @@ void _qib_do_send(struct work_struct *work);
void qib_do_send(struct rvt_qp *qp);
-void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
- enum ib_wc_status status);
-
void qib_send_rc_ack(struct rvt_qp *qp);
int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags);
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c
index 92dc66cc2d50..a3115709fb03 100644
--- a/drivers/infiniband/hw/usnic/usnic_debugfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c
@@ -165,6 +165,5 @@ void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow)
{
- if (!IS_ERR_OR_NULL(qp_flow->dbgfs_dentry))
- debugfs_remove(qp_flow->dbgfs_dentry);
+ debugfs_remove(qp_flow->dbgfs_dentry);
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index f0538a460328..73bd00f8d2c8 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -76,7 +76,7 @@ static LIST_HEAD(usnic_ib_ibdev_list);
static int usnic_ib_dump_vf_hdr(void *obj, char *buf, int buf_sz)
{
struct usnic_ib_vf *vf = obj;
- return scnprintf(buf, buf_sz, "PF: %s ", vf->pf->ib_dev.name);
+ return scnprintf(buf, buf_sz, "PF: %s ", dev_name(&vf->pf->ib_dev.dev));
}
/* End callback dump funcs */
@@ -138,7 +138,7 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
netdev = us_ibdev->netdev;
switch (event) {
case NETDEV_REBOOT:
- usnic_info("PF Reset on %s\n", us_ibdev->ib_dev.name);
+ usnic_info("PF Reset on %s\n", dev_name(&us_ibdev->ib_dev.dev));
usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
ib_event.event = IB_EVENT_PORT_ERR;
ib_event.device = &us_ibdev->ib_dev;
@@ -151,7 +151,8 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
if (!us_ibdev->ufdev->link_up &&
netif_carrier_ok(netdev)) {
usnic_fwd_carrier_up(us_ibdev->ufdev);
- usnic_info("Link UP on %s\n", us_ibdev->ib_dev.name);
+ usnic_info("Link UP on %s\n",
+ dev_name(&us_ibdev->ib_dev.dev));
ib_event.event = IB_EVENT_PORT_ACTIVE;
ib_event.device = &us_ibdev->ib_dev;
ib_event.element.port_num = 1;
@@ -159,7 +160,8 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
} else if (us_ibdev->ufdev->link_up &&
!netif_carrier_ok(netdev)) {
usnic_fwd_carrier_down(us_ibdev->ufdev);
- usnic_info("Link DOWN on %s\n", us_ibdev->ib_dev.name);
+ usnic_info("Link DOWN on %s\n",
+ dev_name(&us_ibdev->ib_dev.dev));
usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
ib_event.event = IB_EVENT_PORT_ERR;
ib_event.device = &us_ibdev->ib_dev;
@@ -168,17 +170,17 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
} else {
usnic_dbg("Ignoring %s on %s\n",
netdev_cmd_to_name(event),
- us_ibdev->ib_dev.name);
+ dev_name(&us_ibdev->ib_dev.dev));
}
break;
case NETDEV_CHANGEADDR:
if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr,
sizeof(us_ibdev->ufdev->mac))) {
usnic_dbg("Ignoring addr change on %s\n",
- us_ibdev->ib_dev.name);
+ dev_name(&us_ibdev->ib_dev.dev));
} else {
usnic_info(" %s old mac: %pM new mac: %pM\n",
- us_ibdev->ib_dev.name,
+ dev_name(&us_ibdev->ib_dev.dev),
us_ibdev->ufdev->mac,
netdev->dev_addr);
usnic_fwd_set_mac(us_ibdev->ufdev, netdev->dev_addr);
@@ -193,19 +195,19 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
case NETDEV_CHANGEMTU:
if (us_ibdev->ufdev->mtu != netdev->mtu) {
usnic_info("MTU Change on %s old: %u new: %u\n",
- us_ibdev->ib_dev.name,
+ dev_name(&us_ibdev->ib_dev.dev),
us_ibdev->ufdev->mtu, netdev->mtu);
usnic_fwd_set_mtu(us_ibdev->ufdev, netdev->mtu);
usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
} else {
usnic_dbg("Ignoring MTU change on %s\n",
- us_ibdev->ib_dev.name);
+ dev_name(&us_ibdev->ib_dev.dev));
}
break;
default:
usnic_dbg("Ignoring event %s on %s",
netdev_cmd_to_name(event),
- us_ibdev->ib_dev.name);
+ dev_name(&us_ibdev->ib_dev.dev));
}
mutex_unlock(&us_ibdev->usdev_lock);
}
@@ -267,7 +269,7 @@ static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev,
default:
usnic_info("Ignoring event %s on %s",
netdev_cmd_to_name(event),
- us_ibdev->ib_dev.name);
+ dev_name(&us_ibdev->ib_dev.dev));
}
mutex_unlock(&us_ibdev->usdev_lock);
@@ -364,7 +366,6 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
us_ibdev->ib_dev.dev.parent = &dev->dev;
us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION;
- strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX);
us_ibdev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -416,7 +417,9 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC;
- if (ib_register_device(&us_ibdev->ib_dev, NULL))
+ rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group);
+
+ if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d", NULL))
goto err_fwd_dealloc;
usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu);
@@ -437,9 +440,9 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
kref_init(&us_ibdev->vf_cnt);
usnic_info("Added ibdev: %s netdev: %s with mac %pM Link: %u MTU: %u\n",
- us_ibdev->ib_dev.name, netdev_name(us_ibdev->netdev),
- us_ibdev->ufdev->mac, us_ibdev->ufdev->link_up,
- us_ibdev->ufdev->mtu);
+ dev_name(&us_ibdev->ib_dev.dev),
+ netdev_name(us_ibdev->netdev), us_ibdev->ufdev->mac,
+ us_ibdev->ufdev->link_up, us_ibdev->ufdev->mtu);
return us_ibdev;
err_fwd_dealloc:
@@ -452,7 +455,7 @@ err_dealloc:
static void usnic_ib_device_remove(struct usnic_ib_dev *us_ibdev)
{
- usnic_info("Unregistering %s\n", us_ibdev->ib_dev.name);
+ usnic_info("Unregistering %s\n", dev_name(&us_ibdev->ib_dev.dev));
usnic_ib_sysfs_unregister_usdev(us_ibdev);
usnic_fwd_dev_free(us_ibdev->ufdev);
ib_unregister_device(&us_ibdev->ib_dev);
@@ -591,7 +594,7 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev,
mutex_unlock(&pf->usdev_lock);
usnic_info("Registering usnic VF %s into PF %s\n", pci_name(pdev),
- pf->ib_dev.name);
+ dev_name(&pf->ib_dev.dev));
usnic_ib_log_vf(vf);
return 0;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
index 4210ca14014d..a7e4b2ccfaf8 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -46,9 +46,8 @@
#include "usnic_ib_sysfs.h"
#include "usnic_log.h"
-static ssize_t usnic_ib_show_board(struct device *device,
- struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct usnic_ib_dev *us_ibdev =
container_of(device, struct usnic_ib_dev, ib_dev.dev);
@@ -60,13 +59,13 @@ static ssize_t usnic_ib_show_board(struct device *device,
return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id);
}
+static DEVICE_ATTR_RO(board_id);
/*
* Report the configuration for this PF
*/
static ssize_t
-usnic_ib_show_config(struct device *device, struct device_attribute *attr,
- char *buf)
+config_show(struct device *device, struct device_attribute *attr, char *buf)
{
struct usnic_ib_dev *us_ibdev;
char *ptr;
@@ -94,7 +93,7 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr,
n = scnprintf(ptr, left,
"%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:",
- us_ibdev->ib_dev.name,
+ dev_name(&us_ibdev->ib_dev.dev),
busname,
PCI_SLOT(us_ibdev->pdev->devfn),
PCI_FUNC(us_ibdev->pdev->devfn),
@@ -119,17 +118,17 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr,
UPDATE_PTR_LEFT(n, ptr, left);
} else {
n = scnprintf(ptr, left, "%s: no VFs\n",
- us_ibdev->ib_dev.name);
+ dev_name(&us_ibdev->ib_dev.dev));
UPDATE_PTR_LEFT(n, ptr, left);
}
mutex_unlock(&us_ibdev->usdev_lock);
return ptr - buf;
}
+static DEVICE_ATTR_RO(config);
static ssize_t
-usnic_ib_show_iface(struct device *device, struct device_attribute *attr,
- char *buf)
+iface_show(struct device *device, struct device_attribute *attr, char *buf)
{
struct usnic_ib_dev *us_ibdev;
@@ -138,10 +137,10 @@ usnic_ib_show_iface(struct device *device, struct device_attribute *attr,
return scnprintf(buf, PAGE_SIZE, "%s\n",
netdev_name(us_ibdev->netdev));
}
+static DEVICE_ATTR_RO(iface);
static ssize_t
-usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr,
- char *buf)
+max_vf_show(struct device *device, struct device_attribute *attr, char *buf)
{
struct usnic_ib_dev *us_ibdev;
@@ -150,10 +149,10 @@ usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr,
return scnprintf(buf, PAGE_SIZE, "%u\n",
kref_read(&us_ibdev->vf_cnt));
}
+static DEVICE_ATTR_RO(max_vf);
static ssize_t
-usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr,
- char *buf)
+qp_per_vf_show(struct device *device, struct device_attribute *attr, char *buf)
{
struct usnic_ib_dev *us_ibdev;
int qp_per_vf;
@@ -165,10 +164,10 @@ usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr,
return scnprintf(buf, PAGE_SIZE,
"%d\n", qp_per_vf);
}
+static DEVICE_ATTR_RO(qp_per_vf);
static ssize_t
-usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr,
- char *buf)
+cq_per_vf_show(struct device *device, struct device_attribute *attr, char *buf)
{
struct usnic_ib_dev *us_ibdev;
@@ -177,21 +176,20 @@ usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr,
return scnprintf(buf, PAGE_SIZE, "%d\n",
us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
}
+static DEVICE_ATTR_RO(cq_per_vf);
+
+static struct attribute *usnic_class_attributes[] = {
+ &dev_attr_board_id.attr,
+ &dev_attr_config.attr,
+ &dev_attr_iface.attr,
+ &dev_attr_max_vf.attr,
+ &dev_attr_qp_per_vf.attr,
+ &dev_attr_cq_per_vf.attr,
+ NULL
+};
-static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL);
-static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL);
-static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL);
-static DEVICE_ATTR(max_vf, S_IRUGO, usnic_ib_show_max_vf, NULL);
-static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL);
-static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL);
-
-static struct device_attribute *usnic_class_attributes[] = {
- &dev_attr_board_id,
- &dev_attr_config,
- &dev_attr_iface,
- &dev_attr_max_vf,
- &dev_attr_qp_per_vf,
- &dev_attr_cq_per_vf,
+const struct attribute_group usnic_attr_group = {
+ .attrs = usnic_class_attributes,
};
struct qpn_attribute {
@@ -278,18 +276,6 @@ static struct kobj_type usnic_ib_qpn_type = {
int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev)
{
- int i;
- int err;
- for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
- err = device_create_file(&us_ibdev->ib_dev.dev,
- usnic_class_attributes[i]);
- if (err) {
- usnic_err("Failed to create device file %d for %s eith err %d",
- i, us_ibdev->ib_dev.name, err);
- return -EINVAL;
- }
- }
-
/* create kernel object for looking at individual QPs */
kobject_get(&us_ibdev->ib_dev.dev.kobj);
us_ibdev->qpn_kobj = kobject_create_and_add("qpn",
@@ -304,12 +290,6 @@ int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev)
void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev)
{
- int i;
- for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
- device_remove_file(&us_ibdev->ib_dev.dev,
- usnic_class_attributes[i]);
- }
-
kobject_put(us_ibdev->qpn_kobj);
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
index 3d98e16cfeaf..b1f064cec850 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
@@ -41,4 +41,6 @@ void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev);
void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp);
void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp);
+extern const struct attribute_group usnic_attr_group;
+
#endif /* !USNIC_IB_SYSFS_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 9973ac893635..0b91ff36768a 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -159,7 +159,8 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
err = ib_copy_to_udata(udata, &resp, sizeof(resp));
if (err) {
- usnic_err("Failed to copy udata for %s", us_ibdev->ib_dev.name);
+ usnic_err("Failed to copy udata for %s",
+ dev_name(&us_ibdev->ib_dev.dev));
return err;
}
@@ -197,7 +198,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
vnic = vf->vnic;
if (!usnic_vnic_check_room(vnic, res_spec)) {
usnic_dbg("Found used vnic %s from %s\n",
- us_ibdev->ib_dev.name,
+ dev_name(&us_ibdev->ib_dev.dev),
pci_name(usnic_vnic_get_pdev(
vnic)));
qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev,
@@ -230,7 +231,8 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
spin_unlock(&vf->lock);
}
- usnic_info("No free qp grp found on %s\n", us_ibdev->ib_dev.name);
+ usnic_info("No free qp grp found on %s\n",
+ dev_name(&us_ibdev->ib_dev.dev));
return ERR_PTR(-ENOMEM);
qp_grp_check:
@@ -471,7 +473,7 @@ struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
}
usnic_info("domain 0x%p allocated for context 0x%p and device %s\n",
- pd, context, ibdev->name);
+ pd, context, dev_name(&ibdev->dev));
return &pd->ibpd;
}
@@ -508,20 +510,20 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
if (err) {
usnic_err("%s: cannot copy udata for create_qp\n",
- us_ibdev->ib_dev.name);
+ dev_name(&us_ibdev->ib_dev.dev));
return ERR_PTR(-EINVAL);
}
err = create_qp_validate_user_data(cmd);
if (err) {
usnic_err("%s: Failed to validate user data\n",
- us_ibdev->ib_dev.name);
+ dev_name(&us_ibdev->ib_dev.dev));
return ERR_PTR(-EINVAL);
}
if (init_attr->qp_type != IB_QPT_UD) {
usnic_err("%s asked to make a non-UD QP: %d\n",
- us_ibdev->ib_dev.name, init_attr->qp_type);
+ dev_name(&us_ibdev->ib_dev.dev), init_attr->qp_type);
return ERR_PTR(-EINVAL);
}
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c
index e0a95538c364..82dd810bc000 100644
--- a/drivers/infiniband/hw/usnic/usnic_transport.c
+++ b/drivers/infiniband/hw/usnic/usnic_transport.c
@@ -121,7 +121,7 @@ void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num)
if (type == USNIC_TRANSPORT_ROCE_CUSTOM) {
spin_lock(&roce_bitmap_lock);
if (!port_num) {
- usnic_err("Unreserved unvalid port num 0 for %s\n",
+ usnic_err("Unreserved invalid port num 0 for %s\n",
usnic_transport_to_str(type));
goto out_roce_custom;
}
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 9dd39daa602b..49275a548751 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -54,18 +54,6 @@ static struct workqueue_struct *usnic_uiom_wq;
((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] - \
(void *) &((struct usnic_uiom_chunk *) 0)->page_list[0]))
-static void usnic_uiom_reg_account(struct work_struct *work)
-{
- struct usnic_uiom_reg *umem = container_of(work,
- struct usnic_uiom_reg, work);
-
- down_write(&umem->mm->mmap_sem);
- umem->mm->locked_vm -= umem->diff;
- up_write(&umem->mm->mmap_sem);
- mmput(umem->mm);
- kfree(umem);
-}
-
static int usnic_uiom_dma_fault(struct iommu_domain *domain,
struct device *dev,
unsigned long iova, int flags,
@@ -99,8 +87,9 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
}
static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
- int dmasync, struct list_head *chunk_list)
+ int dmasync, struct usnic_uiom_reg *uiomr)
{
+ struct list_head *chunk_list = &uiomr->chunk_list;
struct page **page_list;
struct scatterlist *sg;
struct usnic_uiom_chunk *chunk;
@@ -114,6 +103,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
int flags;
dma_addr_t pa;
unsigned int gup_flags;
+ struct mm_struct *mm;
/*
* If the combination of the addr and size requested for this memory
@@ -136,7 +126,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT;
- down_write(&current->mm->mmap_sem);
+ uiomr->owning_mm = mm = current->mm;
+ down_write(&mm->mmap_sem);
locked = npages + current->mm->pinned_vm;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -196,10 +187,12 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
out:
if (ret < 0)
usnic_uiom_put_pages(chunk_list, 0);
- else
- current->mm->pinned_vm = locked;
+ else {
+ mm->pinned_vm = locked;
+ mmgrab(uiomr->owning_mm);
+ }
- up_write(&current->mm->mmap_sem);
+ up_write(&mm->mmap_sem);
free_page((unsigned long) page_list);
return ret;
}
@@ -379,7 +372,7 @@ struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
uiomr->pd = pd;
err = usnic_uiom_get_pages(addr, size, writable, dmasync,
- &uiomr->chunk_list);
+ uiomr);
if (err) {
usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n",
vpn_start, vpn_last, err);
@@ -426,29 +419,39 @@ out_put_intervals:
out_put_pages:
usnic_uiom_put_pages(&uiomr->chunk_list, 0);
spin_unlock(&pd->lock);
+ mmdrop(uiomr->owning_mm);
out_free_uiomr:
kfree(uiomr);
return ERR_PTR(err);
}
-void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
- struct ib_ucontext *ucontext)
+static void __usnic_uiom_release_tail(struct usnic_uiom_reg *uiomr)
{
- struct task_struct *task;
- struct mm_struct *mm;
- unsigned long diff;
+ mmdrop(uiomr->owning_mm);
+ kfree(uiomr);
+}
- __usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
+static inline size_t usnic_uiom_num_pages(struct usnic_uiom_reg *uiomr)
+{
+ return PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
+}
- task = get_pid_task(ucontext->tgid, PIDTYPE_PID);
- if (!task)
- goto out;
- mm = get_task_mm(task);
- put_task_struct(task);
- if (!mm)
- goto out;
+static void usnic_uiom_release_defer(struct work_struct *work)
+{
+ struct usnic_uiom_reg *uiomr =
+ container_of(work, struct usnic_uiom_reg, work);
- diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
+ down_write(&uiomr->owning_mm->mmap_sem);
+ uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
+ up_write(&uiomr->owning_mm->mmap_sem);
+
+ __usnic_uiom_release_tail(uiomr);
+}
+
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
+ struct ib_ucontext *context)
+{
+ __usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
/*
* We may be called with the mm's mmap_sem already held. This
@@ -456,25 +459,21 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
* the last reference to our file and calls our release
* method. If there are memory regions to destroy, we'll end
* up here and not be able to take the mmap_sem. In that case
- * we defer the vm_locked accounting to the system workqueue.
+ * we defer the vm_locked accounting to a workqueue.
*/
- if (ucontext->closing) {
- if (!down_write_trylock(&mm->mmap_sem)) {
- INIT_WORK(&uiomr->work, usnic_uiom_reg_account);
- uiomr->mm = mm;
- uiomr->diff = diff;
-
+ if (context->closing) {
+ if (!down_write_trylock(&uiomr->owning_mm->mmap_sem)) {
+ INIT_WORK(&uiomr->work, usnic_uiom_release_defer);
queue_work(usnic_uiom_wq, &uiomr->work);
return;
}
- } else
- down_write(&mm->mmap_sem);
+ } else {
+ down_write(&uiomr->owning_mm->mmap_sem);
+ }
+ uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
+ up_write(&uiomr->owning_mm->mmap_sem);
- mm->pinned_vm -= diff;
- up_write(&mm->mmap_sem);
- mmput(mm);
-out:
- kfree(uiomr);
+ __usnic_uiom_release_tail(uiomr);
}
struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h
index 8c096acff123..b86a9731071b 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.h
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.h
@@ -71,8 +71,7 @@ struct usnic_uiom_reg {
int writable;
struct list_head chunk_list;
struct work_struct work;
- struct mm_struct *mm;
- unsigned long diff;
+ struct mm_struct *owning_mm;
};
struct usnic_uiom_chunk {
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index a5719899f49a..398443f43dc3 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -65,32 +65,36 @@ static struct workqueue_struct *event_wq;
static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context);
static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context);
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", PVRDMA_REV_ID);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", PVRDMA_BOARD_ID);
}
+static DEVICE_ATTR_RO(board_id);
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static struct attribute *pvrdma_class_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL,
+};
-static struct device_attribute *pvrdma_class_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id
+static const struct attribute_group pvrdma_attr_group = {
+ .attrs = pvrdma_class_attributes,
};
static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str)
@@ -160,9 +164,7 @@ static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev,
static int pvrdma_register_device(struct pvrdma_dev *dev)
{
int ret = -1;
- int i = 0;
- strlcpy(dev->ib_dev.name, "vmw_pvrdma%d", IB_DEVICE_NAME_MAX);
dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
dev->sys_image_guid = dev->dsr->caps.sys_image_guid;
dev->flags = 0;
@@ -266,24 +268,16 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
}
dev->ib_dev.driver_id = RDMA_DRIVER_VMW_PVRDMA;
spin_lock_init(&dev->srq_tbl_lock);
+ rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group);
- ret = ib_register_device(&dev->ib_dev, NULL);
+ ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", NULL);
if (ret)
goto err_srq_free;
- for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) {
- ret = device_create_file(&dev->ib_dev.dev,
- pvrdma_class_attributes[i]);
- if (ret)
- goto err_class;
- }
-
dev->ib_active = true;
return 0;
-err_class:
- ib_unregister_device(&dev->ib_dev);
err_srq_free:
kfree(dev->srq_tbl);
err_qp_free:
@@ -735,7 +729,7 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
default:
dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
- event, dev->ib_dev.name);
+ event, dev_name(&dev->ib_dev.dev));
break;
}
}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 60083c0363a5..cf22f57a9f0d 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -499,7 +499,7 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type,
- attr_mask, IB_LINK_LAYER_ETHERNET)) {
+ attr_mask)) {
ret = -EINVAL;
goto out;
}