From e7d80c830489f67b1d0257e6919840100085dea9 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 22 Jun 2017 15:01:10 -0400 Subject: IB/iser: Handle lack of memory management extentions correctly max_fast_reg_page_list_len is only valid when the memory management extentions are signaled by the underlying driver. Fix by adjusting iser_calc_scsi_params() to use ISCSI_ISER_MAX_SG_TABLESIZE when the extentions are not indicated. Reported-by: Thomas Rosenstein Fixes: Commit df749cdc45d9 ("IB/iser: Support up to 8MB data transfer in a single command") Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Acked-by: Sagi Grimberg Tested-by: Thomas Rosenstein Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iser_verbs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index c538a38c91ce..26a004e97ae0 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -708,8 +708,14 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, unsigned short sg_tablesize, sup_sg_tablesize; sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K); - sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE, - device->ib_device->attrs.max_fast_reg_page_list_len); + if (device->ib_device->attrs.device_cap_flags & + IB_DEVICE_MEM_MGT_EXTENSIONS) + sup_sg_tablesize = + min_t( + uint, ISCSI_ISER_MAX_SG_TABLESIZE, + device->ib_device->attrs.max_fast_reg_page_list_len); + else + sup_sg_tablesize = ISCSI_ISER_MAX_SG_TABLESIZE; iser_conn->scsi_sg_tablesize = min(sg_tablesize, sup_sg_tablesize); } -- cgit v1.2.3 From c8c16d3bae967f1c7af541e8d016e5c51e4f010a Mon Sep 17 00:00:00 2001 From: Vladimir Neyelov Date: Sun, 21 May 2017 19:17:31 +0300 Subject: IB/iser: Fix connection teardown race condition Under heavy iser target(scst) start/stop stress during login/logout on iser intitiator side happened trace call provided below. The function iscsi_iser_slave_alloc iser_conn pointer could be NULL, due to the fact that function iscsi_iser_conn_stop can be called before and free iser connection. Let's protect that flow by introducing global mutex. BUG: unable to handle kernel paging request at 0000000000001018 IP: [] iscsi_iser_slave_alloc+0x1e/0x50 [ib_iser] Call Trace: ? scsi_alloc_sdev+0x242/0x300 scsi_probe_and_add_lun+0x9e1/0xea0 ? kfree_const+0x21/0x30 ? kobject_set_name_vargs+0x76/0x90 ? __pm_runtime_resume+0x5b/0x70 __scsi_scan_target+0xf6/0x250 scsi_scan_target+0xea/0x100 iscsi_user_scan_session.part.13+0x101/0x130 [scsi_transport_iscsi] ? iscsi_user_scan_session.part.13+0x130/0x130 [scsi_transport_iscsi] iscsi_user_scan_session+0x1e/0x30 [scsi_transport_iscsi] device_for_each_child+0x50/0x90 iscsi_user_scan+0x44/0x60 [scsi_transport_iscsi] store_scan+0xa8/0x100 ? common_file_perm+0x5d/0x1c0 dev_attr_store+0x18/0x30 sysfs_kf_write+0x37/0x40 kernfs_fop_write+0x12c/0x1c0 __vfs_write+0x18/0x40 vfs_write+0xb5/0x1a0 SyS_write+0x55/0xc0 Fixes: 318d311e8f01 ("iser: Accept arbitrary sg lists mapping if the device supports it") Cc: # v4.5+ Signed-off-by: Vladimir Neyelov Signed-off-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iscsi_iser.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 5a887efb4bdf..37b33d708c2d 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -83,6 +83,7 @@ static struct scsi_host_template iscsi_iser_sht; static struct iscsi_transport iscsi_iser_transport; static struct scsi_transport_template *iscsi_iser_scsi_transport; static struct workqueue_struct *release_wq; +static DEFINE_MUTEX(unbind_iser_conn_mutex); struct iser_global ig; int iser_debug_level = 0; @@ -550,12 +551,14 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) */ if (iser_conn) { mutex_lock(&iser_conn->state_mutex); + mutex_lock(&unbind_iser_conn_mutex); iser_conn_terminate(iser_conn); iscsi_conn_stop(cls_conn, flag); /* unbind */ iser_conn->iscsi_conn = NULL; conn->dd_data = NULL; + mutex_unlock(&unbind_iser_conn_mutex); complete(&iser_conn->stop_completion); mutex_unlock(&iser_conn->state_mutex); @@ -977,13 +980,21 @@ static int iscsi_iser_slave_alloc(struct scsi_device *sdev) struct iser_conn *iser_conn; struct ib_device *ib_dev; + mutex_lock(&unbind_iser_conn_mutex); + session = starget_to_session(scsi_target(sdev))->dd_data; iser_conn = session->leadconn->dd_data; + if (!iser_conn) { + mutex_unlock(&unbind_iser_conn_mutex); + return -ENOTCONN; + } ib_dev = iser_conn->ib_conn.device->ib_device; if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K); + mutex_unlock(&unbind_iser_conn_mutex); + return 0; } -- cgit v1.2.3 From 98e77d9fd7dff05019436370e78c3ec0f9894e25 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 May 2017 11:29:42 +0300 Subject: IB: Convert msleep below 20ms to usleep_range The msleep(1) may do not sleep 1 ms as expected and will sleep longer. The simple conversion from msleep to usleep_range between 1ms and 2ms can solve an issue. The full and comprehensive explanation can be found at [1] and [2]. [1] https://lkml.org/lkml/2007/8/3/250 [2] Documentation/timers/timers-howto.txt Signed-off-by: Leon Romanovsky Reviewed-by: Erez Shitrit Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 3 ++- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/infiniband/hw/mlx4/mcg.c | 2 +- drivers/infiniband/hw/nes/nes_hw.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 2 +- 6 files changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 37d5d29597a4..729f8cc8738b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -995,7 +995,8 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) goto free_work; } ne -= ret; - msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE); + usleep_range(HNS_ROCE_V1_FREE_MR_WAIT_VALUE * 1000, + (1 + HNS_ROCE_V1_FREE_MR_WAIT_VALUE) * 1000); } while (ne && time_before_eq(jiffies, end)); if (ne != 0) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 75b2f7d4cd95..d1b43cbbfea7 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1155,7 +1155,7 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) * call to mlx4_ib_vma_close. */ put_task_struct(owning_process); - msleep(1); + usleep_range(1000, 2000); owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); if (!owning_process || diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 3405e947dc1e..b73f89700ef9 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -1091,7 +1091,7 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy if (!count) break; - msleep(1); + usleep_range(1000, 2000); } while (time_after(end, jiffies)); flush_workqueue(ctx->mcg_wq); diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 8f9d8b4ad583..b0adf65e4bdb 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -551,7 +551,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { if ((0x0F000100 == (pcs_control_status0 & 0x0F000100)) || (0x0F000100 == (pcs_control_status1 & 0x0F000100))) int_cnt++; - msleep(1); + usleep_range(1000, 2000); } if (int_cnt > 1) { spin_lock_irqsave(&nesadapter->phy_lock, flags); @@ -592,7 +592,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { break; } } - msleep(1); + usleep_range(1000, 2000); } } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 7cbcfdac6529..d574d41bdf61 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -954,7 +954,7 @@ void ipoib_cm_dev_stop(struct net_device *dev) break; } spin_unlock_irq(&priv->lock); - msleep(1); + usleep_range(1000, 2000); ipoib_drain_cq(dev); spin_lock_irq(&priv->lock); } @@ -1206,7 +1206,7 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) goto timeout; } - msleep(1); + usleep_range(1000, 2000); } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index efe7402f4885..57a9655e844d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -770,7 +770,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) ipoib_drain_cq(dev); - msleep(1); + usleep_range(1000, 2000); } ipoib_dbg(priv, "All sends and receives done.\n"); -- cgit v1.2.3 From ed7b521d8a98c3371e3c9300df8bf3cb774d8ea6 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Tue, 23 May 2017 11:42:52 +0300 Subject: IB/IPoIB: Forward MTU change to driver below This patch checks if there is a driver below that needs to be updated on the new MTU and calls it accordingly. Signed-off-by: Erez Shitrit Reviewed by: Alex Vesker Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 6e86eeee370e..3e2b7988ead8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -233,6 +233,7 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu static int ipoib_change_mtu(struct net_device *dev, int new_mtu) { struct ipoib_dev_priv *priv = ipoib_priv(dev); + int ret = 0; /* dev->mtu > 2K ==> connected mode */ if (ipoib_cm_admin_enabled(dev)) { @@ -256,9 +257,23 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) ipoib_dbg(priv, "MTU must be smaller than the underlying " "link layer MTU - 4 (%u)\n", priv->mcast_mtu); - dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); + new_mtu = min(priv->mcast_mtu, priv->admin_mtu); - return 0; + if (priv->rn_ops->ndo_change_mtu) { + bool carrier_status = netif_carrier_ok(dev); + + netif_carrier_off(dev); + + /* notify lower level on the real mtu */ + ret = priv->rn_ops->ndo_change_mtu(dev, new_mtu); + + if (carrier_status) + netif_carrier_on(dev); + } else { + dev->mtu = new_mtu; + } + + return ret; } /* Called with an RCU read lock taken */ -- cgit v1.2.3 From d83187dda9b930dc268ab05da265f3d5d7eca451 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 May 2017 14:38:13 +0300 Subject: IB/IPoIB: Convert IPoIB to memalloc_noio_* calls Commit 21caf2fc1931 ("mm: teach mm by current context info to not do I/O during memory allocation") added the memalloc_noio_(save|restore) functions to enable people to modify the MM behavior by disabling I/O during memory allocation. This was further extended in Fixes: 934f3072c17c ("mm: clear __GFP_FS when PF_MEMALLOC_NOIO is set"). memalloc_noio_* functions prevent allocation paths recursing back into the filesystem without explicitly changing the flags for every allocation site. However the IPoIB hasn't been keeping up with the changes and missed completely these memalloc_noio_* calls. This led to update of allocation site with special QP creation flag, see commit 09b93088d750 ("IB: Add a QP creation flag to use GFP_NOIO allocations"), while this flag is supported by small number of drivers in IB stack. Let's change it by updating to memalloc_noio_* calls and allow for every driver underneath enjoy NOIO allocations. Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index d574d41bdf61..f87d104837dc 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "ipoib.h" @@ -1047,9 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_RC, .qp_context = tx, - .create_flags = IB_QP_CREATE_USE_GFP_NOIO + .create_flags = 0 }; - struct ib_qp *tx_qp; if (dev->features & NETIF_F_SG) @@ -1057,10 +1057,6 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1); tx_qp = ib_create_qp(priv->pd, &attr); - if (PTR_ERR(tx_qp) == -EINVAL) { - attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; - tx_qp = ib_create_qp(priv->pd, &attr); - } tx->max_send_sge = attr.cap.max_send_sge; return tx_qp; } @@ -1131,10 +1127,11 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, struct sa_path_rec *pathrec) { struct ipoib_dev_priv *priv = ipoib_priv(p->dev); + unsigned int noio_flag; int ret; - p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring, - GFP_NOIO, PAGE_KERNEL); + noio_flag = memalloc_noio_save(); + p->tx_ring = vzalloc(ipoib_sendq_size * sizeof(*p->tx_ring)); if (!p->tx_ring) { ret = -ENOMEM; goto err_tx; @@ -1142,9 +1139,10 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); p->qp = ipoib_cm_create_tx_qp(p->dev, p); + memalloc_noio_restore(noio_flag); if (IS_ERR(p->qp)) { ret = PTR_ERR(p->qp); - ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret); + ipoib_warn(priv, "failed to create tx qp: %d\n", ret); goto err_qp; } -- cgit v1.2.3 From b6c871e5875798e5ed3744c725622dcd3c92be92 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Mon, 12 Jun 2017 10:45:21 +0300 Subject: IB/ipoib: Let lower driver handle get_stats64 call The driver checks if the lower level driver supports get_stats, and if so calls it to get the updated statistics, otherwise takes from the current netdevice stats object. Signed-off-by: Erez Shitrit Reviewed-by: Alex Vesker Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 3e2b7988ead8..70dacaf9044e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -276,6 +276,17 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return ret; } +static void ipoib_get_stats(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + if (priv->rn_ops->ndo_get_stats64) + priv->rn_ops->ndo_get_stats64(dev, stats); + else + netdev_stats_to_stats64(stats, &dev->stats); +} + /* Called with an RCU read lock taken */ static bool ipoib_is_dev_match_addr_rcu(const struct sockaddr *addr, struct net_device *dev) @@ -1823,6 +1834,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { .ndo_get_vf_stats = ipoib_get_vf_stats, .ndo_set_vf_guid = ipoib_set_vf_guid, .ndo_set_mac_address = ipoib_set_mac, + .ndo_get_stats64 = ipoib_get_stats, }; static const struct net_device_ops ipoib_netdev_ops_vf = { -- cgit v1.2.3 From 5c8857b653e71a9850a02837e1268e3198abbd1a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Jul 2017 10:45:48 +0300 Subject: IB/IPoIB: Fix error code in ipoib_add_port() We accidentally don't see the error code on some of these error paths. It means we return ERR_PTR(0) which is NULL and it results in a NULL dereference in the caller. This bug dates to pre-git days. Signed-off-by: Dan Carpenter Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 70dacaf9044e..4ce315c92b48 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2239,6 +2239,7 @@ static struct net_device *ipoib_add_port(const char *format, goto register_failed; } + result = -ENOMEM; if (ipoib_cm_add_mode_attr(priv->dev)) goto sysfs_failed; if (ipoib_add_pkey_attr(priv->dev)) -- cgit v1.2.3 From e6e52aec494900912fedd7b595b8827ba70a670d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 6 Jul 2017 10:21:36 +0300 Subject: RDMA/iser: don't send an rkey if all data is written as immadiate-data We might get some bogus error completions in case the target will remotely invalidate the rkey and the HCA will need to retransmit from this buffer. Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iser_initiator.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/ulp') diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 12ed62ce9ff7..2a07692007bd 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -137,8 +137,10 @@ iser_prepare_write_cmd(struct iscsi_task *task, if (unsol_sz < edtl) { hdr->flags |= ISER_WSV; - hdr->write_stag = cpu_to_be32(mem_reg->rkey); - hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz); + if (buf_out->data_len > imm_sz) { + hdr->write_stag = cpu_to_be32(mem_reg->rkey); + hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz); + } iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X " "VA:%#llX + unsol:%d\n", -- cgit v1.2.3