summaryrefslogtreecommitdiffstats
path: root/drivers/nvme/target/tcp.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-04-28 14:39:37 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-04-28 14:39:37 -0700
commitfc0586062816559defb14c947319ef8c4c326fb3 (patch)
tree5ca73bd1fc9de596a11e6d3549fd8fbf6f87dafc /drivers/nvme/target/tcp.c
parent6c0029211382011af508273c4fc98a732f841d95 (diff)
parent8324fbae75ce65fc2eb960a8434799dca48248ac (diff)
downloadlinux-fc0586062816559defb14c947319ef8c4c326fb3.tar.bz2
Merge tag 'for-5.13/drivers-2021-04-27' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe: - MD changes via Song: - raid5 POWER fix - raid1 failure fix - UAF fix for md cluster - mddev_find_or_alloc() clean up - Fix NULL pointer deref with external bitmap - Performance improvement for raid10 discard requests - Fix missing information of /proc/mdstat - rsxx const qualifier removal (Arnd) - Expose allocated brd pages (Calvin) - rnbd via Gioh Kim: - Change maintainer - Change domain address of maintainers' email - Add polling IO mode and document update - Fix memory leak and some bug detected by static code analysis tools - Code refactoring - Series of floppy cleanups/fixes (Denis) - s390 dasd fixes (Julian) - kerneldoc fixes (Lee) - null_blk double free (Lv) - null_blk virtual boundary addition (Max) - Remove xsysace driver (Michal) - umem driver removal (Davidlohr) - ataflop fixes (Dan) - Revalidate disk removal (Christoph) - Bounce buffer cleanups (Christoph) - Mark lightnvm as deprecated (Christoph) - mtip32xx init cleanups (Shixin) - Various fixes (Tian, Gustavo, Coly, Yang, Zhang, Zhiqiang) * tag 'for-5.13/drivers-2021-04-27' of git://git.kernel.dk/linux-block: (143 commits) async_xor: increase src_offs when dropping destination page drivers/block/null_blk/main: Fix a double free in null_init. md/raid1: properly indicate failure when ending a failed write request md-cluster: fix use-after-free issue when removing rdev nvme: introduce generic per-namespace chardev nvme: cleanup nvme_configure_apst nvme: do not try to reconfigure APST when the controller is not live nvme: add 'kato' sysfs attribute nvme: sanitize KATO setting nvmet: avoid queuing keep-alive timer if it is disabled brd: expose number of allocated pages in debugfs ataflop: fix off by one in ataflop_probe() ataflop: potential out of bounds in do_format() drbd: Fix fall-through warnings for Clang block/rnbd: Use strscpy instead of strlcpy block/rnbd-clt-sysfs: Remove copy buffer overlap in rnbd_clt_get_path_name block/rnbd-clt: Remove max_segment_size block/rnbd-clt: Generate kobject_uevent when the rnbd device state changes block/rnbd-srv: Remove unused arguments of rnbd_srv_rdma_ev Documentation/ABI/rnbd-clt: Add description for nr_poll_queues ...
Diffstat (limited to 'drivers/nvme/target/tcp.c')
-rw-r--r--drivers/nvme/target/tcp.c79
1 files changed, 67 insertions, 12 deletions
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index d658c6e8263a..f9f34f6caf5e 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -29,6 +29,16 @@ static int so_priority;
module_param(so_priority, int, 0644);
MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority");
+/* Define a time period (in usecs) that io_work() shall sample an activated
+ * queue before determining it to be idle. This optional module behavior
+ * can enable NIC solutions that support socket optimized packet processing
+ * using advanced interrupt moderation techniques.
+ */
+static int idle_poll_period_usecs;
+module_param(idle_poll_period_usecs, int, 0644);
+MODULE_PARM_DESC(idle_poll_period_usecs,
+ "nvmet tcp io_work poll till idle time period in usecs");
+
#define NVMET_TCP_RECV_BUDGET 8
#define NVMET_TCP_SEND_BUDGET 8
#define NVMET_TCP_IO_WORK_BUDGET 64
@@ -119,6 +129,8 @@ struct nvmet_tcp_queue {
struct ahash_request *snd_hash;
struct ahash_request *rcv_hash;
+ unsigned long poll_end;
+
spinlock_t state_lock;
enum nvmet_tcp_queue_state state;
@@ -525,11 +537,36 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req)
struct nvmet_tcp_cmd *cmd =
container_of(req, struct nvmet_tcp_cmd, req);
struct nvmet_tcp_queue *queue = cmd->queue;
+ struct nvme_sgl_desc *sgl;
+ u32 len;
+
+ if (unlikely(cmd == queue->cmd)) {
+ sgl = &cmd->req.cmd->common.dptr.sgl;
+ len = le32_to_cpu(sgl->length);
+
+ /*
+ * Wait for inline data before processing the response.
+ * Avoid using helpers, this might happen before
+ * nvmet_req_init is completed.
+ */
+ if (queue->rcv_state == NVMET_TCP_RECV_PDU &&
+ len && len < cmd->req.port->inline_data_size &&
+ nvme_is_write(cmd->req.cmd))
+ return;
+ }
llist_add(&cmd->lentry, &queue->resp_list);
queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &cmd->queue->io_work);
}
+static void nvmet_tcp_execute_request(struct nvmet_tcp_cmd *cmd)
+{
+ if (unlikely(cmd->flags & NVMET_TCP_F_INIT_FAILED))
+ nvmet_tcp_queue_response(&cmd->req);
+ else
+ cmd->req.execute(&cmd->req);
+}
+
static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
{
u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
@@ -961,7 +998,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
le32_to_cpu(req->cmd->common.dptr.sgl.length));
nvmet_tcp_handle_req_failure(queue, queue->cmd, req);
- return -EAGAIN;
+ return 0;
}
ret = nvmet_tcp_map_data(queue->cmd);
@@ -1104,10 +1141,8 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
return 0;
}
- if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
- cmd->rbytes_done == cmd->req.transfer_len) {
- cmd->req.execute(&cmd->req);
- }
+ if (cmd->rbytes_done == cmd->req.transfer_len)
+ nvmet_tcp_execute_request(cmd);
nvmet_prepare_receive_pdu(queue);
return 0;
@@ -1144,9 +1179,9 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
goto out;
}
- if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
- cmd->rbytes_done == cmd->req.transfer_len)
- cmd->req.execute(&cmd->req);
+ if (cmd->rbytes_done == cmd->req.transfer_len)
+ nvmet_tcp_execute_request(cmd);
+
ret = 0;
out:
nvmet_prepare_receive_pdu(queue);
@@ -1216,6 +1251,23 @@ static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue)
spin_unlock(&queue->state_lock);
}
+static inline void nvmet_tcp_arm_queue_deadline(struct nvmet_tcp_queue *queue)
+{
+ queue->poll_end = jiffies + usecs_to_jiffies(idle_poll_period_usecs);
+}
+
+static bool nvmet_tcp_check_queue_deadline(struct nvmet_tcp_queue *queue,
+ int ops)
+{
+ if (!idle_poll_period_usecs)
+ return false;
+
+ if (ops)
+ nvmet_tcp_arm_queue_deadline(queue);
+
+ return !time_after(jiffies, queue->poll_end);
+}
+
static void nvmet_tcp_io_work(struct work_struct *w)
{
struct nvmet_tcp_queue *queue =
@@ -1241,9 +1293,10 @@ static void nvmet_tcp_io_work(struct work_struct *w)
} while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);
/*
- * We exahusted our budget, requeue our selves
+ * Requeue the worker if idle deadline period is in progress or any
+ * ops activity was recorded during the do-while loop above.
*/
- if (pending)
+ if (nvmet_tcp_check_queue_deadline(queue, ops) || pending)
queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
}
@@ -1434,7 +1487,7 @@ static void nvmet_tcp_state_change(struct sock *sk)
{
struct nvmet_tcp_queue *queue;
- write_lock_bh(&sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
queue = sk->sk_user_data;
if (!queue)
goto done;
@@ -1452,7 +1505,7 @@ static void nvmet_tcp_state_change(struct sock *sk)
queue->idx, sk->sk_state);
}
done:
- write_unlock_bh(&sk->sk_callback_lock);
+ read_unlock_bh(&sk->sk_callback_lock);
}
static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
@@ -1501,6 +1554,8 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
sock->sk->sk_state_change = nvmet_tcp_state_change;
queue->write_space = sock->sk->sk_write_space;
sock->sk->sk_write_space = nvmet_tcp_write_space;
+ if (idle_poll_period_usecs)
+ nvmet_tcp_arm_queue_deadline(queue);
queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
}
write_unlock_bh(&sock->sk->sk_callback_lock);