summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-06-29 17:18:40 -0700
committerDavid S. Miller <davem@davemloft.net>2020-06-29 17:18:40 -0700
commit10780291724bc11da5ed9a83be7f755e4ab3c554 (patch)
treed023bc13e86ce2de8b7ab2ba9780f2127162e2e4 /include
parent989d957a8b3e4442006d9ab68d0215718f57ec56 (diff)
parenta29074367b347af9e19d36522f7ad9a7db4b9c28 (diff)
downloadlinux-10780291724bc11da5ed9a83be7f755e4ab3c554.tar.bz2
Merge tag 'mlx5-tls-2020-06-26' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed says:

====================
mlx5-tls-2020-06-26

1) Improve hardware layouts and structure for kTLS support

2) Generalize ICOSQ (Internal Channel Operations Send Queue)
Due to the asynchronous nature of adding new kTLS flows and handling HW asynchronous kTLS resync requests, the XSK ICOSQ was extended to support generic async operations, such as kTLS add flow and resync, in addition to the existing XSK usages.

3) kTLS hardware flow steering and classification:
The driver already has the means to classify TCP ipv4/6 flows to send them to the corresponding RSS HW engine. As reflected in patches 3 through 5, the series will add a steering layer that will hook to the driver's TCP classifiers and will match on well-known kTLS connections: in case of a match, traffic will be redirected to the kTLS decryption engine; otherwise, traffic will continue flowing normally to the TCP RSS engine.

4) kTLS add flow RX HW offload support
New offload contexts post their static/progress params WQEs (Work Queue Elements) to communicate the newly added kTLS contexts over the per-channel async ICOSQ.
The Channel/RQ is selected according to the socket's rxq index.
A new TLS-RX workqueue is used to allow asynchronous addition of steering rules, out of the NAPI context. It will also be used in a downstream patch in the resync procedure.
Feature is OFF by default. Can be turned on by:
$ ethtool -K <if> tls-hw-rx-offload on

5) Added mlx5 kTLS sw stats; the new counters are documented in Documentation/networking/tls-offload.rst
rx_tls_ctx - number of TLS RX HW offload contexts added to device for decryption.
rx_tls_ooo - number of RX packets which were part of a TLS stream but did not arrive in the expected order and triggered the resync procedure.
rx_tls_del - number of TLS RX HW offload contexts deleted from device (connection has finished).
rx_tls_err - number of RX packets which were part of a TLS stream but were not decrypted due to unexpected error in the state machine.

6) Asynchronous RX resync
a. The NIC driver indicates that it would like to resync on some TLS record within the received packet (P), but the driver does not know (yet) which of the TLS records within the packet. At this stage, the NIC driver will query the device to find the exact TCP sequence for resync (tcpsn), however, the driver does not wait for the device to provide the response.
b. Eventually, the device responds, and the driver provides the tcpsn within the resync packet to KTLS. Now, KTLS can check the tcpsn against any processed TLS records within packet P, and also against any record that is processed in the future within packet P.
The asynchronous resync path simplifies the device driver, as it can save bits on the packet completion (32-bit TCP sequence), and pass this information on an asynchronous command instead.

Performance:
CPU: Intel(R) Xeon(R) CPU E5-2687W v4 @ 3.00GHz, 24 cores, HT off
NIC: ConnectX-6 Dx 100GbE dual port

Goodput (app-layer throughput) comparison:
+---------------+-------+-------+---------+
| # connections | 1     | 4     | 8       |
+---------------+-------+-------+---------+
| SW (Gbps)     | 7.26  | 24.70 | 50.30   |
+---------------+-------+-------+---------+
| HW (Gbps)     | 18.50 | 64.30 | 92.90   |
+---------------+-------+-------+---------+
| Speedup       | 2.55x | 2.56x | 1.85x * |
+---------------+-------+-------+---------+
* After linerate is reached, diff is observed in CPU util
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/mlx5/cq.h1
-rw-r--r--include/linux/mlx5/device.h9
-rw-r--r--include/linux/mlx5/mlx5_ifc.h5
-rw-r--r--include/linux/mlx5/qp.h2
-rw-r--r--include/linux/mlx5/rsc_dump.h51
-rw-r--r--include/net/tls.h34
6 files changed, 92 insertions, 10 deletions
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index b5a9399e07ee..7bfb67363434 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -33,7 +33,6 @@
#ifndef MLX5_CORE_CQ_H
#define MLX5_CORE_CQ_H
-#include <rdma/ib_verbs.h>
#include <linux/mlx5/driver.h>
#include <linux/refcount.h>
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 1bc27aca648b..57db125e5802 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -458,6 +458,15 @@ enum {
MLX5_OPC_MOD_TLS_TIR_PROGRESS_PARAMS = 0x2,
};
+struct mlx5_wqe_tls_static_params_seg {
+ u8 ctx[MLX5_ST_SZ_BYTES(tls_static_params)];
+};
+
+struct mlx5_wqe_tls_progress_params_seg {
+ __be32 tis_tir_num;
+ u8 ctx[MLX5_ST_SZ_BYTES(tls_progress_params)];
+};
+
enum {
MLX5_SET_PORT_RESET_QKEY = 0,
MLX5_SET_PORT_GUID0 = 16,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index ca1887dd0423..516fa16f4ed3 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -10640,16 +10640,13 @@ struct mlx5_ifc_tls_static_params_bits {
};
struct mlx5_ifc_tls_progress_params_bits {
- u8 reserved_at_0[0x8];
- u8 tisn[0x18];
-
u8 next_record_tcp_sn[0x20];
u8 hw_resync_tcp_sn[0x20];
u8 record_tracker_state[0x2];
u8 auth_state[0x2];
- u8 reserved_at_64[0x4];
+ u8 reserved_at_44[0x4];
u8 hw_offset_record_number[0x18];
};
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index b8992b861ae6..36492a1342cf 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -209,7 +209,7 @@ struct mlx5_wqe_ctrl_seg {
__be32 general_id;
__be32 imm;
__be32 umr_mkey;
- __be32 tisn;
+ __be32 tis_tir_num;
};
};
diff --git a/include/linux/mlx5/rsc_dump.h b/include/linux/mlx5/rsc_dump.h
new file mode 100644
index 000000000000..d11c0b228620
--- /dev/null
+++ b/include/linux/mlx5/rsc_dump.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies inc. */
+
+#include <linux/mlx5/driver.h>
+
+#ifndef __MLX5_RSC_DUMP
+#define __MLX5_RSC_DUMP
+
+enum mlx5_sgmt_type {
+ MLX5_SGMT_TYPE_HW_CQPC,
+ MLX5_SGMT_TYPE_HW_SQPC,
+ MLX5_SGMT_TYPE_HW_RQPC,
+ MLX5_SGMT_TYPE_FULL_SRQC,
+ MLX5_SGMT_TYPE_FULL_CQC,
+ MLX5_SGMT_TYPE_FULL_EQC,
+ MLX5_SGMT_TYPE_FULL_QPC,
+ MLX5_SGMT_TYPE_SND_BUFF,
+ MLX5_SGMT_TYPE_RCV_BUFF,
+ MLX5_SGMT_TYPE_SRQ_BUFF,
+ MLX5_SGMT_TYPE_CQ_BUFF,
+ MLX5_SGMT_TYPE_EQ_BUFF,
+ MLX5_SGMT_TYPE_SX_SLICE,
+ MLX5_SGMT_TYPE_SX_SLICE_ALL,
+ MLX5_SGMT_TYPE_RDB,
+ MLX5_SGMT_TYPE_RX_SLICE_ALL,
+ MLX5_SGMT_TYPE_PRM_QUERY_QP,
+ MLX5_SGMT_TYPE_PRM_QUERY_CQ,
+ MLX5_SGMT_TYPE_PRM_QUERY_MKEY,
+ MLX5_SGMT_TYPE_MENU,
+ MLX5_SGMT_TYPE_TERMINATE,
+
+ MLX5_SGMT_TYPE_NUM, /* Keep last */
+};
+
+struct mlx5_rsc_key {
+ enum mlx5_sgmt_type rsc;
+ int index1;
+ int index2;
+ int num_of_obj1;
+ int num_of_obj2;
+ int size;
+};
+
+struct mlx5_rsc_dump_cmd;
+
+struct mlx5_rsc_dump_cmd *mlx5_rsc_dump_cmd_create(struct mlx5_core_dev *dev,
+ struct mlx5_rsc_key *key);
+void mlx5_rsc_dump_cmd_destroy(struct mlx5_rsc_dump_cmd *cmd);
+int mlx5_rsc_dump_next(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd,
+ struct page *page, int *size);
+#endif /* __MLX5_RSC_DUMP */
diff --git a/include/net/tls.h b/include/net/tls.h
index 3212d3c214a9..c875c0a445a6 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -291,11 +291,19 @@ struct tlsdev_ops {
enum tls_offload_sync_type {
TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ = 0,
TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT = 1,
+ TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC = 2,
};
#define TLS_DEVICE_RESYNC_NH_START_IVAL 2
#define TLS_DEVICE_RESYNC_NH_MAX_IVAL 128
+#define TLS_DEVICE_RESYNC_ASYNC_LOGMAX 13
+struct tls_offload_resync_async {
+ atomic64_t req;
+ u32 loglen;
+ u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX];
+};
+
struct tls_offload_context_rx {
/* sw must be the first member of tls_offload_context_rx */
struct tls_sw_context_rx sw;
@@ -314,6 +322,10 @@ struct tls_offload_context_rx {
u32 decrypted_failed;
u32 decrypted_tgt;
} resync_nh;
+ /* TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC */
+ struct {
+ struct tls_offload_resync_async *resync_async;
+ };
};
u8 driver_state[] __aligned(8);
/* The TLS layer reserves room for driver specific state
@@ -606,9 +618,9 @@ tls_driver_ctx(const struct sock *sk, enum tls_offload_ctx_dir direction)
}
#endif
+#define RESYNC_REQ BIT(0)
+#define RESYNC_REQ_ASYNC BIT(1)
/* The TLS context is valid until sk_destruct is called */
-#define RESYNC_REQ (1 << 0)
-#define RESYNC_REQ_FORCE (1 << 1)
static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -617,12 +629,26 @@ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq)
atomic64_set(&rx_ctx->resync_req, ((u64)ntohl(seq) << 32) | RESYNC_REQ);
}
-static inline void tls_offload_rx_force_resync_request(struct sock *sk)
+/* Log all TLS record header TCP sequences in [seq, seq+len].
+ * The request word written to resync_async->req packs seq into bits 63..32
+ * and len into bits 31..16, with RESYNC_REQ | RESYNC_REQ_ASYNC in the low
+ * bits. The log is restarted by zeroing loglen.
+ */
+static inline void
+tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
+
+ /* Widen len before shifting: a u16 promotes to int, so len << 16 with
+  * bit 15 set goes negative and would sign-extend over the seq bits.
+  */
+ atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) |
+ ((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC);
+ rx_ctx->resync_async->loglen = 0;
+}
+
+static inline void
+tls_offload_rx_resync_async_request_end(struct sock *sk, __be32 seq)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
- atomic64_set(&rx_ctx->resync_req, RESYNC_REQ | RESYNC_REQ_FORCE);
+ atomic64_set(&rx_ctx->resync_async->req,
+ ((u64)ntohl(seq) << 32) | RESYNC_REQ);
}
static inline void