summary refs log tree commit diff stats
path: root/net/smc/smc_core.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2020-10-15 18:42:13 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2020-10-15 18:42:13 -0700
commit 9ff9b0d392ea08090cd1780fb196f36dbb586529 (patch)
tree 276a3a5c4525b84dee64eda30b423fc31bf94850 /net/smc/smc_core.c
parent 840e5bb326bbcb16ce82dd2416d2769de4839aea (diff)
parent 105faa8742437c28815b2a3eb8314ebc5fd9288c (diff)
download linux-9ff9b0d392ea08090cd1780fb196f36dbb586529.tar.bz2
Merge tag 'net-next-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: - Add redirect_neigh() BPF packet redirect helper, allowing to limit stack traversal in common container configs and improving TCP back-pressure. Daniel reports ~10Gbps => ~15Gbps single stream TCP performance gain. - Expand netlink policy support and improve policy export to user space. (Ge)netlink core performs request validation according to declared policies. Expand the expressiveness of those policies (min/max length and bitmasks). Allow dumping policies for particular commands. This is used for feature discovery by user space (instead of kernel version parsing or trial and error). - Support IGMPv3/MLDv2 multicast listener discovery protocols in bridge. - Allow more than 255 IPv4 multicast interfaces. - Add support for Type of Service (ToS) reflection in SYN/SYN-ACK packets of TCPv6. - In Multipath TCP (MPTCP), support concurrent transmission of data on multiple subflows in a load balancing scenario. Enhance advertising addresses via the RM_ADDR/ADD_ADDR options. - Support SMC-Dv2 version of SMC, which enables multi-subnet deployments. - Allow more calls to same peer in RxRPC. - Support two new Controller Area Network (CAN) protocols - CAN-FD and ISO 15765-2:2016. - Add xfrm/IPsec compat layer, solving the 32bit user space on 64bit kernel problem. - Add TC actions for implementing MPLS L2 VPNs. - Improve nexthop code - e.g. handle various corner cases when nexthop objects are removed from groups better, skip unnecessary notifications and make it easier to offload nexthops into HW by converting to a blocking notifier. - Support adding and consuming TCP header options by BPF programs, opening the doors for easy experimental and deployment-specific TCP option use. - Reorganize TCP congestion control (CC) initialization to simplify life of TCP CC implemented in BPF.
- Add support for shipping BPF programs with the kernel and loading them early on boot via the User Mode Driver mechanism, hence reusing all the user space infra we have. - Support sleepable BPF programs, initially targeting LSM and tracing. - Add bpf_d_path() helper for returning full path for given 'struct path'. - Make bpf_tail_call compatible with bpf-to-bpf calls. - Allow BPF programs to call map_update_elem on sockmaps. - Add BPF Type Format (BTF) support for type and enum discovery, as well as support for using BTF within the kernel itself (current use is for pretty printing structures). - Support listing and getting information about bpf_links via the bpf syscall. - Enhance kernel interfaces around NIC firmware update. Allow specifying overwrite mask to control if settings etc. are reset during update; report expected max time operation may take to users; support firmware activation without machine reboot incl. limits of how much impact reset may have (e.g. dropping link or not). - Extend ethtool configuration interface to report IEEE-standard counters, to limit the need for per-vendor logic in user space. - Adopt or extend devlink use for debug, monitoring, fw update in many drivers (dsa loop, ice, ionic, sja1105, qed, mlxsw, mv88e6xxx, dpaa2-eth). - In mlxsw expose critical and emergency SFP module temperature alarms. Refactor port buffer handling to make the defaults more suitable and support setting these values explicitly via the DCBNL interface. - Add XDP support for Intel's igb driver. - Support offloading TC flower classification and filtering rules to mscc_ocelot switches. - Add PTP support for Marvell Octeontx2 and PP2.2 hardware, as well as fixed interval period pulse generator and one-step timestamping in dpaa-eth. - Add support for various auth offloads in WiFi APs, e.g. SAE (WPA3) offload. - Add Lynx PHY/PCS MDIO module, and convert various drivers which have this HW to use it. Convert mvpp2 to split PCS. 
- Support Marvell Prestera 98DX3255 24-port switch ASICs, as well as 7-port Mediatek MT7531 IP. - Add initial support for QCA6390 and IPQ6018 in ath11k WiFi driver, and wcn3680 support in wcn36xx. - Improve performance for packets which don't require much offloads on recent Mellanox NICs by 20% by making multiple packets share a descriptor entry. - Move chelsio inline crypto drivers (for TLS and IPsec) from the crypto subtree to drivers/net. Move MDIO drivers out of the phy directory. - Clean up a lot of W=1 warnings, reportedly the actively developed subsections of networking drivers should now build W=1 warning free. - Make sure drivers don't use in_interrupt() to dynamically adapt their code. Convert tasklets to use new tasklet_setup API (sadly this conversion is not yet complete). * tag 'net-next-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2583 commits) Revert "bpfilter: Fix build error with CONFIG_BPFILTER_UMH" net, sockmap: Don't call bpf_prog_put() on NULL pointer bpf, selftest: Fix flaky tcp_hdr_options test when adding addr to lo bpf, sockmap: Add locking annotations to iterator netfilter: nftables: allow re-computing sctp CRC-32C in 'payload' statements net: fix pos incrementment in ipv6_route_seq_next net/smc: fix invalid return code in smcd_new_buf_create() net/smc: fix valid DMBE buffer sizes net/smc: fix use-after-free of delayed events bpfilter: Fix build error with CONFIG_BPFILTER_UMH cxgb4/ch_ipsec: Replace the module name to ch_ipsec from chcr net: sched: Fix suspicious RCU usage while accessing tcf_tunnel_info bpf: Fix register equivalence tracking. rxrpc: Fix loss of final ack on shutdown rxrpc: Fix bundle counting for exclusive connections netfilter: restore NF_INET_NUMHOOKS ibmveth: Identify ingress large send packets. ibmveth: Switch order of ibmveth_helper calls. cxgb4: handle 4-tuple PEDIT to NAT mode translation selftests: Add VRF route leaking tests ...
Diffstat (limited to 'net/smc/smc_core.c')
-rw-r--r-- net/smc/smc_core.c 82
1 files changed, 41 insertions, 41 deletions
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index a406627b1d55..d790c43c473f 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -34,7 +34,6 @@
#define SMC_LGR_NUM_INCR 256
#define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
-#define SMC_LGR_FREE_DELAY_FAST (8 * HZ)
static struct smc_lgr_list smc_lgr_list = { /* established link groups */
.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
@@ -70,7 +69,7 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
* creation. For client use a somewhat higher removal delay time,
* otherwise there is a risk of out-of-sync link groups.
*/
- if (!lgr->freeing && !lgr->freefast) {
+ if (!lgr->freeing) {
mod_delayed_work(system_wq, &lgr->free_work,
(!lgr->is_smcd && lgr->role == SMC_CLNT) ?
SMC_LGR_FREE_DELAY_CLNT :
@@ -78,15 +77,6 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
}
}
-void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
-{
- if (!lgr->freeing && !lgr->freefast) {
- lgr->freefast = 1;
- mod_delayed_work(system_wq, &lgr->free_work,
- SMC_LGR_FREE_DELAY_FAST);
- }
-}
-
/* Register connection's alert token in our lookup structure.
* To use rbtrees we have to implement our own insert core.
* Requires @conns_lock
@@ -227,7 +217,7 @@ void smc_lgr_cleanup_early(struct smc_connection *conn)
if (!list_empty(lgr_list))
list_del_init(lgr_list);
spin_unlock_bh(lgr_lock);
- smc_lgr_schedule_free_work_fast(lgr);
+ __smc_lgr_terminate(lgr, true);
}
static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
@@ -385,7 +375,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
int i;
if (ini->is_smcd && ini->vlan_id) {
- if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
+ if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected],
+ ini->vlan_id)) {
rc = SMC_CLC_DECL_ISMVLANERR;
goto out;
}
@@ -396,10 +387,15 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
rc = SMC_CLC_DECL_MEM;
goto ism_put_vlan;
}
+ lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", 0, 0,
+ SMC_LGR_ID_SIZE, &lgr->id);
+ if (!lgr->tx_wq) {
+ rc = -ENOMEM;
+ goto free_lgr;
+ }
lgr->is_smcd = ini->is_smcd;
lgr->sync_err = 0;
lgr->terminating = 0;
- lgr->freefast = 0;
lgr->freeing = 0;
lgr->vlan_id = ini->vlan_id;
mutex_init(&lgr->sndbufs_lock);
@@ -417,13 +413,14 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->conns_all = RB_ROOT;
if (ini->is_smcd) {
/* SMC-D specific settings */
- get_device(&ini->ism_dev->dev);
- lgr->peer_gid = ini->ism_gid;
- lgr->smcd = ini->ism_dev;
- lgr_list = &ini->ism_dev->lgr_list;
+ get_device(&ini->ism_dev[ini->ism_selected]->dev);
+ lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected];
+ lgr->smcd = ini->ism_dev[ini->ism_selected];
+ lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
lgr_lock = &lgr->smcd->lgr_lock;
+ lgr->smc_version = ini->smcd_version;
lgr->peer_shutdown = 0;
- atomic_inc(&ini->ism_dev->lgr_cnt);
+ atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
} else {
/* SMC-R specific settings */
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
@@ -437,7 +434,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lnk = &lgr->lnk[link_idx];
rc = smcr_link_init(lgr, lnk, link_idx, ini);
if (rc)
- goto free_lgr;
+ goto free_wq;
lgr_list = &smc_lgr_list.list;
lgr_lock = &smc_lgr_list.lock;
atomic_inc(&lgr_cnt);
@@ -448,11 +445,13 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
spin_unlock_bh(lgr_lock);
return 0;
+free_wq:
+ destroy_workqueue(lgr->tx_wq);
free_lgr:
kfree(lgr);
ism_put_vlan:
if (ini->is_smcd && ini->vlan_id)
- smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
+ smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id);
out:
if (rc < 0) {
if (rc == -ENOMEM)
@@ -517,7 +516,7 @@ static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
smc->sk.sk_state != SMC_CLOSED) {
rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
if (!rc) {
- schedule_delayed_work(&conn->tx_work, 0);
+ queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 0);
smc->sk.sk_data_ready(&smc->sk);
}
} else {
@@ -824,11 +823,10 @@ static void smc_lgr_free(struct smc_link_group *lgr)
}
smc_lgr_free_bufs(lgr);
+ destroy_workqueue(lgr->tx_wq);
if (lgr->is_smcd) {
- if (!lgr->terminating) {
- smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
- put_device(&lgr->smcd->dev);
- }
+ smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
+ put_device(&lgr->smcd->dev);
if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
wake_up(&lgr->smcd->lgrs_deleted);
} else {
@@ -889,8 +887,6 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr)
if (lgr->is_smcd) {
smc_ism_signal_shutdown(lgr);
smcd_unregister_all_dmbs(lgr);
- smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
- put_device(&lgr->smcd->dev);
} else {
u32 rsn = lgr->llc_termination_rsn;
@@ -1294,11 +1290,13 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
spinlock_t *lgr_lock;
int rc = 0;
- lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
- lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
- ini->cln_first_contact = SMC_FIRST_CONTACT;
+ lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list :
+ &smc_lgr_list.list;
+ lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock :
+ &smc_lgr_list.lock;
+ ini->first_contact_local = 1;
role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
- if (role == SMC_CLNT && ini->srv_first_contact)
+ if (role == SMC_CLNT && ini->first_contact_peer)
/* create new link group as well */
goto create;
@@ -1307,14 +1305,15 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
list_for_each_entry(lgr, lgr_list, list) {
write_lock_bh(&lgr->conns_lock);
if ((ini->is_smcd ?
- smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
+ smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
+ ini->ism_peer_gid[ini->ism_selected]) :
smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
!lgr->sync_err &&
lgr->vlan_id == ini->vlan_id &&
(role == SMC_CLNT || ini->is_smcd ||
lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
/* link group found */
- ini->cln_first_contact = SMC_REUSE_CONTACT;
+ ini->first_contact_local = 0;
conn->lgr = lgr;
rc = smc_lgr_register_conn(conn, false);
write_unlock_bh(&lgr->conns_lock);
@@ -1328,8 +1327,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
if (rc)
return rc;
- if (role == SMC_CLNT && !ini->srv_first_contact &&
- ini->cln_first_contact == SMC_FIRST_CONTACT) {
+ if (role == SMC_CLNT && !ini->first_contact_peer &&
+ ini->first_contact_local) {
/* Server reuses a link group, but Client wants to start
* a new one
* send out_of_sync decline, reason synchr. error
@@ -1338,7 +1337,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
}
create:
- if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
+ if (ini->first_contact_local) {
rc = smc_lgr_create(smc, ini);
if (rc)
goto out;
@@ -1597,7 +1596,7 @@ out:
return rc;
}
-#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
+#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
bool is_dmb, int bufsize)
@@ -1616,7 +1615,8 @@ static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
if (rc) {
kfree(buf_desc);
- return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) : ERR_PTR(rc);
+ return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) :
+ ERR_PTR(-EIO);
}
buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
/* CDC header stored in buf. So, pretend it was smaller */
@@ -1892,8 +1892,8 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn,
struct smc_link *lnk,
struct smc_clc_msg_accept_confirm *clc)
{
- conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
- clc->rmb_rkey);
+ conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr,
+ clc->r0.rmb_rkey);
if (conn->rtoken_idx < 0)
return conn->rtoken_idx;
return 0;