summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2020-05-04 10:54:40 -0700
committer: David S. Miller <davem@davemloft.net> 2020-05-04 10:54:40 -0700
commit: 627642f07b3093f501495d226c7a0b9d56a0c870 (patch)
tree: 491c9b0129cec2e31bfb5e28c1f5baf9791c4cfa
parent: fc99584e9446976c1e438c2cb368010b844d0b4f (diff)
parent: 649758fff327eeb184713db8b0b0ebfa28693077 (diff)
download: linux-627642f07b3093f501495d226c7a0b9d56a0c870.tar.bz2
Merge branch 'net-smc-add-failover-processing'
Karsten Graul says:

====================
net/smc: add failover processing

This patch series adds the actual SMC-R link failover processing and
improved link group termination. There will be one more (very small)
series after this which will complete the SMC-R link failover support.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- net/smc/af_smc.c 2
-rw-r--r-- net/smc/smc.h 6
-rw-r--r-- net/smc/smc_cdc.c 86
-rw-r--r-- net/smc/smc_cdc.h 2
-rw-r--r-- net/smc/smc_core.c 303
-rw-r--r-- net/smc/smc_core.h 14
-rw-r--r-- net/smc/smc_llc.c 111
-rw-r--r-- net/smc/smc_llc.h 12
-rw-r--r-- net/smc/smc_tx.c 12
-rw-r--r-- net/smc/smc_wr.c 39
-rw-r--r-- net/smc/smc_wr.h 2
11 files changed, 514 insertions, 75 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index c67272007f41..4e4421c95ca1 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -390,6 +390,7 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
}
+ smc_llc_save_peer_uid(qentry);
rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
if (rc)
@@ -1056,6 +1057,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
}
+ smc_llc_save_peer_uid(qentry);
rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
if (rc)
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 1a084afa7372..6f1c42da7a4c 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -143,6 +143,9 @@ struct smc_connection {
* .prod cf. TCP snd_nxt
* .cons cf. TCP sends ack
*/
+ union smc_host_cursor local_tx_ctrl_fin;
+ /* prod crsr - confirmed by peer
+ */
union smc_host_cursor tx_curs_prep; /* tx - prepared data
* snd_max..wmem_alloc
*/
@@ -154,6 +157,7 @@ struct smc_connection {
*/
atomic_t sndbuf_space; /* remaining space in sndbuf */
u16 tx_cdc_seq; /* sequence # for CDC send */
+ u16 tx_cdc_seq_fin; /* sequence # - tx completed */
spinlock_t send_lock; /* protect wr_sends */
struct delayed_work tx_work; /* retry of smc_cdc_msg_send */
u32 tx_off; /* base offset in peer rmb */
@@ -184,12 +188,14 @@ struct smc_connection {
spinlock_t acurs_lock; /* protect cursors */
#endif
struct work_struct close_work; /* peer sent some closing */
+ struct work_struct abort_work; /* abort the connection */
struct tasklet_struct rx_tsklet; /* Receiver tasklet for SMC-D */
u8 rx_off; /* receive offset:
* 0 for SMC-R, 32 for SMC-D
*/
u64 peer_token; /* SMC-D token of peer */
u8 killed : 1; /* abnormal termination */
+ u8 out_of_sync : 1; /* out of sync with peer */
};
struct smc_sock { /* smc sock container */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index f64589d823aa..b2b85e1be72c 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -47,17 +47,20 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
/* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
smp_mb__after_atomic();
smc_curs_copy(&conn->tx_curs_fin, &cdcpend->cursor, conn);
+ smc_curs_copy(&conn->local_tx_ctrl_fin, &cdcpend->p_cursor,
+ conn);
+ conn->tx_cdc_seq_fin = cdcpend->ctrl_seq;
}
smc_tx_sndbuf_nonfull(smc);
bh_unlock_sock(&smc->sk);
}
int smc_cdc_get_free_slot(struct smc_connection *conn,
+ struct smc_link *link,
struct smc_wr_buf **wr_buf,
struct smc_rdma_wr **wr_rdma_buf,
struct smc_cdc_tx_pend **pend)
{
- struct smc_link *link = conn->lnk;
int rc;
rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
@@ -104,22 +107,64 @@ int smc_cdc_msg_send(struct smc_connection *conn,
if (!rc) {
smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn);
conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
+ } else {
+ conn->tx_cdc_seq--;
+ conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
}
return rc;
}
+/* send a validation msg indicating the move of a conn to an other QP link */
+int smcr_cdc_msg_send_validation(struct smc_connection *conn)
+{
+ struct smc_host_cdc_msg *local = &conn->local_tx_ctrl;
+ struct smc_link *link = conn->lnk;
+ struct smc_cdc_tx_pend *pend;
+ struct smc_wr_buf *wr_buf;
+ struct smc_cdc_msg *peer;
+ int rc;
+
+ rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend);
+ if (rc)
+ return rc;
+
+ peer = (struct smc_cdc_msg *)wr_buf;
+ peer->common.type = local->common.type;
+ peer->len = local->len;
+ peer->seqno = htons(conn->tx_cdc_seq_fin); /* seqno last compl. tx */
+ peer->token = htonl(local->token);
+ peer->prod_flags.failover_validation = 1;
+
+ rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
+ return rc;
+}
+
static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
{
struct smc_cdc_tx_pend *pend;
struct smc_wr_buf *wr_buf;
+ struct smc_link *link;
+ bool again = false;
int rc;
- rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend);
+again:
+ link = conn->lnk;
+ rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend);
if (rc)
return rc;
spin_lock_bh(&conn->send_lock);
+ if (link != conn->lnk) {
+ /* link of connection changed, try again one time*/
+ spin_unlock_bh(&conn->send_lock);
+ smc_wr_tx_put_slot(link,
+ (struct smc_wr_tx_pend_priv *)pend);
+ if (again)
+ return -ENOLINK;
+ again = true;
+ goto again;
+ }
rc = smc_cdc_msg_send(conn, wr_buf, pend);
spin_unlock_bh(&conn->send_lock);
return rc;
@@ -237,6 +282,28 @@ static void smc_cdc_handle_urg_data_arrival(struct smc_sock *smc,
sk_send_sigurg(&smc->sk);
}
+static void smc_cdc_msg_validate(struct smc_sock *smc, struct smc_cdc_msg *cdc,
+ struct smc_link *link)
+{
+ struct smc_connection *conn = &smc->conn;
+ u16 recv_seq = ntohs(cdc->seqno);
+ s16 diff;
+
+ /* check that seqnum was seen before */
+ diff = conn->local_rx_ctrl.seqno - recv_seq;
+ if (diff < 0) { /* diff larger than 0x7fff */
+ /* drop connection */
+ conn->out_of_sync = 1; /* prevent any further receives */
+ spin_lock_bh(&conn->send_lock);
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+ conn->lnk = link;
+ spin_unlock_bh(&conn->send_lock);
+ sock_hold(&smc->sk); /* sock_put in abort_work */
+ if (!schedule_work(&conn->abort_work))
+ sock_put(&smc->sk);
+ }
+}
+
static void smc_cdc_msg_recv_action(struct smc_sock *smc,
struct smc_cdc_msg *cdc)
{
@@ -367,16 +434,19 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
read_lock_bh(&lgr->conns_lock);
conn = smc_lgr_find_conn(ntohl(cdc->token), lgr);
read_unlock_bh(&lgr->conns_lock);
- if (!conn)
+ if (!conn || conn->out_of_sync)
return;
smc = container_of(conn, struct smc_sock, conn);
- if (!cdc->prod_flags.failover_validation) {
- if (smc_cdc_before(ntohs(cdc->seqno),
- conn->local_rx_ctrl.seqno))
- /* received seqno is old */
- return;
+ if (cdc->prod_flags.failover_validation) {
+ smc_cdc_msg_validate(smc, cdc, link);
+ return;
}
+ if (smc_cdc_before(ntohs(cdc->seqno),
+ conn->local_rx_ctrl.seqno))
+ /* received seqno is old */
+ return;
+
smc_cdc_msg_recv(smc, cdc);
}
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 861dc24c588c..9cfabc9af120 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -304,6 +304,7 @@ struct smc_cdc_tx_pend {
};
int smc_cdc_get_free_slot(struct smc_connection *conn,
+ struct smc_link *link,
struct smc_wr_buf **wr_buf,
struct smc_rdma_wr **wr_rdma_buf,
struct smc_cdc_tx_pend **pend);
@@ -312,6 +313,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend);
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
int smcd_cdc_msg_send(struct smc_connection *conn);
+int smcr_cdc_msg_send_validation(struct smc_connection *conn);
int smc_cdc_init(void) __init;
void smcd_cdc_rx_init(struct smc_connection *conn);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 32a6cadc5c1f..fb5f685ff494 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -121,16 +121,60 @@ static void smc_lgr_add_alert_token(struct smc_connection *conn)
rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}
+/* assign an SMC-R link to the connection */
+static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
+{
+ enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
+ SMC_LNK_ACTIVE;
+ int i, j;
+
+ /* do link balancing */
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *lnk = &conn->lgr->lnk[i];
+
+ if (lnk->state != expected || lnk->link_is_asym)
+ continue;
+ if (conn->lgr->role == SMC_CLNT) {
+ conn->lnk = lnk; /* temporary, SMC server assigns link*/
+ break;
+ }
+ if (conn->lgr->conns_num % 2) {
+ for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
+ struct smc_link *lnk2;
+
+ lnk2 = &conn->lgr->lnk[j];
+ if (lnk2->state == expected &&
+ !lnk2->link_is_asym) {
+ conn->lnk = lnk2;
+ break;
+ }
+ }
+ }
+ if (!conn->lnk)
+ conn->lnk = lnk;
+ break;
+ }
+ if (!conn->lnk)
+ return SMC_CLC_DECL_NOACTLINK;
+ return 0;
+}
+
/* Register connection in link group by assigning an alert token
* registered in a search tree.
* Requires @conns_lock
* Note that '0' is a reserved value and not assigned.
*/
-static int smc_lgr_register_conn(struct smc_connection *conn)
+static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
static atomic_t nexttoken = ATOMIC_INIT(0);
+ int rc;
+ if (!conn->lgr->is_smcd) {
+ rc = smcr_lgr_conn_assign_link(conn, first);
+ if (rc)
+ return rc;
+ }
/* find a new alert_token_local value not yet used by some connection
* in this link group
*/
@@ -141,22 +185,6 @@ static int smc_lgr_register_conn(struct smc_connection *conn)
conn->alert_token_local = 0;
}
smc_lgr_add_alert_token(conn);
-
- /* assign the new connection to a link */
- if (!conn->lgr->is_smcd) {
- struct smc_link *lnk;
- int i;
-
- /* tbd - link balancing */
- for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- lnk = &conn->lgr->lnk[i];
- if (lnk->state == SMC_LNK_ACTIVATING ||
- lnk->state == SMC_LNK_ACTIVE)
- conn->lnk = lnk;
- }
- if (!conn->lnk)
- return SMC_CLC_DECL_NOACTLINK;
- }
conn->lgr->conns_num++;
return 0;
}
@@ -209,6 +237,19 @@ void smc_lgr_cleanup_early(struct smc_connection *conn)
smc_lgr_schedule_free_work_fast(lgr);
}
+static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
+{
+ int i;
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *lnk = &lgr->lnk[i];
+
+ if (smc_link_usable(lnk))
+ lnk->state = SMC_LNK_INACTIVE;
+ }
+ wake_up_interruptible_all(&lgr->llc_waiter);
+}
+
static void smc_lgr_free(struct smc_link_group *lgr);
static void smc_lgr_free_work(struct work_struct *work)
@@ -218,7 +259,6 @@ static void smc_lgr_free_work(struct work_struct *work)
free_work);
spinlock_t *lgr_lock;
bool conns;
- int i;
smc_lgr_list_head(lgr, &lgr_lock);
spin_lock_bh(lgr_lock);
@@ -238,17 +278,13 @@ static void smc_lgr_free_work(struct work_struct *work)
spin_unlock_bh(lgr_lock);
cancel_delayed_work(&lgr->free_work);
+ if (!lgr->is_smcd && !lgr->terminating)
+ smc_llc_send_link_delete_all(lgr, true,
+ SMC_LLC_DEL_PROG_INIT_TERM);
if (lgr->is_smcd && !lgr->terminating)
smc_ism_signal_shutdown(lgr);
- if (!lgr->is_smcd) {
- for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- struct smc_link *lnk = &lgr->lnk[i];
-
- if (smc_link_usable(lnk))
- lnk->state = SMC_LNK_INACTIVE;
- }
- wake_up_interruptible_all(&lgr->llc_waiter);
- }
+ if (!lgr->is_smcd)
+ smcr_lgr_link_deactivate_all(lgr);
smc_lgr_free(lgr);
}
@@ -295,6 +331,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
lnk->smcibdev = ini->ib_dev;
lnk->ibport = ini->ib_port;
lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
+ smc_llc_link_set_uid(lnk);
INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
if (!ini->ib_dev->initialized) {
rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
@@ -432,6 +469,135 @@ out:
return rc;
}
+static int smc_write_space(struct smc_connection *conn)
+{
+ int buffer_len = conn->peer_rmbe_size;
+ union smc_host_cursor prod;
+ union smc_host_cursor cons;
+ int space;
+
+ smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
+ smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
+ /* determine rx_buf space */
+ space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
+ return space;
+}
+
+static int smc_switch_cursor(struct smc_sock *smc)
+{
+ struct smc_connection *conn = &smc->conn;
+ union smc_host_cursor cons, fin;
+ int rc = 0;
+ int diff;
+
+ smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
+ smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
+ /* set prod cursor to old state, enforce tx_rdma_writes() */
+ smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
+ smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
+
+ if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
+ /* cons cursor advanced more than fin, and prod was set
+ * fin above, so now prod is smaller than cons. Fix that.
+ */
+ diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
+ smc_curs_add(conn->sndbuf_desc->len,
+ &conn->tx_curs_sent, diff);
+ smc_curs_add(conn->sndbuf_desc->len,
+ &conn->tx_curs_fin, diff);
+
+ smp_mb__before_atomic();
+ atomic_add(diff, &conn->sndbuf_space);
+ smp_mb__after_atomic();
+
+ smc_curs_add(conn->peer_rmbe_size,
+ &conn->local_tx_ctrl.prod, diff);
+ smc_curs_add(conn->peer_rmbe_size,
+ &conn->local_tx_ctrl_fin, diff);
+ }
+ /* recalculate, value is used by tx_rdma_writes() */
+ atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));
+
+ if (smc->sk.sk_state != SMC_INIT &&
+ smc->sk.sk_state != SMC_CLOSED) {
+ rc = smcr_cdc_msg_send_validation(conn);
+ if (!rc) {
+ schedule_delayed_work(&conn->tx_work, 0);
+ smc->sk.sk_data_ready(&smc->sk);
+ }
+ }
+ return rc;
+}
+
+struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
+ struct smc_link *from_lnk, bool is_dev_err)
+{
+ struct smc_link *to_lnk = NULL;
+ struct smc_connection *conn;
+ struct smc_sock *smc;
+ struct rb_node *node;
+ int i, rc = 0;
+
+ /* link is inactive, wake up tx waiters */
+ smc_wr_wakeup_tx_wait(from_lnk);
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].state != SMC_LNK_ACTIVE ||
+ i == from_lnk->link_idx)
+ continue;
+ if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
+ from_lnk->ibport == lgr->lnk[i].ibport) {
+ continue;
+ }
+ to_lnk = &lgr->lnk[i];
+ break;
+ }
+ if (!to_lnk) {
+ smc_lgr_terminate_sched(lgr);
+ return NULL;
+ }
+again:
+ read_lock_bh(&lgr->conns_lock);
+ for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
+ conn = rb_entry(node, struct smc_connection, alert_node);
+ if (conn->lnk != from_lnk)
+ continue;
+ smc = container_of(conn, struct smc_sock, conn);
+ /* conn->lnk not yet set in SMC_INIT state */
+ if (smc->sk.sk_state == SMC_INIT)
+ continue;
+ if (smc->sk.sk_state == SMC_CLOSED ||
+ smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
+ smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
+ smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
+ smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
+ smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
+ smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
+ smc->sk.sk_state == SMC_PEERABORTWAIT ||
+ smc->sk.sk_state == SMC_PROCESSABORT) {
+ spin_lock_bh(&conn->send_lock);
+ conn->lnk = to_lnk;
+ spin_unlock_bh(&conn->send_lock);
+ continue;
+ }
+ sock_hold(&smc->sk);
+ read_unlock_bh(&lgr->conns_lock);
+ /* avoid race with smcr_tx_sndbuf_nonempty() */
+ spin_lock_bh(&conn->send_lock);
+ conn->lnk = to_lnk;
+ rc = smc_switch_cursor(smc);
+ spin_unlock_bh(&conn->send_lock);
+ sock_put(&smc->sk);
+ if (rc) {
+ smcr_link_down_cond_sched(to_lnk);
+ return NULL;
+ }
+ goto again;
+ }
+ read_unlock_bh(&lgr->conns_lock);
+ return to_lnk;
+}
+
static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
struct smc_link_group *lgr)
{
@@ -486,6 +652,8 @@ void smc_conn_free(struct smc_connection *conn)
tasklet_kill(&conn->rx_tsklet);
} else {
smc_cdc_tx_dismiss_slots(conn);
+ if (current_work() != &conn->abort_work)
+ cancel_work_sync(&conn->abort_work);
}
if (!list_empty(&lgr->list)) {
smc_lgr_unregister_conn(conn);
@@ -640,6 +808,16 @@ static void smc_lgr_free(struct smc_link_group *lgr)
{
int i;
+ if (!lgr->is_smcd) {
+ mutex_lock(&lgr->llc_conf_mutex);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].state != SMC_LNK_UNUSED)
+ smcr_link_clear(&lgr->lnk[i]);
+ }
+ mutex_unlock(&lgr->llc_conf_mutex);
+ smc_llc_lgr_clear(lgr);
+ }
+
smc_lgr_free_bufs(lgr);
if (lgr->is_smcd) {
if (!lgr->terminating) {
@@ -649,11 +827,6 @@ static void smc_lgr_free(struct smc_link_group *lgr)
if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
wake_up(&lgr->smcd->lgrs_deleted);
} else {
- for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- if (lgr->lnk[i].state != SMC_LNK_UNUSED)
- smcr_link_clear(&lgr->lnk[i]);
- }
- smc_llc_lgr_clear(lgr);
if (!atomic_dec_return(&lgr_cnt))
wake_up(&lgrs_deleted);
}
@@ -708,21 +881,18 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft)
static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
- int i;
-
if (lgr->is_smcd) {
smc_ism_signal_shutdown(lgr);
smcd_unregister_all_dmbs(lgr);
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
put_device(&lgr->smcd->dev);
} else {
- for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- struct smc_link *lnk = &lgr->lnk[i];
+ u32 rsn = lgr->llc_termination_rsn;
- if (smc_link_usable(lnk))
- lnk->state = SMC_LNK_INACTIVE;
- }
- wake_up_interruptible_all(&lgr->llc_waiter);
+ if (!rsn)
+ rsn = SMC_LLC_DEL_PROG_INIT_TERM;
+ smc_llc_send_link_delete_all(lgr, false, rsn);
+ smcr_lgr_link_deactivate_all(lgr);
}
}
@@ -738,8 +908,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
if (lgr->terminating)
return; /* lgr already terminating */
- if (!soft)
- cancel_delayed_work_sync(&lgr->free_work);
+ /* cancel free_work sync, will terminate when lgr->freeing is set */
+ cancel_delayed_work_sync(&lgr->free_work);
lgr->terminating = 1;
/* kill remaining link group connections */
@@ -759,10 +929,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
}
read_unlock_bh(&lgr->conns_lock);
smc_lgr_cleanup(lgr);
- if (soft)
- smc_lgr_schedule_free_work_fast(lgr);
- else
- smc_lgr_free(lgr);
+ smc_lgr_free(lgr);
}
/* unlink link group and schedule termination */
@@ -777,6 +944,7 @@ void smc_lgr_terminate_sched(struct smc_link_group *lgr)
return; /* lgr already terminating */
}
list_del_init(&lgr->list);
+ lgr->freeing = 1;
spin_unlock_bh(lgr_lock);
schedule_work(&lgr->terminate_work);
}
@@ -795,6 +963,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
if (peer_gid) /* peer triggered termination */
lgr->peer_shutdown = 1;
list_move(&lgr->list, &lgr_free_list);
+ lgr->freeing = 1;
}
}
spin_unlock_bh(&dev->lgr_lock);
@@ -854,6 +1023,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
list_del_init(&lgr->list);
+ smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
__smc_lgr_terminate(lgr, false);
}
@@ -867,6 +1037,37 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
}
}
+/* set new lgr type and clear all asymmetric link tagging */
+void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
+{
+ int i;
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+ if (smc_link_usable(&lgr->lnk[i]))
+ lgr->lnk[i].link_is_asym = false;
+ lgr->type = new_type;
+}
+
+/* set new lgr type and tag a link as asymmetric */
+void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
+ enum smc_lgr_type new_type, int asym_lnk_idx)
+{
+ smcr_lgr_set_type(lgr, new_type);
+ lgr->lnk[asym_lnk_idx].link_is_asym = true;
+}
+
+/* abort connection, abort_work scheduled from tasklet context */
+static void smc_conn_abort_work(struct work_struct *work)
+{
+ struct smc_connection *conn = container_of(work,
+ struct smc_connection,
+ abort_work);
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+
+ smc_conn_kill(conn, true);
+ sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
+}
+
/* link is up - establish alternate link if applicable */
static void smcr_link_up(struct smc_link_group *lgr,
struct smc_ib_device *smcibdev, u8 ibport)
@@ -943,13 +1144,12 @@ static void smcr_link_down(struct smc_link *lnk)
return;
smc_ib_modify_qp_reset(lnk);
- to_lnk = NULL;
- /* tbd: call to_lnk = smc_switch_conns(lgr, lnk, true); */
+ to_lnk = smc_switch_conns(lgr, lnk, true);
if (!to_lnk) { /* no backup link available */
smcr_link_clear(lnk);
return;
}
- lgr->type = SMC_LGR_SINGLE;
+ smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
del_link_id = lnk->link_id;
if (lgr->role == SMC_SERV) {
@@ -1138,7 +1338,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
/* link group found */
ini->cln_first_contact = SMC_REUSE_CONTACT;
conn->lgr = lgr;
- rc = smc_lgr_register_conn(conn); /* add conn to lgr */
+ rc = smc_lgr_register_conn(conn, false);
write_unlock_bh(&lgr->conns_lock);
if (!rc && delayed_work_pending(&lgr->free_work))
cancel_delayed_work(&lgr->free_work);
@@ -1166,7 +1366,7 @@ create:
goto out;
lgr = conn->lgr;
write_lock_bh(&lgr->conns_lock);
- rc = smc_lgr_register_conn(conn); /* add smc conn to lgr */
+ rc = smc_lgr_register_conn(conn, true);
write_unlock_bh(&lgr->conns_lock);
if (rc)
goto out;
@@ -1174,6 +1374,7 @@ create:
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
conn->urg_state = SMC_URG_READ;
+ INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
if (ini->is_smcd) {
conn->rx_off = sizeof(struct smcd_cdc_msg);
smcd_cdc_rx_init(conn); /* init tasklet for this conn */
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 7fe53feb9dc4..4ae76802214f 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -70,6 +70,8 @@ struct smc_rdma_wr { /* work requests per message
struct ib_rdma_wr wr_tx_rdma[SMC_MAX_RDMA_WRITES];
};
+#define SMC_LGR_ID_SIZE 4
+
struct smc_link {
struct smc_ib_device *smcibdev; /* ib-device */
u8 ibport; /* port - values 1 | 2 */
@@ -85,6 +87,7 @@ struct smc_link {
struct smc_rdma_sges *wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/
struct smc_rdma_wr *wr_tx_rdmas; /* WR RDMA WRITE */
struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */
+ struct completion *wr_tx_compl; /* WR send CQE completion */
/* above four vectors have wr_tx_cnt elements and use the same index */
dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */
atomic_long_t wr_tx_id; /* seq # of last sent WR */
@@ -115,7 +118,10 @@ struct smc_link {
u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */
u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/
u8 link_id; /* unique # within link group */
+ u8 link_uid[SMC_LGR_ID_SIZE]; /* unique lnk id */
+ u8 peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */
u8 link_idx; /* index in lgr link array */
+ u8 link_is_asym; /* is link asymmetric? */
struct smc_link_group *lgr; /* parent link group */
struct work_struct link_down_wrk; /* wrk to bring link down */
@@ -176,7 +182,6 @@ struct smc_rtoken { /* address/key of remote RMB */
u32 rkey;
};
-#define SMC_LGR_ID_SIZE 4
#define SMC_BUF_MIN_SIZE 16384 /* minimum size of an RMB */
#define SMC_RMBE_SIZES 16 /* number of distinct RMBE sizes */
/* theoretically, the RFC states that largest size would be 512K,
@@ -269,6 +274,8 @@ struct smc_link_group {
/* protects llc flow */
int llc_testlink_time;
/* link keep alive time */
+ u32 llc_termination_rsn;
+ /* rsn code for termination */
};
struct { /* SMC-D */
u64 peer_gid;
@@ -379,7 +386,12 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
void smcr_link_clear(struct smc_link *lnk);
int smcr_buf_map_lgr(struct smc_link *lnk);
int smcr_buf_reg_lgr(struct smc_link *lnk);
+void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type);
+void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
+ enum smc_lgr_type new_type, int asym_lnk_idx);
int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc);
+struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
+ struct smc_link *from_lnk, bool is_dev_err);
void smcr_link_down_cond(struct smc_link *lnk);
void smcr_link_down_cond_sched(struct smc_link *lnk);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 7675ccd6f3c3..66ddc9cf5e2f 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -361,7 +361,6 @@ static int smc_llc_add_pending_send(struct smc_link *link,
int smc_llc_send_confirm_link(struct smc_link *link,
enum smc_llc_reqresp reqresp)
{
- struct smc_link_group *lgr = smc_get_lgr(link);
struct smc_llc_msg_confirm_link *confllc;
struct smc_wr_tx_pend_priv *pend;
struct smc_wr_buf *wr_buf;
@@ -382,7 +381,7 @@ int smc_llc_send_confirm_link(struct smc_link *link,
memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE);
hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
confllc->link_num = link->link_id;
- memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
+ memcpy(confllc->link_uid, link->link_uid, SMC_LGR_ID_SIZE);
confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS;
/* send llc message */
rc = smc_wr_tx_send(link, pend);
@@ -560,6 +559,25 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
return smc_wr_tx_send(link, pend);
}
+/* schedule an llc send on link, may wait for buffers,
+ * and wait for send completion notification.
+ * @return 0 on success
+ */
+static int smc_llc_send_message_wait(struct smc_link *link, void *llcbuf)
+{
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ if (!smc_link_usable(link))
+ return -ENOLINK;
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
+ return smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME);
+}
+
/********************************* receive ***********************************/
static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
@@ -752,6 +770,7 @@ static int smc_llc_cli_conf_link(struct smc_link *link,
smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
return -ENOLINK;
}
+ smc_llc_save_peer_uid(qentry);
smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
rc = smc_ib_modify_qp_rts(link_new);
@@ -777,7 +796,11 @@ static int smc_llc_cli_conf_link(struct smc_link *link,
return -ENOLINK;
}
smc_llc_link_active(link_new);
- lgr->type = lgr_new_t;
+ if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
+ lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)
+ smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx);
+ else
+ smcr_lgr_set_type(lgr, lgr_new_t);
return 0;
}
@@ -822,7 +845,8 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
if (rc)
goto out_reject;
smc_llc_save_add_link_info(lnk_new, llc);
- lnk_new->link_id = llc->link_num;
+ lnk_new->link_id = llc->link_num; /* SMC server assigns link id */
+ smc_llc_link_set_uid(lnk_new);
rc = smc_ib_ready_link(lnk_new);
if (rc)
@@ -933,7 +957,7 @@ static void smc_llc_delete_asym_link(struct smc_link_group *lgr)
return; /* no asymmetric link */
if (!smc_link_downing(&lnk_asym->state))
return;
- /* tbd: lnk_new = smc_switch_conns(lgr, lnk_asym, false); */
+ lnk_new = smc_switch_conns(lgr, lnk_asym, false);
smc_wr_tx_wait_no_pending_sends(lnk_asym);
if (!lnk_new)
goto out_free;
@@ -1018,8 +1042,13 @@ static int smc_llc_srv_conf_link(struct smc_link *link,
false, SMC_LLC_DEL_LOST_PATH);
return -ENOLINK;
}
+ smc_llc_save_peer_uid(qentry);
smc_llc_link_active(link_new);
- lgr->type = lgr_new_t;
+ if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
+ lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)
+ smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx);
+ else
+ smcr_lgr_set_type(lgr, lgr_new_t);
smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
return 0;
}
@@ -1195,7 +1224,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
smc_llc_send_message(lnk, &qentry->msg); /* response */
if (smc_link_downing(&lnk_del->state)) {
- /* tbd: call smc_switch_conns(lgr, lnk_del, false); */
+ smc_switch_conns(lgr, lnk_del, false);
smc_wr_tx_wait_no_pending_sends(lnk_del);
}
smcr_link_clear(lnk_del);
@@ -1204,9 +1233,9 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
if (lnk_del == lnk_asym) {
/* expected deletion of asym link, don't change lgr state */
} else if (active_links == 1) {
- lgr->type = SMC_LGR_SINGLE;
+ smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
} else if (!active_links) {
- lgr->type = SMC_LGR_NONE;
+ smcr_lgr_set_type(lgr, SMC_LGR_NONE);
smc_lgr_terminate_sched(lgr);
}
out_unlock:
@@ -1215,6 +1244,29 @@ out:
kfree(qentry);
}
+/* try to send a DELETE LINK ALL request on any active link,
+ * waiting for send completion
+ */
+void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
+{
+ struct smc_llc_msg_del_link delllc = {0};
+ int i;
+
+ delllc.hd.common.type = SMC_LLC_DELETE_LINK;
+ delllc.hd.length = sizeof(delllc);
+ if (ord)
+ delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+ delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
+ delllc.reason = htonl(rsn);
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (!smc_link_usable(&lgr->lnk[i]))
+ continue;
+ if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc))
+ break;
+ }
+}
+
static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
{
struct smc_llc_msg_del_link *del_llc;
@@ -1230,6 +1282,8 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
if (qentry->msg.delete_link.hd.flags & SMC_LLC_FLAG_DEL_LINK_ALL) {
/* delete entire lgr */
+ smc_llc_send_link_delete_all(lgr, true, ntohl(
+ qentry->msg.delete_link.reason));
smc_lgr_terminate_sched(lgr);
goto out;
}
@@ -1245,7 +1299,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
goto out; /* asymmetric link already deleted */
if (smc_link_downing(&lnk_del->state)) {
- /* tbd: call smc_switch_conns(lgr, lnk_del, false); */
+ smc_switch_conns(lgr, lnk_del, false);
smc_wr_tx_wait_no_pending_sends(lnk_del);
}
if (!list_empty(&lgr->list)) {
@@ -1270,9 +1324,9 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
active_links = smc_llc_active_link_count(lgr);
if (active_links == 1) {
- lgr->type = SMC_LGR_SINGLE;
+ smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
} else if (!active_links) {
- lgr->type = SMC_LGR_NONE;
+ smcr_lgr_set_type(lgr, SMC_LGR_NONE);
smc_lgr_terminate_sched(lgr);
}
@@ -1368,6 +1422,14 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
}
+static void smc_llc_protocol_violation(struct smc_link_group *lgr, u8 type)
+{
+ pr_warn_ratelimited("smc: SMC-R lg %*phN LLC protocol violation: "
+ "llc_type %d\n", SMC_LGR_ID_SIZE, &lgr->id, type);
+ smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_PROT_VIOL);
+ smc_lgr_terminate_sched(lgr);
+}
+
/* flush the llc event queue */
static void smc_llc_event_flush(struct smc_link_group *lgr)
{
@@ -1468,6 +1530,9 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
}
return;
+ default:
+ smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type);
+ break;
}
out:
kfree(qentry);
@@ -1527,6 +1592,9 @@ static void smc_llc_rx_response(struct smc_link *link,
case SMC_LLC_CONFIRM_RKEY_CONT:
/* not used because max links is 3 */
break;
+ default:
+ smc_llc_protocol_violation(link->lgr, llc_type);
+ break;
}
kfree(qentry);
}
@@ -1709,12 +1777,29 @@ out:
return rc;
}
+void smc_llc_link_set_uid(struct smc_link *link)
+{
+ __be32 link_uid;
+
+ link_uid = htonl(*((u32 *)link->lgr->id) + link->link_id);
+ memcpy(link->link_uid, &link_uid, SMC_LGR_ID_SIZE);
+}
+
+/* save peers link user id, used for debug purposes */
+void smc_llc_save_peer_uid(struct smc_llc_qentry *qentry)
+{
+ memcpy(qentry->link->peer_link_uid, qentry->msg.confirm_link.link_uid,
+ SMC_LGR_ID_SIZE);
+}
+
/* evaluate confirm link request or response */
int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
enum smc_llc_reqresp type)
{
- if (type == SMC_LLC_REQ) /* SMC server assigns link_id */
+ if (type == SMC_LLC_REQ) { /* SMC server assigns link_id */
qentry->link->link_id = qentry->msg.confirm_link.link_num;
+ smc_llc_link_set_uid(qentry->link);
+ }
if (!(qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
return -ENOTSUPP;
return 0;
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index c335fc5f363c..55287376112d 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -60,6 +60,14 @@ static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr)
return NULL;
}
+/* Record @rsn as the termination reason code of the link group. Only the
+ * first reason is kept: once llc_termination_rsn is non-zero, later calls
+ * leave it unchanged.
+ */
+static inline void smc_llc_set_termination_rsn(struct smc_link_group *lgr,
+					       u32 rsn)
+{
+	if (lgr->llc_termination_rsn)
+		return;
+	lgr->llc_termination_rsn = rsn;
+}
+
/* transmit */
int smc_llc_send_confirm_link(struct smc_link *lnk,
enum smc_llc_reqresp reqresp);
@@ -84,11 +92,15 @@ int smc_llc_flow_initiate(struct smc_link_group *lgr,
void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow);
int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
enum smc_llc_reqresp type);
+void smc_llc_link_set_uid(struct smc_link *link);
+void smc_llc_save_peer_uid(struct smc_llc_qentry *qentry);
struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
struct smc_link *lnk,
int time_out, u8 exp_msg);
struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow);
void smc_llc_flow_qentry_del(struct smc_llc_flow *flow);
+void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord,
+ u32 rsn);
int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry);
int smc_llc_srv_add_link(struct smc_link *link);
void smc_llc_srv_add_link_local(struct smc_link *link);
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 417204572a69..54ba0443847e 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -482,12 +482,13 @@ static int smc_tx_rdma_writes(struct smc_connection *conn,
static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
+ struct smc_link *link = conn->lnk;
struct smc_rdma_wr *wr_rdma_buf;
struct smc_cdc_tx_pend *pend;
struct smc_wr_buf *wr_buf;
int rc;
- rc = smc_cdc_get_free_slot(conn, &wr_buf, &wr_rdma_buf, &pend);
+ rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend);
if (rc < 0) {
if (rc == -EBUSY) {
struct smc_sock *smc =
@@ -505,10 +506,17 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
}
spin_lock_bh(&conn->send_lock);
+ if (link != conn->lnk) {
+ /* link of connection changed, tx_work will restart */
+ smc_wr_tx_put_slot(link,
+ (struct smc_wr_tx_pend_priv *)pend);
+ rc = -ENOLINK;
+ goto out_unlock;
+ }
if (!pflags->urg_data_present) {
rc = smc_tx_rdma_writes(conn, wr_rdma_buf);
if (rc) {
- smc_wr_tx_put_slot(conn->lnk,
+ smc_wr_tx_put_slot(link,
(struct smc_wr_tx_pend_priv *)pend);
goto out_unlock;
}
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 3fd27bea4f7a..7239ba9b99dc 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -44,6 +44,7 @@ struct smc_wr_tx_pend { /* control data for a pending send request */
struct smc_link *link;
u32 idx;
struct smc_wr_tx_pend_priv priv;
+ u8 compl_requested;
};
/******************************** send queue *********************************/
@@ -103,6 +104,8 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
if (pnd_snd_idx == link->wr_tx_cnt)
return;
link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
+ if (link->wr_tx_pends[pnd_snd_idx].compl_requested)
+ complete(&link->wr_tx_compl[pnd_snd_idx]);
memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd));
/* clear the full struct smc_wr_tx_pend including .priv */
memset(&link->wr_tx_pends[pnd_snd_idx], 0,
@@ -275,6 +278,33 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
return rc;
}
+/* Send prepared WR slot via ib_post_send and wait for send completion
+ * notification.
+ * @link: link on which the prepared slot is sent
+ * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
+ * @timeout: passed unchanged to wait_for_completion_interruptible_timeout()
+ *
+ * Returns the error of smc_wr_tx_send() if sending fails, -ENODATA if the
+ * wait timed out or was interrupted, and 0 once the completion was signalled.
+ */
+int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
+			unsigned long timeout)
+{
+	struct smc_wr_tx_pend *pend;
+	u32 pnd_idx;
+	long wait_rc;
+	int rc;
+
+	pend = container_of(priv, struct smc_wr_tx_pend, priv);
+	pend->compl_requested = 1;
+	/* remember the slot index before sending: smc_wr_tx_process_cqe()
+	 * clears the whole pend entry right after signalling the completion,
+	 * so pend->idx must not be read again once the WR is posted
+	 */
+	pnd_idx = pend->idx;
+	init_completion(&link->wr_tx_compl[pnd_idx]);
+
+	rc = smc_wr_tx_send(link, priv);
+	if (rc)
+		return rc;
+	/* wait for completion by smc_wr_tx_process_cqe(); keep the result in
+	 * a long so a large remaining timeout is not truncated
+	 */
+	wait_rc = wait_for_completion_interruptible_timeout(
+					&link->wr_tx_compl[pnd_idx], timeout);
+	if (wait_rc <= 0)
+		return -ENODATA;
+	return 0;
+}
+
/* Register a memory region and wait for result. */
int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
{
@@ -555,6 +585,8 @@ void smc_wr_free_link(struct smc_link *lnk)
void smc_wr_free_link_mem(struct smc_link *lnk)
{
+ kfree(lnk->wr_tx_compl);
+ lnk->wr_tx_compl = NULL;
kfree(lnk->wr_tx_pends);
lnk->wr_tx_pends = NULL;
kfree(lnk->wr_tx_mask);
@@ -625,8 +657,15 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
GFP_KERNEL);
if (!link->wr_tx_pends)
goto no_mem_wr_tx_mask;
+ link->wr_tx_compl = kcalloc(SMC_WR_BUF_CNT,
+ sizeof(link->wr_tx_compl[0]),
+ GFP_KERNEL);
+ if (!link->wr_tx_compl)
+ goto no_mem_wr_tx_pends;
return 0;
+no_mem_wr_tx_pends:
+ kfree(link->wr_tx_pends);
no_mem_wr_tx_mask:
kfree(link->wr_tx_mask);
no_mem_wr_rx_sges:
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index f7eaeb3391f3..423b8709f1c9 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -101,6 +101,8 @@ int smc_wr_tx_put_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
int smc_wr_tx_send(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
+int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
+ unsigned long timeout);
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
smc_wr_tx_filter filter,