summary refs log tree commit diff stats
path: root/net/rds
diff options
context:
space:
mode:
Diffstat (limited to 'net/rds')
-rw-r--r-- net/rds/connection.c 6
-rw-r--r-- net/rds/ib.c 12
-rw-r--r-- net/rds/ib.h 2
-rw-r--r-- net/rds/ib_rdma.c 4
-rw-r--r-- net/rds/message.c 12
-rw-r--r-- net/rds/rdma.c 10
-rw-r--r-- net/rds/rds.h 17
-rw-r--r-- net/rds/recv.c 30
-rw-r--r-- net/rds/send.c 21
-rw-r--r-- net/rds/tcp.h 1
-rw-r--r-- net/rds/tcp_connect.c 5
-rw-r--r-- net/rds/tcp_listen.c 42
-rw-r--r-- net/rds/tcp_send.c 2
-rw-r--r-- net/rds/threads.c 5
14 files changed, 91 insertions, 78 deletions
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 6a5ebdea7d2e..50a3789ac23e 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -124,11 +124,6 @@ static void __rds_conn_path_init(struct rds_connection *conn,
cp->cp_conn = conn;
atomic_set(&cp->cp_state, RDS_CONN_DOWN);
cp->cp_send_gen = 0;
- /* cp_outgoing is per-path. So we can only set it here
- * for the single-path transports.
- */
- if (!conn->c_trans->t_mp_capable)
- cp->cp_outgoing = (is_outgoing ? 1 : 0);
cp->cp_reconnect_jiffies = 0;
INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker);
INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker);
@@ -417,6 +412,7 @@ void rds_conn_destroy(struct rds_connection *conn)
"%pI4\n", conn, &conn->c_laddr,
&conn->c_faddr);
+ conn->c_destroy_in_prog = 1;
/* Ensure conn will not be scheduled for reconnect */
spin_lock_irq(&rds_conn_lock);
hlist_del_init_rcu(&conn->c_hash_node);
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 7a64c8db81ab..a0954ace3774 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -118,8 +118,8 @@ static void rds_ib_dev_free(struct work_struct *work)
void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
{
- BUG_ON(atomic_read(&rds_ibdev->refcount) <= 0);
- if (atomic_dec_and_test(&rds_ibdev->refcount))
+ BUG_ON(refcount_read(&rds_ibdev->refcount) == 0);
+ if (refcount_dec_and_test(&rds_ibdev->refcount))
queue_work(rds_wq, &rds_ibdev->free_work);
}
@@ -137,7 +137,7 @@ static void rds_ib_add_one(struct ib_device *device)
return;
spin_lock_init(&rds_ibdev->spinlock);
- atomic_set(&rds_ibdev->refcount, 1);
+ refcount_set(&rds_ibdev->refcount, 1);
INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
rds_ibdev->max_wrs = device->attrs.max_qp_wr;
@@ -205,10 +205,10 @@ static void rds_ib_add_one(struct ib_device *device)
down_write(&rds_ib_devices_lock);
list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
up_write(&rds_ib_devices_lock);
- atomic_inc(&rds_ibdev->refcount);
+ refcount_inc(&rds_ibdev->refcount);
ib_set_client_data(device, &rds_ib_client, rds_ibdev);
- atomic_inc(&rds_ibdev->refcount);
+ refcount_inc(&rds_ibdev->refcount);
rds_ib_nodev_connect();
@@ -239,7 +239,7 @@ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device)
rcu_read_lock();
rds_ibdev = ib_get_client_data(device, &rds_ib_client);
if (rds_ibdev)
- atomic_inc(&rds_ibdev->refcount);
+ refcount_inc(&rds_ibdev->refcount);
rcu_read_unlock();
return rds_ibdev;
}
diff --git a/net/rds/ib.h b/net/rds/ib.h
index ec550626e221..bf4822407567 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -230,7 +230,7 @@ struct rds_ib_device {
unsigned int max_initiator_depth;
unsigned int max_responder_resources;
spinlock_t spinlock; /* protect the above */
- atomic_t refcount;
+ refcount_t refcount;
struct work_struct free_work;
int *vector_load;
};
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 977f69886c00..9a3c54e659e9 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -52,7 +52,7 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
if (i_ipaddr->ipaddr == ipaddr) {
- atomic_inc(&rds_ibdev->refcount);
+ refcount_inc(&rds_ibdev->refcount);
rcu_read_unlock();
return rds_ibdev;
}
@@ -134,7 +134,7 @@ void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *con
spin_unlock_irq(&ib_nodev_conns_lock);
ic->rds_ibdev = rds_ibdev;
- atomic_inc(&rds_ibdev->refcount);
+ refcount_inc(&rds_ibdev->refcount);
}
void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
diff --git a/net/rds/message.c b/net/rds/message.c
index 49bfb512d808..4318cc9b78f7 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -48,8 +48,8 @@ static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
void rds_message_addref(struct rds_message *rm)
{
- rdsdebug("addref rm %p ref %d\n", rm, atomic_read(&rm->m_refcount));
- atomic_inc(&rm->m_refcount);
+ rdsdebug("addref rm %p ref %d\n", rm, refcount_read(&rm->m_refcount));
+ refcount_inc(&rm->m_refcount);
}
EXPORT_SYMBOL_GPL(rds_message_addref);
@@ -83,9 +83,9 @@ static void rds_message_purge(struct rds_message *rm)
void rds_message_put(struct rds_message *rm)
{
- rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount));
- WARN(!atomic_read(&rm->m_refcount), "danger refcount zero on %p\n", rm);
- if (atomic_dec_and_test(&rm->m_refcount)) {
+ rdsdebug("put rm %p ref %d\n", rm, refcount_read(&rm->m_refcount));
+ WARN(!refcount_read(&rm->m_refcount), "danger refcount zero on %p\n", rm);
+ if (refcount_dec_and_test(&rm->m_refcount)) {
BUG_ON(!list_empty(&rm->m_sock_item));
BUG_ON(!list_empty(&rm->m_conn_item));
rds_message_purge(rm);
@@ -206,7 +206,7 @@ struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp)
rm->m_used_sgs = 0;
rm->m_total_sgs = extra_len / sizeof(struct scatterlist);
- atomic_set(&rm->m_refcount, 1);
+ refcount_set(&rm->m_refcount, 1);
INIT_LIST_HEAD(&rm->m_sock_item);
INIT_LIST_HEAD(&rm->m_conn_item);
spin_lock_init(&rm->m_rs_lock);
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index f06fac4886b0..8886f15abe90 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -84,7 +84,7 @@ static struct rds_mr *rds_mr_tree_walk(struct rb_root *root, u64 key,
if (insert) {
rb_link_node(&insert->r_rb_node, parent, p);
rb_insert_color(&insert->r_rb_node, root);
- atomic_inc(&insert->r_refcount);
+ refcount_inc(&insert->r_refcount);
}
return NULL;
}
@@ -99,7 +99,7 @@ static void rds_destroy_mr(struct rds_mr *mr)
unsigned long flags;
rdsdebug("RDS: destroy mr key is %x refcnt %u\n",
- mr->r_key, atomic_read(&mr->r_refcount));
+ mr->r_key, refcount_read(&mr->r_refcount));
if (test_and_set_bit(RDS_MR_DEAD, &mr->r_state))
return;
@@ -223,7 +223,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
goto out;
}
- atomic_set(&mr->r_refcount, 1);
+ refcount_set(&mr->r_refcount, 1);
RB_CLEAR_NODE(&mr->r_rb_node);
mr->r_trans = rs->rs_transport;
mr->r_sock = rs;
@@ -307,7 +307,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
rdsdebug("RDS: get_mr key is %x\n", mr->r_key);
if (mr_ret) {
- atomic_inc(&mr->r_refcount);
+ refcount_inc(&mr->r_refcount);
*mr_ret = mr;
}
@@ -756,7 +756,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
if (!mr)
err = -EINVAL; /* invalid r_key */
else
- atomic_inc(&mr->r_refcount);
+ refcount_inc(&mr->r_refcount);
spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
if (mr) {
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 82d38ccf5e8b..516bcc89b46f 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -8,6 +8,7 @@
#include <linux/mutex.h>
#include <linux/rds.h>
#include <linux/rhashtable.h>
+#include <linux/refcount.h>
#include "info.h"
@@ -92,6 +93,8 @@ enum {
#define RDS_MPATH_HASH(rs, n) (jhash_1word((rs)->rs_bound_port, \
(rs)->rs_hash_initval) & ((n) - 1))
+/* True if laddr < faddr in host byte order; the smaller address initiates. */
+#define IS_CANONICAL(laddr, faddr) (ntohl(laddr) < ntohl(faddr))
/* Per mpath connection state */
struct rds_conn_path {
struct rds_connection *cp_conn;
@@ -125,8 +128,6 @@ struct rds_conn_path {
unsigned int cp_unacked_packets;
unsigned int cp_unacked_bytes;
- unsigned int cp_outgoing:1,
- cp_pad_to_32:31;
unsigned int cp_index;
};
@@ -137,7 +138,8 @@ struct rds_connection {
__be32 c_faddr;
unsigned int c_loopback:1,
c_ping_triggered:1,
- c_pad_to_32:30;
+ c_destroy_in_prog:1,
+ c_pad_to_32:29;
int c_npaths;
struct rds_connection *c_passive;
struct rds_transport *c_trans;
@@ -260,7 +262,7 @@ struct rds_ext_header_rdma_dest {
#define RDS_MSG_RX_CMSG 3
struct rds_incoming {
- atomic_t i_refcount;
+ refcount_t i_refcount;
struct list_head i_item;
struct rds_connection *i_conn;
struct rds_conn_path *i_conn_path;
@@ -275,7 +277,7 @@ struct rds_incoming {
struct rds_mr {
struct rb_node r_rb_node;
- atomic_t r_refcount;
+ refcount_t r_refcount;
u32 r_key;
/* A copy of the creation flags */
@@ -354,7 +356,7 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
#define RDS_MSG_FLUSH 8
struct rds_message {
- atomic_t m_refcount;
+ refcount_t m_refcount;
struct list_head m_sock_item;
struct list_head m_conn_item;
struct rds_incoming m_inc;
@@ -827,6 +829,7 @@ void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
is_acked_func is_acked);
void rds_send_path_drop_acked(struct rds_conn_path *cp, u64 ack,
is_acked_func is_acked);
+void rds_send_ping(struct rds_connection *conn, int cp_index);
int rds_send_pong(struct rds_conn_path *cp, __be16 dport);
/* rdma.c */
@@ -854,7 +857,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
void __rds_put_mr_final(struct rds_mr *mr);
static inline void rds_mr_put(struct rds_mr *mr)
{
- if (atomic_dec_and_test(&mr->r_refcount))
+ if (refcount_dec_and_test(&mr->r_refcount))
__rds_put_mr_final(mr);
}
diff --git a/net/rds/recv.c b/net/rds/recv.c
index c70c32cb05f5..b25bcfe411ca 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -45,7 +45,7 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
{
int i;
- atomic_set(&inc->i_refcount, 1);
+ refcount_set(&inc->i_refcount, 1);
INIT_LIST_HEAD(&inc->i_item);
inc->i_conn = conn;
inc->i_saddr = saddr;
@@ -61,7 +61,7 @@ EXPORT_SYMBOL_GPL(rds_inc_init);
void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp,
__be32 saddr)
{
- atomic_set(&inc->i_refcount, 1);
+ refcount_set(&inc->i_refcount, 1);
INIT_LIST_HEAD(&inc->i_item);
inc->i_conn = cp->cp_conn;
inc->i_conn_path = cp;
@@ -74,14 +74,14 @@ EXPORT_SYMBOL_GPL(rds_inc_path_init);
static void rds_inc_addref(struct rds_incoming *inc)
{
- rdsdebug("addref inc %p ref %d\n", inc, atomic_read(&inc->i_refcount));
- atomic_inc(&inc->i_refcount);
+ rdsdebug("addref inc %p ref %d\n", inc, refcount_read(&inc->i_refcount));
+ refcount_inc(&inc->i_refcount);
}
void rds_inc_put(struct rds_incoming *inc)
{
- rdsdebug("put inc %p ref %d\n", inc, atomic_read(&inc->i_refcount));
- if (atomic_dec_and_test(&inc->i_refcount)) {
+ rdsdebug("put inc %p ref %d\n", inc, refcount_read(&inc->i_refcount));
+ if (refcount_dec_and_test(&inc->i_refcount)) {
BUG_ON(!list_empty(&inc->i_item));
inc->i_conn->c_trans->inc_free(inc);
@@ -215,10 +215,10 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
switch (type) {
case RDS_EXTHDR_NPATHS:
conn->c_npaths = min_t(int, RDS_MPATH_WORKERS,
- buffer.rds_npaths);
+ be16_to_cpu(buffer.rds_npaths));
break;
case RDS_EXTHDR_GEN_NUM:
- new_peer_gen_num = buffer.rds_gen_num;
+ new_peer_gen_num = be32_to_cpu(buffer.rds_gen_num);
break;
default:
pr_warn_ratelimited("ignoring unknown exthdr type "
@@ -227,6 +227,7 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
}
/* if RDS_EXTHDR_NPATHS was not found, default to a single-path */
conn->c_npaths = max_t(int, conn->c_npaths, 1);
+ conn->c_ping_triggered = 0;
rds_conn_peer_gen_update(conn, new_peer_gen_num);
}
@@ -244,8 +245,7 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
* called after reception of the probe-pong on all mprds_paths.
* Otherwise (sender of probe-ping is not the smaller ip addr): just call
* rds_conn_path_connect_if_down on the hashed path. (see rule 4)
- * 4. when cp_index > 0, rds_connect_worker must only trigger
- * a connection if laddr < faddr.
+ * 4. rds_connect_worker must only trigger a connection if laddr < faddr.
* 5. sender may end up queuing the packet on the cp. will get sent out later.
* when connection is completed.
*/
@@ -254,8 +254,9 @@ static void rds_start_mprds(struct rds_connection *conn)
int i;
struct rds_conn_path *cp;
- if (conn->c_npaths > 1 && conn->c_laddr < conn->c_faddr) {
- for (i = 1; i < conn->c_npaths; i++) {
+ if (conn->c_npaths > 1 &&
+ IS_CANONICAL(conn->c_laddr, conn->c_faddr)) {
+ for (i = 0; i < conn->c_npaths; i++) {
cp = &conn->c_path[i];
rds_conn_path_connect_if_down(cp);
}
@@ -339,14 +340,15 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
rds_stats_inc(s_recv_ping);
rds_send_pong(cp, inc->i_hdr.h_sport);
/* if this is a handshake ping, start multipath if necessary */
- if (RDS_HS_PROBE(inc->i_hdr.h_sport, inc->i_hdr.h_dport)) {
+ if (RDS_HS_PROBE(be16_to_cpu(inc->i_hdr.h_sport),
+ be16_to_cpu(inc->i_hdr.h_dport))) {
rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn);
rds_start_mprds(cp->cp_conn);
}
goto out;
}
- if (inc->i_hdr.h_dport == RDS_FLAG_PROBE_PORT &&
+ if (be16_to_cpu(inc->i_hdr.h_dport) == RDS_FLAG_PROBE_PORT &&
inc->i_hdr.h_sport == 0) {
rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn);
/* if this is a handshake pong, start multipath if necessary */
diff --git a/net/rds/send.c b/net/rds/send.c
index 5cc64039caf7..e81aa176f4e2 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -971,8 +971,6 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
return ret;
}
-static void rds_send_ping(struct rds_connection *conn);
-
static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn)
{
int hash;
@@ -982,7 +980,7 @@ static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn)
else
hash = RDS_MPATH_HASH(rs, conn->c_npaths);
if (conn->c_npaths == 0 && hash != 0) {
- rds_send_ping(conn);
+ rds_send_ping(conn, 0);
if (conn->c_npaths == 0) {
wait_event_interruptible(conn->c_hs_waitq,
@@ -1246,15 +1244,17 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
rm->m_inc.i_hdr.h_flags |= h_flags;
cp->cp_next_tx_seq++;
- if (RDS_HS_PROBE(sport, dport) && cp->cp_conn->c_trans->t_mp_capable) {
- u16 npaths = RDS_MPATH_WORKERS;
+ if (RDS_HS_PROBE(be16_to_cpu(sport), be16_to_cpu(dport)) &&
+ cp->cp_conn->c_trans->t_mp_capable) {
+ __be16 npaths = cpu_to_be16(RDS_MPATH_WORKERS);
+ __be32 my_gen_num = cpu_to_be32(cp->cp_conn->c_my_gen_num);
rds_message_add_extension(&rm->m_inc.i_hdr,
RDS_EXTHDR_NPATHS, &npaths,
sizeof(npaths));
rds_message_add_extension(&rm->m_inc.i_hdr,
RDS_EXTHDR_GEN_NUM,
- &cp->cp_conn->c_my_gen_num,
+ &my_gen_num,
sizeof(u32));
}
spin_unlock_irqrestore(&cp->cp_lock, flags);
@@ -1280,11 +1280,11 @@ rds_send_pong(struct rds_conn_path *cp, __be16 dport)
return rds_send_probe(cp, 0, dport, 0);
}
-static void
-rds_send_ping(struct rds_connection *conn)
+void
+rds_send_ping(struct rds_connection *conn, int cp_index)
{
unsigned long flags;
- struct rds_conn_path *cp = &conn->c_path[0];
+ struct rds_conn_path *cp = &conn->c_path[cp_index];
spin_lock_irqsave(&cp->cp_lock, flags);
if (conn->c_ping_triggered) {
@@ -1293,5 +1293,6 @@ rds_send_ping(struct rds_connection *conn)
}
conn->c_ping_triggered = 1;
spin_unlock_irqrestore(&cp->cp_lock, flags);
- rds_send_probe(&conn->c_path[0], RDS_FLAG_PROBE_PORT, 0, 0);
+ rds_send_probe(cp, cpu_to_be16(RDS_FLAG_PROBE_PORT), 0, 0);
}
+EXPORT_SYMBOL_GPL(rds_send_ping);
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 56ea6620fcf9..f8800b7ce79c 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -71,6 +71,7 @@ void rds_tcp_listen_data_ready(struct sock *sk);
int rds_tcp_accept_one(struct socket *sock);
int rds_tcp_keepalive(struct socket *sock);
void *rds_tcp_listen_sock_def_readable(struct net *net);
+void rds_tcp_set_linger(struct socket *sock);
/* tcp_recv.c */
int rds_tcp_recv_init(void);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index d6839d96d539..cbe08a1fa4c7 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk)
* RDS connection as RDS_CONN_UP until the reconnect,
* to avoid RDS datagram loss.
*/
- if (cp->cp_conn->c_laddr > cp->cp_conn->c_faddr &&
+ if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) &&
rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
RDS_CONN_ERROR)) {
rds_conn_path_drop(cp);
@@ -135,7 +135,6 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest),
O_NONBLOCK);
- cp->cp_outgoing = 1;
rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret);
if (ret == -EINPROGRESS)
ret = 0;
@@ -171,6 +170,8 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
cp->cp_conn, tc, sock);
if (sock) {
+ if (cp->cp_conn->c_destroy_in_prog)
+ rds_tcp_set_linger(sock);
sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
lock_sock(sock->sk);
rds_tcp_restore_callbacks(sock, tc); /* tc->tc_sock = NULL */
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 507678853e6c..c6dc8caaf5ca 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -83,7 +83,7 @@ static
struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
{
int i;
- bool peer_is_smaller = (conn->c_faddr < conn->c_laddr);
+ bool peer_is_smaller = IS_CANONICAL(conn->c_faddr, conn->c_laddr);
int npaths = max_t(int, 1, conn->c_npaths);
/* for mprds, all paths MUST be initiated by the peer
@@ -112,6 +112,17 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
return NULL;
}
+void rds_tcp_set_linger(struct socket *sock)
+{
+ struct linger no_linger = { /* value applied to @sock via SO_LINGER */
+ .l_onoff = 1, /* lingering enabled ... */
+ .l_linger = 0, /* ... with a linger time of 0 */
+ };
+
+ kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
+ (char *)&no_linger, sizeof(no_linger)); /* result is not checked */
+}
+
int rds_tcp_accept_one(struct socket *sock)
{
struct socket *new_sock = NULL;
@@ -171,30 +182,27 @@ int rds_tcp_accept_one(struct socket *sock)
if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_ERROR)
goto rst_nsk;
if (rs_tcp->t_sock) {
- /* Need to resolve a duelling SYN between peers.
- * We have an outstanding SYN to this peer, which may
- * potentially have transitioned to the RDS_CONN_UP state,
- * so we must quiesce any send threads before resetting
- * c_transport_data.
- */
- if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr) ||
- !cp->cp_outgoing) {
- goto rst_nsk;
- } else {
- rds_tcp_reset_callbacks(new_sock, cp);
- cp->cp_outgoing = 0;
- /* rds_connect_path_complete() marks RDS_CONN_UP */
- rds_connect_path_complete(cp, RDS_CONN_RESETTING);
- }
+ /* Duelling SYN has been handled in rds_tcp_accept_one() */
+ rds_tcp_reset_callbacks(new_sock, cp);
+ /* rds_connect_path_complete() marks RDS_CONN_UP */
+ rds_connect_path_complete(cp, RDS_CONN_RESETTING);
} else {
rds_tcp_set_callbacks(new_sock, cp);
rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
}
new_sock = NULL;
ret = 0;
+ if (conn->c_npaths == 0)
+ rds_send_ping(cp->cp_conn, cp->cp_index);
goto out;
rst_nsk:
- /* reset the newly returned accept sock and bail */
+ /* reset the newly returned accept sock and bail.
+ * It is safe to set linger on new_sock because the RDS connection
+ * has not been brought up on new_sock, so no RDS-level data could
+ * be pending on it. By setting linger, we achieve the side-effect
+ * of avoiding TIME_WAIT state on new_sock.
+ */
+ rds_tcp_set_linger(new_sock);
kernel_sock_shutdown(new_sock, SHUT_RDWR);
ret = 0;
out:
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 52d11d7725c8..0d8616aa5bad 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -202,7 +202,7 @@ void rds_tcp_write_space(struct sock *sk)
tc->t_last_seen_una = rds_tcp_snd_una(tc);
rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked);
- if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
+ if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
out:
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 3e447d056d09..2852bc1d37d4 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -127,7 +127,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
/* let peer with smaller addr initiate reconnect, to avoid duels */
if (conn->c_trans->t_type == RDS_TRANS_TCP &&
- conn->c_laddr > conn->c_faddr)
+ !IS_CANONICAL(conn->c_laddr, conn->c_faddr))
return;
set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
@@ -156,7 +156,8 @@ void rds_connect_worker(struct work_struct *work)
struct rds_connection *conn = cp->cp_conn;
int ret;
- if (cp->cp_index > 0 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr)
+ if (cp->cp_index > 0 &&
+ !IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr))
return;
clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);