diff options
-rw-r--r-- | include/uapi/linux/rds.h | 11 | ||||
-rw-r--r-- | net/rds/af_rds.c | 37 | ||||
-rw-r--r-- | net/rds/connection.c | 21 | ||||
-rw-r--r-- | net/rds/ib.c | 11 | ||||
-rw-r--r-- | net/rds/ib.h | 4 | ||||
-rw-r--r-- | net/rds/ib_cm.c | 72 | ||||
-rw-r--r-- | net/rds/ib_recv.c | 4 | ||||
-rw-r--r-- | net/rds/ib_send.c | 5 | ||||
-rw-r--r-- | net/rds/rdma_transport.c | 14 | ||||
-rw-r--r-- | net/rds/rdma_transport.h | 6 | ||||
-rw-r--r-- | net/rds/rds.h | 14 | ||||
-rw-r--r-- | net/rds/recv.c | 1 | ||||
-rw-r--r-- | net/rds/send.c | 7 | ||||
-rw-r--r-- | net/rds/tcp.c | 8 | ||||
-rw-r--r-- | net/rds/tcp_listen.c | 2 | ||||
-rw-r--r-- | net/rds/threads.c | 1 |
16 files changed, 166 insertions, 52 deletions
diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index 8b73cb603c5f..5d0f76c780e5 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -69,6 +69,12 @@ #define RDS_TRANS_COUNT 3 #define RDS_TRANS_NONE (~0) +/* IOCTLS commands for SOL_RDS */ +#define SIOCRDSSETTOS (SIOCPROTOPRIVATE) +#define SIOCRDSGETTOS (SIOCPROTOPRIVATE + 1) + +typedef __u8 rds_tos_t; + /* * Control message types for SOL_RDS. * @@ -149,6 +155,7 @@ struct rds_info_connection { __be32 faddr; __u8 transport[TRANSNAMSIZ]; /* null term ascii */ __u8 flags; + __u8 tos; } __attribute__((packed)); struct rds6_info_connection { @@ -171,6 +178,7 @@ struct rds_info_message { __be16 lport; __be16 fport; __u8 flags; + __u8 tos; } __attribute__((packed)); struct rds6_info_message { @@ -214,6 +222,7 @@ struct rds_info_tcp_socket { __u32 last_sent_nxt; __u32 last_expected_una; __u32 last_seen_una; + __u8 tos; } __attribute__((packed)); struct rds6_info_tcp_socket { @@ -240,6 +249,7 @@ struct rds_info_rdma_connection { __u32 max_send_sge; __u32 rdma_mr_max; __u32 rdma_mr_size; + __u8 tos; }; struct rds6_info_rdma_connection { @@ -253,6 +263,7 @@ struct rds6_info_rdma_connection { __u32 max_send_sge; __u32 rdma_mr_max; __u32 rdma_mr_size; + __u8 tos; }; /* RDS message Receive Path Latency points */ diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 65571a6273c3..d6cc97fbbbb0 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -254,7 +254,40 @@ static __poll_t rds_poll(struct file *file, struct socket *sock, static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { - return -ENOIOCTLCMD; + struct rds_sock *rs = rds_sk_to_rs(sock->sk); + rds_tos_t utos, tos = 0; + + switch (cmd) { + case SIOCRDSSETTOS: + if (get_user(utos, (rds_tos_t __user *)arg)) + return -EFAULT; + + if (rs->rs_transport && + rs->rs_transport->get_tos_map) + tos = rs->rs_transport->get_tos_map(utos); + else + return -ENOIOCTLCMD; + + spin_lock_bh(&rds_sock_lock); + if (rs->rs_tos || 
rs->rs_conn) { + spin_unlock_bh(&rds_sock_lock); + return -EINVAL; + } + rs->rs_tos = tos; + spin_unlock_bh(&rds_sock_lock); + break; + case SIOCRDSGETTOS: + spin_lock_bh(&rds_sock_lock); + tos = rs->rs_tos; + spin_unlock_bh(&rds_sock_lock); + if (put_user(tos, (rds_tos_t __user *)arg)) + return -EFAULT; + break; + default: + return -ENOIOCTLCMD; + } + + return 0; } static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, @@ -650,6 +683,8 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol) spin_lock_init(&rs->rs_rdma_lock); rs->rs_rdma_keys = RB_ROOT; rs->rs_rx_traces = 0; + rs->rs_tos = 0; + rs->rs_conn = NULL; spin_lock_bh(&rds_sock_lock); list_add_tail(&rs->rs_item, &rds_sock_list); diff --git a/net/rds/connection.c b/net/rds/connection.c index 3bd2f4a5a30d..7ea134f9a825 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -84,7 +84,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, struct rds_transport *trans, - int dev_if) + u8 tos, int dev_if) { struct rds_connection *conn, *ret = NULL; @@ -92,6 +92,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net, if (ipv6_addr_equal(&conn->c_faddr, faddr) && ipv6_addr_equal(&conn->c_laddr, laddr) && conn->c_trans == trans && + conn->c_tos == tos && net == rds_conn_net(conn) && conn->c_dev_if == dev_if) { ret = conn; @@ -139,6 +140,7 @@ static void __rds_conn_path_init(struct rds_connection *conn, atomic_set(&cp->cp_state, RDS_CONN_DOWN); cp->cp_send_gen = 0; cp->cp_reconnect_jiffies = 0; + cp->cp_conn->c_proposed_version = RDS_PROTOCOL_VERSION; INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker); INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker); INIT_DELAYED_WORK(&cp->cp_conn_w, rds_connect_worker); @@ -159,7 +161,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, struct rds_transport *trans, - gfp_t 
gfp, + gfp_t gfp, u8 tos, int is_outgoing, int dev_if) { @@ -171,7 +173,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1); rcu_read_lock(); - conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if); + conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos, dev_if); if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && @@ -205,6 +207,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, conn->c_isv6 = !ipv6_addr_v4mapped(laddr); conn->c_faddr = *faddr; conn->c_dev_if = dev_if; + conn->c_tos = tos; #if IS_ENABLED(CONFIG_IPV6) /* If the local address is link local, set c_bound_if to be the @@ -297,7 +300,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, struct rds_connection *found; found = rds_conn_lookup(net, head, laddr, faddr, trans, - dev_if); + tos, dev_if); if (found) { struct rds_conn_path *cp; int i; @@ -332,10 +335,10 @@ out: struct rds_connection *rds_conn_create(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, - struct rds_transport *trans, gfp_t gfp, - int dev_if) + struct rds_transport *trans, u8 tos, + gfp_t gfp, int dev_if) { - return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if); + return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 0, dev_if); } EXPORT_SYMBOL_GPL(rds_conn_create); @@ -343,9 +346,9 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, struct rds_transport *trans, - gfp_t gfp, int dev_if) + u8 tos, gfp_t gfp, int dev_if) { - return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if); + return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 1, dev_if); } EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); diff --git a/net/rds/ib.c b/net/rds/ib.c index 9d7b7586f240..2da9b75bad16 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -301,6 +301,7 @@ static int 
rds_ib_conn_info_visitor(struct rds_connection *conn, iinfo->src_addr = conn->c_laddr.s6_addr32[3]; iinfo->dst_addr = conn->c_faddr.s6_addr32[3]; + iinfo->tos = conn->c_tos; memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid)); memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); @@ -514,6 +515,15 @@ void rds_ib_exit(void) rds_ib_mr_exit(); } +static u8 rds_ib_get_tos_map(u8 tos) +{ + /* 1:1 user to transport map for RDMA transport. + * In future, if custom map is desired, hook can export + * user configurable map. + */ + return tos; +} + struct rds_transport rds_ib_transport = { .laddr_check = rds_ib_laddr_check, .xmit_path_complete = rds_ib_xmit_path_complete, @@ -536,6 +546,7 @@ struct rds_transport rds_ib_transport = { .sync_mr = rds_ib_sync_mr, .free_mr = rds_ib_free_mr, .flush_mrs = rds_ib_flush_mrs, + .get_tos_map = rds_ib_get_tos_map, .t_owner = THIS_MODULE, .t_name = "infiniband", .t_unloading = rds_ib_is_unloading, diff --git a/net/rds/ib.h b/net/rds/ib.h index 71ff356ee702..752f92235a38 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -67,7 +67,9 @@ struct rds_ib_conn_priv_cmn { u8 ricpc_protocol_major; u8 ricpc_protocol_minor; __be16 ricpc_protocol_minor_mask; /* bitmask */ - __be32 ricpc_reserved1; + u8 ricpc_dp_toss; + u8 ripc_reserved1; + __be16 ripc_reserved2; __be64 ricpc_ack_seq; __be32 ricpc_credit; /* non-zero enables flow ctl */ }; diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index bfbb31f0c7fd..66c6eb56072b 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -133,23 +133,24 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even rds_ib_set_flow_control(conn, be32_to_cpu(credit)); } - if (conn->c_version < RDS_PROTOCOL(3, 1)) { - pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n", - &conn->c_laddr, &conn->c_faddr, - RDS_PROTOCOL_MAJOR(conn->c_version), - RDS_PROTOCOL_MINOR(conn->c_version)); - set_bit(RDS_DESTROY_PENDING, &conn->c_path[0].cp_flags); - rds_conn_destroy(conn); - return; - 
} else { - pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c> version %u.%u%s\n", - ic->i_active_side ? "Active" : "Passive", - &conn->c_laddr, &conn->c_faddr, - RDS_PROTOCOL_MAJOR(conn->c_version), - RDS_PROTOCOL_MINOR(conn->c_version), - ic->i_flowctl ? ", flow control" : ""); + if (conn->c_version < RDS_PROTOCOL_VERSION) { + if (conn->c_version != RDS_PROTOCOL_COMPAT_VERSION) { + pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n", + &conn->c_laddr, &conn->c_faddr, + RDS_PROTOCOL_MAJOR(conn->c_version), + RDS_PROTOCOL_MINOR(conn->c_version)); + rds_conn_destroy(conn); + return; + } } + pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c,%d> version %u.%u%s\n", + ic->i_active_side ? "Active" : "Passive", + &conn->c_laddr, &conn->c_faddr, conn->c_tos, + RDS_PROTOCOL_MAJOR(conn->c_version), + RDS_PROTOCOL_MINOR(conn->c_version), + ic->i_flowctl ? ", flow control" : ""); + atomic_set(&ic->i_cq_quiesce, 0); /* Init rings and fill recv. this needs to wait until protocol @@ -184,6 +185,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even NULL); } + conn->c_proposed_version = conn->c_version; rds_connect_complete(conn); } @@ -220,6 +222,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); dp->ricp_v6.dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic)); + dp->ricp_v6.dp_cmn.ricpc_dp_toss = conn->c_tos; conn_param->private_data = &dp->ricp_v6; conn_param->private_data_len = sizeof(dp->ricp_v6); @@ -234,6 +237,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); dp->ricp_v4.dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic)); + dp->ricp_v4.dp_cmn.ricpc_dp_toss = conn->c_tos; conn_param->private_data = &dp->ricp_v4; conn_param->private_data_len = sizeof(dp->ricp_v4); @@ -389,10 +393,9 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data) rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); 
break; default: - rdsdebug("Fatal QP Event %u (%s) " - "- connection %pI6c->%pI6c, reconnecting\n", - event->event, ib_event_msg(event->event), - &conn->c_laddr, &conn->c_faddr); + rdsdebug("Fatal QP Event %u (%s) - connection %pI6c->%pI6c, reconnecting\n", + event->event, ib_event_msg(event->event), + &conn->c_laddr, &conn->c_faddr); rds_conn_drop(conn); break; } @@ -660,13 +663,16 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6) /* Even if len is crap *now* I still want to check it. -ASG */ if (event->param.conn.private_data_len < data_len || major == 0) - return RDS_PROTOCOL_3_0; + return RDS_PROTOCOL_4_0; common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS; - if (major == 3 && common) { - version = RDS_PROTOCOL_3_0; + if (major == 4 && common) { + version = RDS_PROTOCOL_4_0; while ((common >>= 1) != 0) version++; + } else if (RDS_PROTOCOL_COMPAT_VERSION == + RDS_PROTOCOL(major, minor)) { + version = RDS_PROTOCOL_COMPAT_VERSION; } else { if (isv6) printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI6c using incompatible protocol version %u.%u\n", @@ -729,8 +735,10 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, /* Check whether the remote protocol version matches ours. 
*/ version = rds_ib_protocol_compatible(event, isv6); - if (!version) + if (!version) { + err = RDS_RDMA_REJ_INCOMPAT; goto out; + } dp = event->param.conn.private_data; if (isv6) { @@ -771,15 +779,16 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, daddr6 = &d_mapped_addr; } - rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid " - "0x%llx\n", saddr6, daddr6, - RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version), + rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid 0x%llx, tos:%d\n", + saddr6, daddr6, RDS_PROTOCOL_MAJOR(version), + RDS_PROTOCOL_MINOR(version), (unsigned long long)be64_to_cpu(lguid), - (unsigned long long)be64_to_cpu(fguid)); + (unsigned long long)be64_to_cpu(fguid), dp_cmn->ricpc_dp_toss); /* RDS/IB is not currently netns aware, thus init_net */ conn = rds_conn_create(&init_net, daddr6, saddr6, - &rds_ib_transport, GFP_KERNEL, ifindex); + &rds_ib_transport, dp_cmn->ricpc_dp_toss, + GFP_KERNEL, ifindex); if (IS_ERR(conn)) { rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn)); conn = NULL; @@ -846,7 +855,7 @@ out: if (conn) mutex_unlock(&conn->c_cm_lock); if (err) - rdma_reject(cm_id, NULL, 0); + rdma_reject(cm_id, &err, sizeof(int)); return destroy; } @@ -861,7 +870,7 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6) /* If the peer doesn't do protocol negotiation, we must * default to RDSv3.0 */ - rds_ib_set_protocol(conn, RDS_PROTOCOL_3_0); + rds_ib_set_protocol(conn, RDS_PROTOCOL_4_1); ic->i_flowctl = rds_ib_sysctl_flow_control; /* advertise flow control */ ret = rds_ib_setup_qp(conn); @@ -870,7 +879,8 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6) goto out; } - rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION, + rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, + conn->c_proposed_version, UINT_MAX, UINT_MAX, isv6); ret = rdma_connect(cm_id, &conn_param); if (ret) diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 
2f16146e4ec9..d395eec98959 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -986,9 +986,9 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, } else { /* We expect errors as the qp is drained during shutdown */ if (rds_conn_up(conn) || rds_conn_connecting(conn)) - rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n", + rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), disconnecting and reconnecting\n", &conn->c_laddr, &conn->c_faddr, - wc->status, + conn->c_tos, wc->status, ib_wc_status_msg(wc->status)); } diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 4e0c36acf866..09c46f2e97fa 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -305,8 +305,9 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) /* We expect errors as the qp is drained during shutdown */ if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) { - rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n", - &conn->c_laddr, &conn->c_faddr, wc->status, + rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), disconnecting and reconnecting\n", + &conn->c_laddr, &conn->c_faddr, + conn->c_tos, wc->status, ib_wc_status_msg(wc->status)); } } diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 6b0f57c83a2a..46bce8389066 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -51,6 +51,8 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, struct rds_connection *conn = cm_id->context; struct rds_transport *trans; int ret = 0; + int *err; + u8 len; rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id, event->event, rdma_event_msg(event->event)); @@ -81,6 +83,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_ADDR_RESOLVED: + rdma_set_service_type(cm_id, conn->c_tos); /* XXX 
do we need to clean up if this fails? */ ret = rdma_resolve_route(cm_id, RDS_RDMA_RESOLVE_TIMEOUT_MS); @@ -106,8 +109,19 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_REJECTED: + if (!conn) + break; + err = (int *)rdma_consumer_reject_data(cm_id, event, &len); + if (!err || (err && ((*err) == RDS_RDMA_REJ_INCOMPAT))) { + pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n", + &conn->c_laddr, &conn->c_faddr); + conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION; + conn->c_tos = 0; + rds_conn_drop(conn); + } rdsdebug("Connection rejected: %s\n", rdma_reject_msg(cm_id, event->status)); + break; /* FALLTHROUGH */ case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR: diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h index 200d3134aaae..bfafd4a6d827 100644 --- a/net/rds/rdma_transport.h +++ b/net/rds/rdma_transport.h @@ -11,6 +11,12 @@ #define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000 +/* Below reject reason is for legacy interoperability issue with non-linux + * RDS endpoints where older version incompatibility is conveyed via value 1. + * For future version(s), proper encoded reject reason should be used.
+ */ +#define RDS_RDMA_REJ_INCOMPAT 1 + int rds_rdma_conn_connect(struct rds_connection *conn); int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); diff --git a/net/rds/rds.h b/net/rds/rds.h index 4ffe100ff5e6..0d8f67cadd74 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -19,10 +19,13 @@ */ #define RDS_PROTOCOL_3_0 0x0300 #define RDS_PROTOCOL_3_1 0x0301 +#define RDS_PROTOCOL_4_0 0x0400 +#define RDS_PROTOCOL_4_1 0x0401 #define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1 #define RDS_PROTOCOL_MAJOR(v) ((v) >> 8) #define RDS_PROTOCOL_MINOR(v) ((v) & 255) #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) +#define RDS_PROTOCOL_COMPAT_VERSION RDS_PROTOCOL_3_1 /* The following ports, 16385, 18634, 18635, are registered with IANA as * the ports to be used for RDS over TCP and UDP. Currently, only RDS over @@ -151,9 +154,13 @@ struct rds_connection { struct rds_cong_map *c_fcong; /* Protocol version */ + unsigned int c_proposed_version; unsigned int c_version; possible_net_t c_net; + /* TOS */ + u8 c_tos; + struct list_head c_map_item; unsigned long c_map_queued; @@ -567,6 +574,7 @@ struct rds_transport { void (*free_mr)(void *trans_private, int invalidate); void (*flush_mrs)(void); bool (*t_unloading)(struct rds_connection *conn); + u8 (*get_tos_map)(u8 tos); }; /* Bind hash table key length. 
It is the sum of the size of a struct @@ -648,6 +656,7 @@ struct rds_sock { u8 rs_rx_traces; u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; struct rds_msg_zcopy_queue rs_zcookie_queue; + u8 rs_tos; }; static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk) @@ -756,13 +765,14 @@ void rds_conn_exit(void); struct rds_connection *rds_conn_create(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, - struct rds_transport *trans, gfp_t gfp, + struct rds_transport *trans, + u8 tos, gfp_t gfp, int dev_if); struct rds_connection *rds_conn_create_outgoing(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, struct rds_transport *trans, - gfp_t gfp, int dev_if); + u8 tos, gfp_t gfp, int dev_if); void rds_conn_shutdown(struct rds_conn_path *cpath); void rds_conn_destroy(struct rds_connection *conn); void rds_conn_drop(struct rds_connection *conn); diff --git a/net/rds/recv.c b/net/rds/recv.c index 6bb6b16ca270..853de4876088 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -782,6 +782,7 @@ void rds_inc_info_copy(struct rds_incoming *inc, minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence); minfo.len = be32_to_cpu(inc->i_hdr.h_len); + minfo.tos = inc->i_conn->c_tos; if (flip) { minfo.laddr = daddr; diff --git a/net/rds/send.c b/net/rds/send.c index fd8b687d5c05..166dd578c1cc 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1277,12 +1277,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* rds_conn_create has a spinlock that runs with IRQ off. * Caching the conn in the socket helps a lot. 
*/ - if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr)) + if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr) && + rs->rs_tos == rs->rs_conn->c_tos) { conn = rs->rs_conn; - else { + } else { conn = rds_conn_create_outgoing(sock_net(sock->sk), &rs->rs_bound_addr, &daddr, - rs->rs_transport, + rs->rs_transport, rs->rs_tos, sock->sk->sk_allocation, scope_id); if (IS_ERR(conn)) { diff --git a/net/rds/tcp.c b/net/rds/tcp.c index c16f0a362c32..fd2694174607 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -267,6 +267,7 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, tsinfo.last_sent_nxt = tc->t_last_sent_nxt; tsinfo.last_expected_una = tc->t_last_expected_una; tsinfo.last_seen_una = tc->t_last_seen_una; + tsinfo.tos = tc->t_cpath->cp_conn->c_tos; rds_info_copy(iter, &tsinfo, sizeof(tsinfo)); } @@ -452,6 +453,12 @@ static void rds_tcp_destroy_conns(void) static void rds_tcp_exit(void); +static u8 rds_tcp_get_tos_map(u8 tos) +{ + /* all user tos mapped to default 0 for TCP transport */ + return 0; +} + struct rds_transport rds_tcp_transport = { .laddr_check = rds_tcp_laddr_check, .xmit_path_prepare = rds_tcp_xmit_path_prepare, @@ -466,6 +473,7 @@ struct rds_transport rds_tcp_transport = { .inc_free = rds_tcp_inc_free, .stats_info_copy = rds_tcp_stats_info_copy, .exit = rds_tcp_exit, + .get_tos_map = rds_tcp_get_tos_map, .t_owner = THIS_MODULE, .t_name = "tcp", .t_type = RDS_TRANS_TCP, diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index c12203f646da..810a3a49e947 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -200,7 +200,7 @@ int rds_tcp_accept_one(struct socket *sock) conn = rds_conn_create(sock_net(sock->sk), my_addr, peer_addr, - &rds_tcp_transport, GFP_KERNEL, dev_if); + &rds_tcp_transport, 0, GFP_KERNEL, dev_if); if (IS_ERR(conn)) { ret = PTR_ERR(conn); diff --git a/net/rds/threads.c b/net/rds/threads.c index e64f9e4c3cda..32dc50f0a303 100644 --- a/net/rds/threads.c +++ 
b/net/rds/threads.c @@ -93,6 +93,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr) queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); } rcu_read_unlock(); + cp->cp_conn->c_proposed_version = RDS_PROTOCOL_VERSION; } EXPORT_SYMBOL_GPL(rds_connect_path_complete); |