summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2022-03-23 10:52:56 -0700
committerJakub Kicinski <kuba@kernel.org>2022-03-23 10:53:49 -0700
commit89695196f0ba78a17453f9616355f2ca6b293402 (patch)
tree7a0f44d06e5b83002899667841baa9a3598bf819 /net
parent764f4eb6846f5475f1244767d24d25dd86528a4a (diff)
parentf92fcb5c00dc924a4661d5bf68de7937040f26b8 (diff)
downloadlinux-89695196f0ba78a17453f9616355f2ca6b293402.tar.bz2
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Merge in overtime fixes, no conflicts. Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r--net/ax25/af_ax25.c18
-rw-r--r--net/ax25/ax25_subr.c20
-rw-r--r--net/dsa/dsa2.c5
-rw-r--r--net/ipv4/route.c18
-rw-r--r--net/ipv4/tcp_output.c5
-rw-r--r--net/mptcp/protocol.c1
-rw-r--r--net/netfilter/nf_flow_table_inet.c17
-rw-r--r--net/netfilter/nf_flow_table_ip.c18
-rw-r--r--net/netfilter/nf_tables_api.c22
-rw-r--r--net/netfilter/nf_tables_core.c2
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/openvswitch/conntrack.c118
-rw-r--r--net/tipc/socket.c3
-rw-r--r--net/unix/af_unix.c16
-rw-r--r--net/xdp/xsk.c69
15 files changed, 208 insertions, 126 deletions
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 6bd097180772..992b6e5d85d7 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -89,18 +89,20 @@ again:
sk = s->sk;
if (!sk) {
spin_unlock_bh(&ax25_list_lock);
- s->ax25_dev = NULL;
ax25_disconnect(s, ENETUNREACH);
+ s->ax25_dev = NULL;
spin_lock_bh(&ax25_list_lock);
goto again;
}
sock_hold(sk);
spin_unlock_bh(&ax25_list_lock);
lock_sock(sk);
- s->ax25_dev = NULL;
- dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker);
- ax25_dev_put(ax25_dev);
ax25_disconnect(s, ENETUNREACH);
+ s->ax25_dev = NULL;
+ if (sk->sk_socket) {
+ dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker);
+ ax25_dev_put(ax25_dev);
+ }
release_sock(sk);
spin_lock_bh(&ax25_list_lock);
sock_put(sk);
@@ -979,14 +981,20 @@ static int ax25_release(struct socket *sock)
{
struct sock *sk = sock->sk;
ax25_cb *ax25;
+ ax25_dev *ax25_dev;
if (sk == NULL)
return 0;
sock_hold(sk);
- sock_orphan(sk);
lock_sock(sk);
+ sock_orphan(sk);
ax25 = sk_to_ax25(sk);
+ ax25_dev = ax25->ax25_dev;
+ if (ax25_dev) {
+ dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker);
+ ax25_dev_put(ax25_dev);
+ }
if (sk->sk_type == SOCK_SEQPACKET) {
switch (ax25->state) {
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 15ab812c4fe4..3a476e4f6cd0 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -261,12 +261,20 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
{
ax25_clear_queues(ax25);
- if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY))
- ax25_stop_heartbeat(ax25);
- ax25_stop_t1timer(ax25);
- ax25_stop_t2timer(ax25);
- ax25_stop_t3timer(ax25);
- ax25_stop_idletimer(ax25);
+ if (reason == ENETUNREACH) {
+ del_timer_sync(&ax25->timer);
+ del_timer_sync(&ax25->t1timer);
+ del_timer_sync(&ax25->t2timer);
+ del_timer_sync(&ax25->t3timer);
+ del_timer_sync(&ax25->idletimer);
+ } else {
+ if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY))
+ ax25_stop_heartbeat(ax25);
+ ax25_stop_t1timer(ax25);
+ ax25_stop_t2timer(ax25);
+ ax25_stop_t3timer(ax25);
+ ax25_stop_idletimer(ax25);
+ }
ax25->state = AX25_STATE_0;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 47b334226f4e..ca6af86964bc 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -1786,6 +1786,10 @@ void dsa_switch_shutdown(struct dsa_switch *ds)
struct dsa_port *dp;
mutex_lock(&dsa2_mutex);
+
+ if (!ds->setup)
+ goto out;
+
rtnl_lock();
dsa_switch_for_each_user_port(dp, ds) {
@@ -1802,6 +1806,7 @@ void dsa_switch_shutdown(struct dsa_switch *ds)
dp->master->dsa_ptr = NULL;
rtnl_unlock();
+out:
mutex_unlock(&dsa2_mutex);
}
EXPORT_SYMBOL_GPL(dsa_switch_shutdown);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 63f3256a407d..98c6f3429593 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -498,6 +498,15 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
}
EXPORT_SYMBOL(__ip_select_ident);
+static void ip_rt_fix_tos(struct flowi4 *fl4)
+{
+ __u8 tos = RT_FL_TOS(fl4);
+
+ fl4->flowi4_tos = tos & IPTOS_RT_MASK;
+ fl4->flowi4_scope = tos & RTO_ONLINK ?
+ RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
+}
+
static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
const struct sock *sk,
const struct iphdr *iph,
@@ -823,6 +832,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
rt = (struct rtable *) dst;
__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
+ ip_rt_fix_tos(&fl4);
__ip_do_redirect(rt, skb, &fl4, true);
}
@@ -1047,6 +1057,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct flowi4 fl4;
ip_rt_build_flow_key(&fl4, sk, skb);
+ ip_rt_fix_tos(&fl4);
/* Don't make lookup fail for bridged encapsulations */
if (skb && netif_is_any_bridge_port(skb->dev))
@@ -1121,6 +1132,8 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
goto out;
new = true;
+ } else {
+ ip_rt_fix_tos(&fl4);
}
__ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
@@ -2609,7 +2622,6 @@ add:
struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
const struct sk_buff *skb)
{
- __u8 tos = RT_FL_TOS(fl4);
struct fib_result res = {
.type = RTN_UNSPEC,
.fi = NULL,
@@ -2619,9 +2631,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
struct rtable *rth;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
- fl4->flowi4_tos = tos & IPTOS_RT_MASK;
- fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
- RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
+ ip_rt_fix_tos(fl4);
rcu_read_lock();
rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 81aaa7da3e8c..9ede847f4199 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3730,6 +3730,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
*/
static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{
+ struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req;
int space, err = 0;
@@ -3744,8 +3745,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
* private TCP options. The cost is reduced data space in SYN :(
*/
tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp);
+ /* Sync mss_cache after updating the mss_clamp */
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
- space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
+ space = __tcp_mtu_to_mss(sk, icsk->icsk_pmtu_cookie) -
MAX_TCP_OPTION_SPACE;
space = min_t(size_t, space, fo->size);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 101aeebeb9eb..0cbea3b6d0a4 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1199,6 +1199,7 @@ static struct sk_buff *__mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, g
tcp_skb_entail(ssk, skb);
return skb;
}
+ tcp_skb_tsorted_anchor_cleanup(skb);
kfree_skb(skb);
return NULL;
}
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 5c57ade6bd05..0ccabf3fa6aa 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -6,12 +6,29 @@
#include <linux/rhashtable.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
+#include <linux/if_vlan.h>
static unsigned int
nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
+ struct vlan_ethhdr *veth;
+ __be16 proto;
+
switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+ proto = veth->h_vlan_encapsulated_proto;
+ break;
+ case htons(ETH_P_PPP_SES):
+ proto = nf_flow_pppoe_proto(skb);
+ break;
+ default:
+ proto = skb->protocol;
+ break;
+ }
+
+ switch (proto) {
case htons(ETH_P_IP):
return nf_flow_offload_ip_hook(priv, skb, state);
case htons(ETH_P_IPV6):
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 010b0b4eb05d..32c0eb1b4821 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -8,8 +8,6 @@
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
-#include <linux/if_pppox.h>
-#include <linux/ppp_defs.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
@@ -260,22 +258,6 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
return NF_STOLEN;
}
-static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
-{
- __be16 proto;
-
- proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
- sizeof(struct pppoe_hdr)));
- switch (proto) {
- case htons(PPP_IP):
- return htons(ETH_P_IP);
- case htons(PPP_IPV6):
- return htons(ETH_P_IPV6);
- }
-
- return 0;
-}
-
static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
u32 *offset)
{
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a97e112fe406..c55ccd3cf2f8 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -9363,17 +9363,23 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
}
EXPORT_SYMBOL_GPL(nft_parse_u32_check);
-static unsigned int nft_parse_register(const struct nlattr *attr)
+static unsigned int nft_parse_register(const struct nlattr *attr, u32 *preg)
{
unsigned int reg;
reg = ntohl(nla_get_be32(attr));
switch (reg) {
case NFT_REG_VERDICT...NFT_REG_4:
- return reg * NFT_REG_SIZE / NFT_REG32_SIZE;
+ *preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE;
+ break;
+ case NFT_REG32_00...NFT_REG32_15:
+ *preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
+ break;
default:
- return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
+ return -ERANGE;
}
+
+ return 0;
}
/**
@@ -9415,7 +9421,10 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
u32 reg;
int err;
- reg = nft_parse_register(attr);
+ err = nft_parse_register(attr, &reg);
+ if (err < 0)
+ return err;
+
err = nft_validate_register_load(reg, len);
if (err < 0)
return err;
@@ -9470,7 +9479,10 @@ int nft_parse_register_store(const struct nft_ctx *ctx,
int err;
u32 reg;
- reg = nft_parse_register(attr);
+ err = nft_parse_register(attr, &reg);
+ if (err < 0)
+ return err;
+
err = nft_validate_register_store(ctx, reg, data, type, len);
if (err < 0)
return err;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index c6c05b2412c4..53f40e473855 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -215,7 +215,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
const struct nft_rule_dp *rule, *last_rule;
const struct net *net = nft_net(pkt);
const struct nft_expr *expr, *last;
- struct nft_regs regs;
+ struct nft_regs regs = {};
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
bool genbit = READ_ONCE(net->nft.gencursor);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7b344035bfe3..47a876ccd288 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -159,6 +159,8 @@ EXPORT_SYMBOL(do_trace_netlink_extack);
static inline u32 netlink_group_mask(u32 group)
{
+ if (group > 32)
+ return 0;
return group ? 1 << (group - 1) : 0;
}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index c07afff57dd3..4a947c13c813 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -734,6 +734,57 @@ static bool skb_nfct_cached(struct net *net,
}
#if IS_ENABLED(CONFIG_NF_NAT)
+static void ovs_nat_update_key(struct sw_flow_key *key,
+ const struct sk_buff *skb,
+ enum nf_nat_manip_type maniptype)
+{
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ __be16 src;
+
+ key->ct_state |= OVS_CS_F_SRC_NAT;
+ if (key->eth.type == htons(ETH_P_IP))
+ key->ipv4.addr.src = ip_hdr(skb)->saddr;
+ else if (key->eth.type == htons(ETH_P_IPV6))
+ memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
+ sizeof(key->ipv6.addr.src));
+ else
+ return;
+
+ if (key->ip.proto == IPPROTO_UDP)
+ src = udp_hdr(skb)->source;
+ else if (key->ip.proto == IPPROTO_TCP)
+ src = tcp_hdr(skb)->source;
+ else if (key->ip.proto == IPPROTO_SCTP)
+ src = sctp_hdr(skb)->source;
+ else
+ return;
+
+ key->tp.src = src;
+ } else {
+ __be16 dst;
+
+ key->ct_state |= OVS_CS_F_DST_NAT;
+ if (key->eth.type == htons(ETH_P_IP))
+ key->ipv4.addr.dst = ip_hdr(skb)->daddr;
+ else if (key->eth.type == htons(ETH_P_IPV6))
+ memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
+ sizeof(key->ipv6.addr.dst));
+ else
+ return;
+
+ if (key->ip.proto == IPPROTO_UDP)
+ dst = udp_hdr(skb)->dest;
+ else if (key->ip.proto == IPPROTO_TCP)
+ dst = tcp_hdr(skb)->dest;
+ else if (key->ip.proto == IPPROTO_SCTP)
+ dst = sctp_hdr(skb)->dest;
+ else
+ return;
+
+ key->tp.dst = dst;
+ }
+}
+
/* Modelled after nf_nat_ipv[46]_fn().
* range is only used for new, uninitialized NAT state.
* Returns either NF_ACCEPT or NF_DROP.
@@ -741,7 +792,7 @@ static bool skb_nfct_cached(struct net *net,
static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype)
+ enum nf_nat_manip_type maniptype, struct sw_flow_key *key)
{
int hooknum, nh_off, err = NF_ACCEPT;
@@ -813,58 +864,11 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
push:
skb_push_rcsum(skb, nh_off);
- return err;
-}
-
-static void ovs_nat_update_key(struct sw_flow_key *key,
- const struct sk_buff *skb,
- enum nf_nat_manip_type maniptype)
-{
- if (maniptype == NF_NAT_MANIP_SRC) {
- __be16 src;
-
- key->ct_state |= OVS_CS_F_SRC_NAT;
- if (key->eth.type == htons(ETH_P_IP))
- key->ipv4.addr.src = ip_hdr(skb)->saddr;
- else if (key->eth.type == htons(ETH_P_IPV6))
- memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
- sizeof(key->ipv6.addr.src));
- else
- return;
-
- if (key->ip.proto == IPPROTO_UDP)
- src = udp_hdr(skb)->source;
- else if (key->ip.proto == IPPROTO_TCP)
- src = tcp_hdr(skb)->source;
- else if (key->ip.proto == IPPROTO_SCTP)
- src = sctp_hdr(skb)->source;
- else
- return;
-
- key->tp.src = src;
- } else {
- __be16 dst;
-
- key->ct_state |= OVS_CS_F_DST_NAT;
- if (key->eth.type == htons(ETH_P_IP))
- key->ipv4.addr.dst = ip_hdr(skb)->daddr;
- else if (key->eth.type == htons(ETH_P_IPV6))
- memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
- sizeof(key->ipv6.addr.dst));
- else
- return;
-
- if (key->ip.proto == IPPROTO_UDP)
- dst = udp_hdr(skb)->dest;
- else if (key->ip.proto == IPPROTO_TCP)
- dst = tcp_hdr(skb)->dest;
- else if (key->ip.proto == IPPROTO_SCTP)
- dst = sctp_hdr(skb)->dest;
- else
- return;
+ /* Update the flow key if NAT successful. */
+ if (err == NF_ACCEPT)
+ ovs_nat_update_key(key, skb, maniptype);
- key->tp.dst = dst;
- }
+ return err;
}
/* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */
@@ -906,7 +910,7 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
} else {
return NF_ACCEPT; /* Connection is not NATed. */
}
- err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
+ err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype, key);
if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
if (ct->status & IPS_SRC_NAT) {
@@ -916,17 +920,13 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
maniptype = NF_NAT_MANIP_SRC;
err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,
- maniptype);
+ maniptype, key);
} else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL,
- NF_NAT_MANIP_SRC);
+ NF_NAT_MANIP_SRC, key);
}
}
- /* Mark NAT done if successful and update the flow key. */
- if (err == NF_ACCEPT)
- ovs_nat_update_key(key, skb, maniptype);
-
return err;
}
#else /* !CONFIG_NF_NAT */
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 7545321c3440..17f8c523e33b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2852,7 +2852,8 @@ static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list)
/* Try again later if dest link is congested */
if (tsk->cong_link_cnt) {
- sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100));
+ sk_reset_timer(sk, &sk->sk_timer,
+ jiffies + msecs_to_jiffies(100));
return;
}
/* Prepare SYN for retransmit */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 4247c4134f31..e71a312faa1e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2084,7 +2084,7 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
if (ousk->oob_skb)
consume_skb(ousk->oob_skb);
- ousk->oob_skb = skb;
+ WRITE_ONCE(ousk->oob_skb, skb);
scm_stat_add(other, skb);
skb_queue_tail(&other->sk_receive_queue, skb);
@@ -2602,9 +2602,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
oob_skb = u->oob_skb;
- if (!(state->flags & MSG_PEEK)) {
- u->oob_skb = NULL;
- }
+ if (!(state->flags & MSG_PEEK))
+ WRITE_ONCE(u->oob_skb, NULL);
unix_state_unlock(sk);
@@ -2639,7 +2638,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
skb = NULL;
} else if (sock_flag(sk, SOCK_URGINLINE)) {
if (!(flags & MSG_PEEK)) {
- u->oob_skb = NULL;
+ WRITE_ONCE(u->oob_skb, NULL);
consume_skb(skb);
}
} else if (!(flags & MSG_PEEK)) {
@@ -3094,11 +3093,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCATMARK:
{
struct sk_buff *skb;
- struct unix_sock *u = unix_sk(sk);
int answ = 0;
skb = skb_peek(&sk->sk_receive_queue);
- if (skb && skb == u->oob_skb)
+ if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
answ = 1;
err = put_user(answ, (int __user *)arg);
}
@@ -3139,6 +3137,10 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
mask |= EPOLLIN | EPOLLRDNORM;
if (sk_is_readable(sk))
mask |= EPOLLIN | EPOLLRDNORM;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (READ_ONCE(unix_sk(sk)->oob_skb))
+ mask |= EPOLLPRI;
+#endif
/* Connection-based need to check for termination and startup */
if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 2abd64e4d589..2c34caee0fd1 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -402,18 +402,8 @@ EXPORT_SYMBOL(xsk_tx_peek_release_desc_batch);
static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
{
struct net_device *dev = xs->dev;
- int err;
-
- rcu_read_lock();
- err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
- rcu_read_unlock();
-
- return err;
-}
-static int xsk_zc_xmit(struct xdp_sock *xs)
-{
- return xsk_wakeup(xs, XDP_WAKEUP_TX);
+ return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}
static void xsk_destruct_skb(struct sk_buff *skb)
@@ -532,6 +522,12 @@ static int xsk_generic_xmit(struct sock *sk)
mutex_lock(&xs->mutex);
+ /* Since we dropped the RCU read lock, the socket state might have changed. */
+ if (unlikely(!xsk_is_bound(xs))) {
+ err = -ENXIO;
+ goto out;
+ }
+
if (xs->queue_id >= xs->dev->real_num_tx_queues)
goto out;
@@ -595,16 +591,26 @@ out:
return err;
}
-static int __xsk_sendmsg(struct sock *sk)
+static int xsk_xmit(struct sock *sk)
{
struct xdp_sock *xs = xdp_sk(sk);
+ int ret;
if (unlikely(!(xs->dev->flags & IFF_UP)))
return -ENETDOWN;
if (unlikely(!xs->tx))
return -ENOBUFS;
- return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk);
+ if (xs->zc)
+ return xsk_wakeup(xs, XDP_WAKEUP_TX);
+
+ /* Drop the RCU lock since the SKB path might sleep. */
+ rcu_read_unlock();
+ ret = xsk_generic_xmit(sk);
+ /* Reaquire RCU lock before going into common code. */
+ rcu_read_lock();
+
+ return ret;
}
static bool xsk_no_wakeup(struct sock *sk)
@@ -618,7 +624,7 @@ static bool xsk_no_wakeup(struct sock *sk)
#endif
}
-static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
+static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
struct sock *sk = sock->sk;
@@ -638,11 +644,22 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
pool = xs->pool;
if (pool->cached_need_wakeup & XDP_WAKEUP_TX)
- return __xsk_sendmsg(sk);
+ return xsk_xmit(sk);
return 0;
}
-static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
+static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
+{
+ int ret;
+
+ rcu_read_lock();
+ ret = __xsk_sendmsg(sock, m, total_len);
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static int __xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
{
bool need_wait = !(flags & MSG_DONTWAIT);
struct sock *sk = sock->sk;
@@ -668,6 +685,17 @@ static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int fl
return 0;
}
+static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
+{
+ int ret;
+
+ rcu_read_lock();
+ ret = __xsk_recvmsg(sock, m, len, flags);
+ rcu_read_unlock();
+
+ return ret;
+}
+
static __poll_t xsk_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait)
{
@@ -678,8 +706,11 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
sock_poll_wait(file, sock, wait);
- if (unlikely(!xsk_is_bound(xs)))
+ rcu_read_lock();
+ if (unlikely(!xsk_is_bound(xs))) {
+ rcu_read_unlock();
return mask;
+ }
pool = xs->pool;
@@ -688,7 +719,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
xsk_wakeup(xs, pool->cached_need_wakeup);
else
/* Poll needs to drive Tx also in copy mode */
- __xsk_sendmsg(sk);
+ xsk_xmit(sk);
}
if (xs->rx && !xskq_prod_is_empty(xs->rx))
@@ -696,6 +727,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
if (xs->tx && xsk_tx_writeable(xs))
mask |= EPOLLOUT | EPOLLWRNORM;
+ rcu_read_unlock();
return mask;
}
@@ -727,7 +759,6 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
/* Wait for driver to stop using the xdp socket. */
xp_del_xsk(xs->pool, xs);
- xs->dev = NULL;
synchronize_net();
dev_put(dev);
}