summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-02-05 11:23:45 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-05 11:23:45 -0800
commit9d82f5eb3376cbae96ad36a063a9390de1694546 (patch)
treed52daee3296d28455aff25c98b23fffab5282cd8 /net
parent14365ea2b868c96e18da73a3f454c7bcdb0627c5 (diff)
parenta409caecb2e17fc475533738dd1c69b32e13fe09 (diff)
downloadlinux-9d82f5eb3376cbae96ad36a063a9390de1694546.tar.bz2
MMerge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: 1) Stretch ACKs can kill performance with Reno and CUBIC congestion control, largely due to LRO and GRO. Fix from Neal Cardwell. 2) Fix userland breakage because we accidently emit zero length netlink messages from the bridging code. From Roopa Prabhu. 3) Carry handling in generic csum_tcpudp_nofold is broken, fix from Karl Beldan. 4) Remove bogus dev_set_net() calls from CAIF driver, from Nicolas Dichtel. 5) Make sure PPP deflation never returns a length greater then the output buffer, otherwise we overflow and trigger skb_over_panic(). Fix from Florian Westphal. 6) COSA driver needs VIRT_TO_BUS Kconfig dependencies, from Arnd Bergmann. 7) Don't increase route cached MTU on datagram too big ICMPs. From Li Wei. 8) Fix error path leaks in nf_tables, from Pablo Neira Ayuso. 9) Fix bitmask handling regression in netlink that broke things like acpi userland tools. From Pablo Neira Ayuso. 10) Wrong header pointer passed to param_type2af() in SCTP code, from Saran Maruti Ramanara. 11) Stacked vlans not handled correctly by vlan_get_protocol(), from Toshiaki Makita. 12) Add missing DMA memory barrier to xgene driver, from Iyappan Subramanian. 13) Fix crash in rate estimators, from Eric Dumazet. 14) We've been adding various workarounds, one after another, for the change which added the per-net tcp_sock. It was meant to reduce socket contention but added lots of problems. Reduce this instead to a proper per-cpu socket and that rids us of all the daemons. From Eric Dumazet. 15) Fix memory corruption and OOPS in mlx4 driver, from Jack Morgenstein. 16) When we disabled UFO in the virtio_net device, it introduces some serious performance regressions. The orignal problem was IPV6 fragment ID generation, so fix that properly instead. From Vlad Yasevich. 17) sr9700 driver build breaks on xtensa because it defines macros with the same name as those used by the arch code. Use more unique names. From Chen Gang. 18) Fix endianness in new virio 1.0 mode of the vhost net driver, from Michael S Tsirkin. 19) Several sysctls were setting the maxlen attribute incorrectly, from Sasha Levin. 20) Don't accept an FQ scheduler quantum of zero, that leads to crashes. From Kenneth Klette Jonassen. 21) Fix dumping of non-existing actions in the packet scheduler classifier. From Ignacy Gawędzki. 22) Return the write work_done value when doing TX work in the qlcnic driver. 23) ip6gre_err accesses the info field with the wrong endianness, from Sabrina Dubroca. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (54 commits) sit: fix some __be16/u16 mismatches ipv6: fix sparse errors in ip6_make_flowlabel() net: remove some sparse warnings flow_keys: n_proto type should be __be16 ip6_gre: fix endianness errors in ip6gre_err qlcnic: Fix NAPI poll routine for Tx completion amd-xgbe: Set RSS enablement based on hardware features amd-xgbe: Adjust for zero-based traffic class count cls_api.c: Fix dumping of non-existing actions' stats. pkt_sched: fq: avoid hang when quantum 0 net: rds: use correct size for max unacked packets and bytes vhost/net: fix up num_buffers endian-ness gianfar: correct the bad expression while writing bit-pattern net: usb: sr9700: Use 'SR_' prefix for the common register macros Revert "drivers/net: Disable UFO through virtio" Revert "drivers/net, ipv6: Select IPv6 fragment idents for virtio UFO packets" ipv6: Select fragment id during UFO segmentation if not set. xen-netback: stop the guest rx thread after a fatal error net/mlx4_core: Fix kernel Oops (mem corruption) when working with more than 80 VFs isdn: off by one in connect_res() ...
Diffstat (limited to 'net')
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c29
-rw-r--r--net/caif/chnl_net.c1
-rw-r--r--net/core/dev.c37
-rw-r--r--net/core/rtnetlink.c6
-rw-r--r--net/ipv4/ip_output.c29
-rw-r--r--net/ipv4/route.c3
-rw-r--r--net/ipv4/tcp_bic.c2
-rw-r--r--net/ipv4/tcp_cong.c32
-rw-r--r--net/ipv4/tcp_cubic.c39
-rw-r--r--net/ipv4/tcp_ipv4.c37
-rw-r--r--net/ipv4/tcp_scalable.c3
-rw-r--r--net/ipv4/tcp_veno.c2
-rw-r--r--net/ipv4/tcp_yeah.c2
-rw-r--r--net/ipv6/ip6_gre.c4
-rw-r--r--net/ipv6/ip6_output.c14
-rw-r--r--net/ipv6/output_core.c41
-rw-r--r--net/ipv6/sit.c8
-rw-r--r--net/ipv6/udp_offload.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c33
-rw-r--r--net/netfilter/nf_tables_api.c28
-rw-r--r--net/netfilter/nft_masq.c26
-rw-r--r--net/netfilter/nft_nat.c40
-rw-r--r--net/netfilter/nft_redir.c25
-rw-r--r--net/netlink/af_netlink.c4
-rw-r--r--net/rds/sysctl.c4
-rw-r--r--net/sched/cls_api.c7
-rw-r--r--net/sched/sch_fq.c10
-rw-r--r--net/sctp/sm_make_chunk.c2
28 files changed, 274 insertions, 204 deletions
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index b0330aecbf97..3244aead0926 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -265,22 +265,12 @@ out:
data[NFT_REG_VERDICT].verdict = NF_DROP;
}
-static int nft_reject_bridge_validate_hooks(const struct nft_chain *chain)
+static int nft_reject_bridge_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
{
- struct nft_base_chain *basechain;
-
- if (chain->flags & NFT_BASE_CHAIN) {
- basechain = nft_base_chain(chain);
-
- switch (basechain->ops[0].hooknum) {
- case NF_BR_PRE_ROUTING:
- case NF_BR_LOCAL_IN:
- break;
- default:
- return -EOPNOTSUPP;
- }
- }
- return 0;
+ return nft_chain_validate_hooks(ctx->chain, (1 << NF_BR_PRE_ROUTING) |
+ (1 << NF_BR_LOCAL_IN));
}
static int nft_reject_bridge_init(const struct nft_ctx *ctx,
@@ -290,7 +280,7 @@ static int nft_reject_bridge_init(const struct nft_ctx *ctx,
struct nft_reject *priv = nft_expr_priv(expr);
int icmp_code, err;
- err = nft_reject_bridge_validate_hooks(ctx->chain);
+ err = nft_reject_bridge_validate(ctx, expr, NULL);
if (err < 0)
return err;
@@ -341,13 +331,6 @@ nla_put_failure:
return -1;
}
-static int nft_reject_bridge_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
-{
- return nft_reject_bridge_validate_hooks(ctx->chain);
-}
-
static struct nft_expr_type nft_reject_bridge_type;
static const struct nft_expr_ops nft_reject_bridge_ops = {
.type = &nft_reject_bridge_type,
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 4589ff67bfa9..67a4a36febd1 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -470,7 +470,6 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev,
ASSERT_RTNL();
caifdev = netdev_priv(dev);
caif_netlink_parms(data, &caifdev->conn_req);
- dev_net_set(caifdev->netdev, src_net);
ret = register_netdevice(dev);
if (ret)
diff --git a/net/core/dev.c b/net/core/dev.c
index 171420e75b03..7fe82929f509 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2352,7 +2352,6 @@ EXPORT_SYMBOL(skb_checksum_help);
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
- unsigned int vlan_depth = skb->mac_len;
__be16 type = skb->protocol;
/* Tunnel gso handlers can set protocol to ethernet. */
@@ -2366,35 +2365,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
type = eth->h_proto;
}
- /* if skb->protocol is 802.1Q/AD then the header should already be
- * present at mac_len - VLAN_HLEN (if mac_len > 0), or at
- * ETH_HLEN otherwise
- */
- if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
- if (vlan_depth) {
- if (WARN_ON(vlan_depth < VLAN_HLEN))
- return 0;
- vlan_depth -= VLAN_HLEN;
- } else {
- vlan_depth = ETH_HLEN;
- }
- do {
- struct vlan_hdr *vh;
-
- if (unlikely(!pskb_may_pull(skb,
- vlan_depth + VLAN_HLEN)))
- return 0;
-
- vh = (struct vlan_hdr *)(skb->data + vlan_depth);
- type = vh->h_vlan_encapsulated_proto;
- vlan_depth += VLAN_HLEN;
- } while (type == htons(ETH_P_8021Q) ||
- type == htons(ETH_P_8021AD));
- }
-
- *depth = vlan_depth;
-
- return type;
+ return __vlan_get_protocol(skb, type, depth);
}
/**
@@ -5323,7 +5294,7 @@ void netdev_upper_dev_unlink(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
-void netdev_adjacent_add_links(struct net_device *dev)
+static void netdev_adjacent_add_links(struct net_device *dev)
{
struct netdev_adjacent *iter;
@@ -5348,7 +5319,7 @@ void netdev_adjacent_add_links(struct net_device *dev)
}
}
-void netdev_adjacent_del_links(struct net_device *dev)
+static void netdev_adjacent_del_links(struct net_device *dev)
{
struct netdev_adjacent *iter;
@@ -6656,7 +6627,7 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
if (!queue)
return NULL;
netdev_init_one_queue(dev, queue, NULL);
- queue->qdisc = &noop_qdisc;
+ RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
queue->qdisc_sleeping = &noop_qdisc;
rcu_assign_pointer(dev->ingress_queue, queue);
#endif
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9cf6fe9ddc0c..446cbaf81185 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2895,12 +2895,16 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
goto errout;
}
+ if (!skb->len)
+ goto errout;
+
rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
return 0;
errout:
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
- rtnl_set_sk_err(net, RTNLGRP_LINK, err);
+ if (err)
+ rtnl_set_sk_err(net, RTNLGRP_LINK, err);
return err;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b50861b22b6b..c373c0708d97 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1506,23 +1506,8 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
/*
* Generic function to send a packet as reply to another packet.
* Used to send some TCP resets/acks so far.
- *
- * Use a fake percpu inet socket to avoid false sharing and contention.
*/
-static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = {
- .sk = {
- .__sk_common = {
- .skc_refcnt = ATOMIC_INIT(1),
- },
- .sk_wmem_alloc = ATOMIC_INIT(1),
- .sk_allocation = GFP_ATOMIC,
- .sk_flags = (1UL << SOCK_USE_WRITE_QUEUE),
- },
- .pmtudisc = IP_PMTUDISC_WANT,
- .uc_ttl = -1,
-};
-
-void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
+void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
const struct ip_options *sopt,
__be32 daddr, __be32 saddr,
const struct ip_reply_arg *arg,
@@ -1532,9 +1517,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
struct ipcm_cookie ipc;
struct flowi4 fl4;
struct rtable *rt = skb_rtable(skb);
+ struct net *net = sock_net(sk);
struct sk_buff *nskb;
- struct sock *sk;
- struct inet_sock *inet;
int err;
if (__ip_options_echo(&replyopts.opt.opt, skb, sopt))
@@ -1565,15 +1549,11 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
if (IS_ERR(rt))
return;
- inet = &get_cpu_var(unicast_sock);
+ inet_sk(sk)->tos = arg->tos;
- inet->tos = arg->tos;
- sk = &inet->sk;
sk->sk_priority = skb->priority;
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
- sock_net_set(sk, net);
- __skb_queue_head_init(&sk->sk_write_queue);
sk->sk_sndbuf = sysctl_wmem_default;
err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
len, 0, &ipc, &rt, MSG_DONTWAIT);
@@ -1589,13 +1569,10 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
arg->csumoffset) = csum_fold(csum_add(nskb->csum,
arg->csum));
nskb->ip_summed = CHECKSUM_NONE;
- skb_orphan(nskb);
skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
ip_push_pending_frames(sk, &fl4);
}
out:
- put_cpu_var(unicast_sock);
-
ip_rt_put(rt);
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d58dd0ec3e53..52e1f2bf0ca2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -966,6 +966,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
if (dst->dev->mtu < mtu)
return;
+ if (rt->rt_pmtu && rt->rt_pmtu < mtu)
+ return;
+
if (mtu < ip_rt_min_pmtu)
mtu = ip_rt_min_pmtu;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index bb395d46a389..c037644eafb7 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -150,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
tcp_slow_start(tp, acked);
else {
bictcp_update(ca, tp->snd_cwnd);
- tcp_cong_avoid_ai(tp, ca->cnt);
+ tcp_cong_avoid_ai(tp, ca->cnt, 1);
}
}
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 27ead0dd16bc..8670e68e2ce6 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -291,26 +291,32 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
* ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and
* returns the leftover acks to adjust cwnd in congestion avoidance mode.
*/
-void tcp_slow_start(struct tcp_sock *tp, u32 acked)
+u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
{
u32 cwnd = tp->snd_cwnd + acked;
if (cwnd > tp->snd_ssthresh)
cwnd = tp->snd_ssthresh + 1;
+ acked -= cwnd - tp->snd_cwnd;
tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
+
+ return acked;
}
EXPORT_SYMBOL_GPL(tcp_slow_start);
-/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w) */
-void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w)
+/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w),
+ * for every packet that was ACKed.
+ */
+void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
{
+ tp->snd_cwnd_cnt += acked;
if (tp->snd_cwnd_cnt >= w) {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- tp->snd_cwnd_cnt = 0;
- } else {
- tp->snd_cwnd_cnt++;
+ u32 delta = tp->snd_cwnd_cnt / w;
+
+ tp->snd_cwnd_cnt -= delta * w;
+ tp->snd_cwnd += delta;
}
+ tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
}
EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
@@ -329,11 +335,13 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
return;
/* In "safe" area, increase. */
- if (tp->snd_cwnd <= tp->snd_ssthresh)
- tcp_slow_start(tp, acked);
+ if (tp->snd_cwnd <= tp->snd_ssthresh) {
+ acked = tcp_slow_start(tp, acked);
+ if (!acked)
+ return;
+ }
/* In dangerous area, increase slowly. */
- else
- tcp_cong_avoid_ai(tp, tp->snd_cwnd);
+ tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
}
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 6b6002416a73..4b276d1ed980 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -93,9 +93,7 @@ struct bictcp {
u32 epoch_start; /* beginning of an epoch */
u32 ack_cnt; /* number of acks */
u32 tcp_cwnd; /* estimated tcp cwnd */
-#define ACK_RATIO_SHIFT 4
-#define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT)
- u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */
+ u16 unused;
u8 sample_cnt; /* number of samples to decide curr_rtt */
u8 found; /* the exit point is found? */
u32 round_start; /* beginning of each round */
@@ -114,7 +112,6 @@ static inline void bictcp_reset(struct bictcp *ca)
ca->bic_K = 0;
ca->delay_min = 0;
ca->epoch_start = 0;
- ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
ca->ack_cnt = 0;
ca->tcp_cwnd = 0;
ca->found = 0;
@@ -205,23 +202,30 @@ static u32 cubic_root(u64 a)
/*
* Compute congestion window to use.
*/
-static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
+static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
{
u32 delta, bic_target, max_cnt;
u64 offs, t;
- ca->ack_cnt++; /* count the number of ACKs */
+ ca->ack_cnt += acked; /* count the number of ACKed packets */
if (ca->last_cwnd == cwnd &&
(s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
return;
+ /* The CUBIC function can update ca->cnt at most once per jiffy.
+ * On all cwnd reduction events, ca->epoch_start is set to 0,
+ * which will force a recalculation of ca->cnt.
+ */
+ if (ca->epoch_start && tcp_time_stamp == ca->last_time)
+ goto tcp_friendliness;
+
ca->last_cwnd = cwnd;
ca->last_time = tcp_time_stamp;
if (ca->epoch_start == 0) {
ca->epoch_start = tcp_time_stamp; /* record beginning */
- ca->ack_cnt = 1; /* start counting */
+ ca->ack_cnt = acked; /* start counting */
ca->tcp_cwnd = cwnd; /* syn with cubic */
if (ca->last_max_cwnd <= cwnd) {
@@ -283,6 +287,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
if (ca->last_max_cwnd == 0 && ca->cnt > 20)
ca->cnt = 20; /* increase cwnd 5% per RTT */
+tcp_friendliness:
/* TCP Friendly */
if (tcp_friendliness) {
u32 scale = beta_scale;
@@ -301,7 +306,6 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
}
}
- ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack;
if (ca->cnt == 0) /* cannot be zero */
ca->cnt = 1;
}
@@ -317,11 +321,12 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (tp->snd_cwnd <= tp->snd_ssthresh) {
if (hystart && after(ack, ca->end_seq))
bictcp_hystart_reset(sk);
- tcp_slow_start(tp, acked);
- } else {
- bictcp_update(ca, tp->snd_cwnd);
- tcp_cong_avoid_ai(tp, ca->cnt);
+ acked = tcp_slow_start(tp, acked);
+ if (!acked)
+ return;
}
+ bictcp_update(ca, tp->snd_cwnd, acked);
+ tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
static u32 bictcp_recalc_ssthresh(struct sock *sk)
@@ -411,20 +416,10 @@ static void hystart_update(struct sock *sk, u32 delay)
*/
static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
{
- const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
u32 delay;
- if (icsk->icsk_ca_state == TCP_CA_Open) {
- u32 ratio = ca->delayed_ack;
-
- ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
- ratio += cnt;
-
- ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT);
- }
-
/* Some calls are for duplicates without timetamps */
if (rtt_us < 0)
return;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a3f72d7fc06c..d22f54482bab 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -683,7 +683,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
arg.bound_dev_if = sk->sk_bound_dev_if;
arg.tos = ip_hdr(skb)->tos;
- ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
+ ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
+ skb, &TCP_SKB_CB(skb)->header.h4.opt,
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
&arg, arg.iov[0].iov_len);
@@ -767,7 +768,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
if (oif)
arg.bound_dev_if = oif;
arg.tos = tos;
- ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
+ ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
+ skb, &TCP_SKB_CB(skb)->header.h4.opt,
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
&arg, arg.iov[0].iov_len);
@@ -2428,14 +2430,39 @@ struct proto tcp_prot = {
};
EXPORT_SYMBOL(tcp_prot);
+static void __net_exit tcp_sk_exit(struct net *net)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
+ free_percpu(net->ipv4.tcp_sk);
+}
+
static int __net_init tcp_sk_init(struct net *net)
{
+ int res, cpu;
+
+ net->ipv4.tcp_sk = alloc_percpu(struct sock *);
+ if (!net->ipv4.tcp_sk)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ struct sock *sk;
+
+ res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
+ IPPROTO_TCP, net);
+ if (res)
+ goto fail;
+ *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
+ }
net->ipv4.sysctl_tcp_ecn = 2;
return 0;
-}
-static void __net_exit tcp_sk_exit(struct net *net)
-{
+fail:
+ tcp_sk_exit(net);
+
+ return res;
}
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 6824afb65d93..333bcb2415ff 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -25,7 +25,8 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (tp->snd_cwnd <= tp->snd_ssthresh)
tcp_slow_start(tp, acked);
else
- tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT));
+ tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
+ 1);
}
static u32 tcp_scalable_ssthresh(struct sock *sk)
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index a4d2d2d88dca..112151eeee45 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -159,7 +159,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
/* In the "non-congestive state", increase cwnd
* every rtt.
*/
- tcp_cong_avoid_ai(tp, tp->snd_cwnd);
+ tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1);
} else {
/* In the "congestive state", increase cwnd
* every other rtt.
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index cd7273218598..17d35662930d 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -92,7 +92,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
} else {
/* Reno */
- tcp_cong_avoid_ai(tp, tp->snd_cwnd);
+ tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1);
}
/* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt.
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 13cda4c6313b..01ccc28a686f 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -417,7 +417,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (code == ICMPV6_HDR_FIELD)
teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
- if (teli && teli == info - 2) {
+ if (teli && teli == be32_to_cpu(info) - 2) {
tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
if (tel->encap_limit == 0) {
net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
@@ -429,7 +429,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
break;
case ICMPV6_PKT_TOOBIG:
- mtu = info - offset;
+ mtu = be32_to_cpu(info) - offset;
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
t->dev->mtu = mtu;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ce69a12ae48c..d28f2a2efb32 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -537,20 +537,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
-static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
-{
- static u32 ip6_idents_hashrnd __read_mostly;
- u32 hash, id;
-
- net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
-
- hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
- hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash);
-
- id = ip_idents_reserve(hash, 1);
- fhdr->identification = htonl(id);
-}
-
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
struct sk_buff *frag;
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 97f41a3e68d9..54520a0bd5e3 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -9,6 +9,24 @@
#include <net/addrconf.h>
#include <net/secure_seq.h>
+u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, struct in6_addr *src)
+{
+ u32 hash, id;
+
+ hash = __ipv6_addr_jhash(dst, hashrnd);
+ hash = __ipv6_addr_jhash(src, hash);
+
+ /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
+ * set the hight order instead thus minimizing possible future
+ * collisions.
+ */
+ id = ip_idents_reserve(hash, 1);
+ if (unlikely(!id))
+ id = 1 << 31;
+
+ return id;
+}
+
/* This function exists only for tap drivers that must support broken
* clients requesting UFO without specifying an IPv6 fragment ID.
*
@@ -22,7 +40,7 @@ void ipv6_proxy_select_ident(struct sk_buff *skb)
static u32 ip6_proxy_idents_hashrnd __read_mostly;
struct in6_addr buf[2];
struct in6_addr *addrs;
- u32 hash, id;
+ u32 id;
addrs = skb_header_pointer(skb,
skb_network_offset(skb) +
@@ -34,14 +52,25 @@ void ipv6_proxy_select_ident(struct sk_buff *skb)
net_get_random_once(&ip6_proxy_idents_hashrnd,
sizeof(ip6_proxy_idents_hashrnd));
- hash = __ipv6_addr_jhash(&addrs[1], ip6_proxy_idents_hashrnd);
- hash = __ipv6_addr_jhash(&addrs[0], hash);
-
- id = ip_idents_reserve(hash, 1);
- skb_shinfo(skb)->ip6_frag_id = htonl(id);
+ id = __ipv6_select_ident(ip6_proxy_idents_hashrnd,
+ &addrs[1], &addrs[0]);
+ skb_shinfo(skb)->ip6_frag_id = id;
}
EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
+void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
+{
+ static u32 ip6_idents_hashrnd __read_mostly;
+ u32 id;
+
+ net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
+
+ id = __ipv6_select_ident(ip6_idents_hashrnd, &rt->rt6i_dst.addr,
+ &rt->rt6i_src.addr);
+ fhdr->identification = htonl(id);
+}
+EXPORT_SYMBOL(ipv6_select_ident);
+
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
u16 offset = sizeof(struct ipv6hdr);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 213546bd6d5d..cdbfe5af6187 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1506,12 +1506,12 @@ static bool ipip6_netlink_encap_parms(struct nlattr *data[],
if (data[IFLA_IPTUN_ENCAP_SPORT]) {
ret = true;
- ipencap->sport = nla_get_u16(data[IFLA_IPTUN_ENCAP_SPORT]);
+ ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
}
if (data[IFLA_IPTUN_ENCAP_DPORT]) {
ret = true;
- ipencap->dport = nla_get_u16(data[IFLA_IPTUN_ENCAP_DPORT]);
+ ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
}
return ret;
@@ -1707,9 +1707,9 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
tunnel->encap.type) ||
- nla_put_u16(skb, IFLA_IPTUN_ENCAP_SPORT,
+ nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
tunnel->encap.sport) ||
- nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT,
+ nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
tunnel->encap.dport) ||
nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
tunnel->encap.flags))
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index b6aa8ed18257..a56276996b72 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -52,6 +52,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
+ /* Set the IPv6 fragment id if not set yet */
+ if (!skb_shinfo(skb)->ip6_frag_id)
+ ipv6_proxy_select_ident(skb);
+
segs = NULL;
goto out;
}
@@ -108,7 +112,11 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
fptr->nexthdr = nexthdr;
fptr->reserved = 0;
- fptr->identification = skb_shinfo(skb)->ip6_frag_id;
+ if (skb_shinfo(skb)->ip6_frag_id)
+ fptr->identification = skb_shinfo(skb)->ip6_frag_id;
+ else
+ ipv6_select_ident(fptr,
+ (struct rt6_info *)skb_dst(skb));
/* Fragment the skb. ipv6 header and the remaining fields of the
* fragment header are updated in ipv6_gso_segment()
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 990decba1fe4..b87ca32efa0b 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -659,16 +659,24 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
return err;
}
-static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
+static int ip_vs_route_me_harder(int af, struct sk_buff *skb,
+ unsigned int hooknum)
{
+ if (!sysctl_snat_reroute(skb))
+ return 0;
+ /* Reroute replies only to remote clients (FORWARD and LOCAL_OUT) */
+ if (NF_INET_LOCAL_IN == hooknum)
+ return 0;
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
- if (sysctl_snat_reroute(skb) && ip6_route_me_harder(skb) != 0)
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) &&
+ ip6_route_me_harder(skb) != 0)
return 1;
} else
#endif
- if ((sysctl_snat_reroute(skb) ||
- skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
+ if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(skb, RTN_LOCAL) != 0)
return 1;
@@ -791,7 +799,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
union nf_inet_addr *snet,
__u8 protocol, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp,
- unsigned int offset, unsigned int ihl)
+ unsigned int offset, unsigned int ihl,
+ unsigned int hooknum)
{
unsigned int verdict = NF_DROP;
@@ -821,7 +830,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
#endif
ip_vs_nat_icmp(skb, pp, cp, 1);
- if (ip_vs_route_me_harder(af, skb))
+ if (ip_vs_route_me_harder(af, skb, hooknum))
goto out;
/* do the statistics and put it back */
@@ -916,7 +925,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
snet.ip = iph->saddr;
return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp,
- pp, ciph.len, ihl);
+ pp, ciph.len, ihl, hooknum);
}
#ifdef CONFIG_IP_VS_IPV6
@@ -981,7 +990,8 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
snet.in6 = ciph.saddr.in6;
writable = ciph.len;
return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp,
- pp, writable, sizeof(struct ipv6hdr));
+ pp, writable, sizeof(struct ipv6hdr),
+ hooknum);
}
#endif
@@ -1040,7 +1050,8 @@ static inline bool is_new_conn(const struct sk_buff *skb,
*/
static unsigned int
handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
- struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph,
+ unsigned int hooknum)
{
struct ip_vs_protocol *pp = pd->pp;
@@ -1078,7 +1089,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* if it came from this machine itself. So re-compute
* the routing information.
*/
- if (ip_vs_route_me_harder(af, skb))
+ if (ip_vs_route_me_harder(af, skb, hooknum))
goto drop;
IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
@@ -1181,7 +1192,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
cp = pp->conn_out_get(af, skb, &iph, 0);
if (likely(cp))
- return handle_response(af, skb, pd, cp, &iph);
+ return handle_response(af, skb, pd, cp, &iph, hooknum);
if (sysctl_nat_icmp_send(net) &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3b3ddb4fb9ee..1ff04bcd4871 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1134,9 +1134,11 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
/* Restore old counters on this cpu, no problem. Per-cpu statistics
* are not exposed to userspace.
*/
+ preempt_disable();
stats = this_cpu_ptr(newstats);
stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ preempt_enable();
return newstats;
}
@@ -1262,8 +1264,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
sizeof(struct nft_trans_chain));
- if (trans == NULL)
+ if (trans == NULL) {
+ free_percpu(stats);
return -ENOMEM;
+ }
nft_trans_chain_stats(trans) = stats;
nft_trans_chain_update(trans) = true;
@@ -1319,8 +1323,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
hookfn = type->hooks[hooknum];
basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
- if (basechain == NULL)
+ if (basechain == NULL) {
+ module_put(type->owner);
return -ENOMEM;
+ }
if (nla[NFTA_CHAIN_COUNTERS]) {
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
@@ -3753,6 +3759,24 @@ int nft_chain_validate_dependency(const struct nft_chain *chain,
}
EXPORT_SYMBOL_GPL(nft_chain_validate_dependency);
+int nft_chain_validate_hooks(const struct nft_chain *chain,
+ unsigned int hook_flags)
+{
+ struct nft_base_chain *basechain;
+
+ if (chain->flags & NFT_BASE_CHAIN) {
+ basechain = nft_base_chain(chain);
+
+ if ((1 << basechain->ops[0].hooknum) & hook_flags)
+ return 0;
+
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_chain_validate_hooks);
+
/*
* Loop detection - walk through the ruleset beginning at the destination chain
* of a new jump until either the source chain is reached (loop) or all
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index d1ffd5eb3a9b..9aea747b43ea 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -21,6 +21,21 @@ const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
};
EXPORT_SYMBOL_GPL(nft_masq_policy);
+int nft_masq_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ int err;
+
+ err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
+ if (err < 0)
+ return err;
+
+ return nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_POST_ROUTING));
+}
+EXPORT_SYMBOL_GPL(nft_masq_validate);
+
int nft_masq_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -28,8 +43,8 @@ int nft_masq_init(const struct nft_ctx *ctx,
struct nft_masq *priv = nft_expr_priv(expr);
int err;
- err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
- if (err < 0)
+ err = nft_masq_validate(ctx, expr, NULL);
+ if (err)
return err;
if (tb[NFTA_MASQ_FLAGS] == NULL)
@@ -60,12 +75,5 @@ nla_put_failure:
}
EXPORT_SYMBOL_GPL(nft_masq_dump);
-int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nft_data **data)
-{
- return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
-}
-EXPORT_SYMBOL_GPL(nft_masq_validate);
-
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index aff54fb1c8a0..a0837c6c9283 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -88,17 +88,40 @@ static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
[NFTA_NAT_FLAGS] = { .type = NLA_U32 },
};
-static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_nat_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
{
struct nft_nat *priv = nft_expr_priv(expr);
- u32 family;
int err;
err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
if (err < 0)
return err;
+ switch (priv->type) {
+ case NFT_NAT_SNAT:
+ err = nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN));
+ break;
+ case NFT_NAT_DNAT:
+ err = nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT));
+ break;
+ }
+
+ return err;
+}
+
+static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_nat *priv = nft_expr_priv(expr);
+ u32 family;
+ int err;
+
if (tb[NFTA_NAT_TYPE] == NULL ||
(tb[NFTA_NAT_REG_ADDR_MIN] == NULL &&
tb[NFTA_NAT_REG_PROTO_MIN] == NULL))
@@ -115,6 +138,10 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return -EINVAL;
}
+ err = nft_nat_validate(ctx, expr, NULL);
+ if (err < 0)
+ return err;
+
if (tb[NFTA_NAT_FAMILY] == NULL)
return -EINVAL;
@@ -219,13 +246,6 @@ nla_put_failure:
return -1;
}
-static int nft_nat_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
-{
- return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
-}
-
static struct nft_expr_type nft_nat_type;
static const struct nft_expr_ops nft_nat_ops = {
.type = &nft_nat_type,
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 9e8093f28311..d7e9e93a4e90 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -23,6 +23,22 @@ const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = {
};
EXPORT_SYMBOL_GPL(nft_redir_policy);
+int nft_redir_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ int err;
+
+ err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
+ if (err < 0)
+ return err;
+
+ return nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT));
+}
+EXPORT_SYMBOL_GPL(nft_redir_validate);
+
int nft_redir_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -30,7 +46,7 @@ int nft_redir_init(const struct nft_ctx *ctx,
struct nft_redir *priv = nft_expr_priv(expr);
int err;
- err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
+ err = nft_redir_validate(ctx, expr, NULL);
if (err < 0)
return err;
@@ -88,12 +104,5 @@ nla_put_failure:
}
EXPORT_SYMBOL_GPL(nft_redir_dump);
-int nft_redir_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nft_data **data)
-{
- return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
-}
-EXPORT_SYMBOL_GPL(nft_redir_validate);
-
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 02fdde28dada..75532efa51cd 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1438,7 +1438,7 @@ static void netlink_undo_bind(int group, long unsigned int groups,
for (undo = 0; undo < group; undo++)
if (test_bit(undo, &groups))
- nlk->netlink_unbind(sock_net(sk), undo);
+ nlk->netlink_unbind(sock_net(sk), undo + 1);
}
static int netlink_bind(struct socket *sock, struct sockaddr *addr,
@@ -1476,7 +1476,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
for (group = 0; group < nlk->ngroups; group++) {
if (!test_bit(group, &groups))
continue;
- err = nlk->netlink_bind(net, group);
+ err = nlk->netlink_bind(net, group + 1);
if (!err)
continue;
netlink_undo_bind(group, groups, sk);
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index c3b0cd43eb56..c173f69e1479 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c
@@ -71,14 +71,14 @@ static struct ctl_table rds_sysctl_rds_table[] = {
{
.procname = "max_unacked_packets",
.data = &rds_sysctl_max_unacked_packets,
- .maxlen = sizeof(unsigned long),
+ .maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "max_unacked_bytes",
.data = &rds_sysctl_max_unacked_bytes,
- .maxlen = sizeof(unsigned long),
+ .maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index aad6a679fb13..baef987fe2c0 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -556,8 +556,9 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
}
EXPORT_SYMBOL(tcf_exts_change);
-#define tcf_exts_first_act(ext) \
- list_first_entry(&(exts)->actions, struct tc_action, list)
+#define tcf_exts_first_act(ext) \
+ list_first_entry_or_null(&(exts)->actions, \
+ struct tc_action, list)
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
@@ -603,7 +604,7 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
struct tc_action *a = tcf_exts_first_act(exts);
- if (tcf_action_copy_stats(skb, a, 1) < 0)
+ if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
return -1;
#endif
return 0;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 9b05924cc386..333cd94ba381 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -670,8 +670,14 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_FQ_FLOW_PLIMIT])
q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]);
- if (tb[TCA_FQ_QUANTUM])
- q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
+ if (tb[TCA_FQ_QUANTUM]) {
+ u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
+
+ if (quantum > 0)
+ q->quantum = quantum;
+ else
+ err = -EINVAL;
+ }
if (tb[TCA_FQ_INITIAL_QUANTUM])
q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index e49e231cef52..06320c8c1c86 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2608,7 +2608,7 @@ do_addr_param:
addr_param = param.v + sizeof(sctp_addip_param_t);
- af = sctp_get_af_specific(param_type2af(param.p->type));
+ af = sctp_get_af_specific(param_type2af(addr_param->p.type));
if (af == NULL)
break;