summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c3
-rw-r--r--net/atm/clip.c4
-rw-r--r--net/atm/common.c22
-rw-r--r--net/bridge/br_netfilter_hooks.c3
-rw-r--r--net/bridge/netfilter/ebt_log.c34
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c6
-rw-r--r--net/core/drop_monitor.c5
-rw-r--r--net/core/ethtool.c1
-rw-r--r--net/core/fib_rules.c14
-rw-r--r--net/core/filter.c17
-rw-r--r--net/core/flow_dissector.c426
-rw-r--r--net/core/lwtunnel.c2
-rw-r--r--net/core/neighbour.c32
-rw-r--r--net/core/rtnetlink.c1
-rw-r--r--net/core/secure_seq.c13
-rw-r--r--net/core/sock.c121
-rw-r--r--net/core/sock_diag.c10
-rw-r--r--net/decnet/af_decnet.c13
-rw-r--r--net/dsa/slave.c12
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/arp.c6
-rw-r--r--net/ipv4/devinet.c32
-rw-r--r--net/ipv4/fib_notifier.c86
-rw-r--r--net/ipv4/fib_rules.c55
-rw-r--r--net/ipv4/fib_semantics.c11
-rw-r--r--net/ipv4/fib_trie.c108
-rw-r--r--net/ipv4/icmp.c19
-rw-r--r--net/ipv4/netfilter/arp_tables.c2
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c19
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c15
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c3
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c4
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/route.c92
-rw-r--r--net/ipv4/sysctl_net_ipv4.c16
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_input.c36
-rw-r--r--net/ipv4/tcp_ipv4.c41
-rw-r--r--net/ipv4/tcp_metrics.c147
-rw-r--r--net/ipv4/tcp_minisocks.c22
-rw-r--r--net/ipv4/tcp_westwood.c4
-rw-r--r--net/ipv6/addrconf.c124
-rw-r--r--net/ipv6/ndisc.c4
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c2
-rw-r--r--net/ipv6/tcp_ipv6.c32
-rw-r--r--net/ipv6/udp.c59
-rw-r--r--net/mpls/af_mpls.c98
-rw-r--r--net/mpls/internal.h7
-rw-r--r--net/mpls/mpls_iptunnel.c73
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c24
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_rr.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_wrr.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c57
-rw-r--r--net/netfilter/nf_conntrack_expect.c10
-rw-r--r--net/netfilter/nf_conntrack_netlink.c4
-rw-r--r--net/netfilter/nf_tables_api.c49
-rw-r--r--net/netfilter/nfnetlink_acct.c15
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c12
-rw-r--r--net/netfilter/nfnetlink_log.c14
-rw-r--r--net/netfilter/nft_compat.c8
-rw-r--r--net/netfilter/nft_counter.c3
-rw-r--r--net/netfilter/nft_ct.c171
-rw-r--r--net/netfilter/nft_dynset.c14
-rw-r--r--net/netfilter/nft_exthdr.c13
-rw-r--r--net/netfilter/nft_fib.c16
-rw-r--r--net/netfilter/nft_hash.c133
-rw-r--r--net/netfilter/nft_limit.c10
-rw-r--r--net/netfilter/nft_lookup.c14
-rw-r--r--net/netfilter/nft_masq.c4
-rw-r--r--net/netfilter/nft_meta.c4
-rw-r--r--net/netfilter/nft_nat.c4
-rw-r--r--net/netfilter/nft_objref.c14
-rw-r--r--net/netfilter/nft_quota.c3
-rw-r--r--net/netfilter/nft_redir.c4
-rw-r--r--net/netfilter/nft_reject.c5
-rw-r--r--net/netfilter/nft_reject_inet.c6
-rw-r--r--net/netfilter/nft_set_rbtree.c31
-rw-r--r--net/netfilter/xt_limit.c11
-rw-r--r--net/openvswitch/actions.c271
-rw-r--r--net/openvswitch/datapath.h2
-rw-r--r--net/openvswitch/flow_netlink.c141
-rw-r--r--net/rds/ib_cm.c5
-rw-r--r--net/rds/ib_fmr.c38
-rw-r--r--net/rds/ib_mr.h2
-rw-r--r--net/sched/act_ife.c4
-rw-r--r--net/sched/sch_api.c42
-rw-r--r--net/sched/sch_cbq.c5
-rw-r--r--net/sched/sch_drr.c2
-rw-r--r--net/sched/sch_dsmark.c2
-rw-r--r--net/sched/sch_fq_codel.c2
-rw-r--r--net/sched/sch_generic.c2
-rw-r--r--net/sched/sch_hfsc.c4
-rw-r--r--net/sched/sch_htb.c2
-rw-r--r--net/sched/sch_mq.c2
-rw-r--r--net/sched/sch_mqprio.c41
-rw-r--r--net/sched/sch_multiq.c2
-rw-r--r--net/sched/sch_netem.c26
-rw-r--r--net/sched/sch_prio.c5
-rw-r--r--net/sched/sch_qfq.c2
-rw-r--r--net/sched/sch_red.c2
-rw-r--r--net/sched/sch_sfb.c2
-rw-r--r--net/sched/sch_tbf.c2
-rw-r--r--net/sctp/sm_statefuns.c15
-rw-r--r--net/sctp/socket.c81
-rw-r--r--net/sctp/stream.c396
-rw-r--r--net/sctp/sysctl.c7
-rw-r--r--net/sctp/ulpevent.c56
116 files changed, 2484 insertions, 1233 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index e97ab824e368..9ee5787634e5 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -562,8 +562,7 @@ static int vlan_dev_init(struct net_device *dev)
NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC |
NETIF_F_ALL_FCOE;
- dev->features |= real_dev->vlan_features | NETIF_F_LLTX |
- NETIF_F_GSO_SOFTWARE;
+ dev->features |= dev->hw_features | NETIF_F_LLTX;
dev->gso_max_size = real_dev->gso_max_size;
dev->gso_max_segs = real_dev->gso_max_segs;
if (dev->features & NETIF_F_VLAN_FEATURES)
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 53b4ac09e7b7..ec527b62f79d 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -106,7 +106,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc)
entry->expires = jiffies - 1;
/* force resolution or expiration */
error = neigh_update(entry->neigh, NULL, NUD_NONE,
- NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_ADMIN, 0);
if (error)
pr_crit("neigh_update failed with %d\n", error);
goto out;
@@ -481,7 +481,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
link_vcc(clip_vcc, entry);
}
error = neigh_update(neigh, llc_oui, NUD_PERMANENT,
- NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, 0);
neigh_release(neigh);
return error;
}
diff --git a/net/atm/common.c b/net/atm/common.c
index 9613381f5db0..f06422f4108d 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -62,21 +62,16 @@ static void vcc_remove_socket(struct sock *sk)
write_unlock_irq(&vcc_sklist_lock);
}
-static struct sk_buff *alloc_tx(struct atm_vcc *vcc, unsigned int size)
+static bool vcc_tx_ready(struct atm_vcc *vcc, unsigned int size)
{
- struct sk_buff *skb;
struct sock *sk = sk_atm(vcc);
if (sk_wmem_alloc_get(sk) && !atm_may_send(vcc, size)) {
pr_debug("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n",
sk_wmem_alloc_get(sk), size, sk->sk_sndbuf);
- return NULL;
+ return false;
}
- while (!(skb = alloc_skb(size, GFP_KERNEL)))
- schedule();
- pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize);
- atomic_add(skb->truesize, &sk->sk_wmem_alloc);
- return skb;
+ return true;
}
static void vcc_sock_destruct(struct sock *sk)
@@ -606,7 +601,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
eff = (size+3) & ~3; /* align to word boundary */
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
error = 0;
- while (!(skb = alloc_tx(vcc, eff))) {
+ while (!vcc_tx_ready(vcc, eff)) {
if (m->msg_flags & MSG_DONTWAIT) {
error = -EAGAIN;
break;
@@ -628,6 +623,15 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
finish_wait(sk_sleep(sk), &wait);
if (error)
goto out;
+
+ skb = alloc_skb(eff, GFP_KERNEL);
+ if (!skb) {
+ error = -ENOMEM;
+ goto out;
+ }
+ pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize);
+ atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+
skb->dev = NULL; /* for paths shared with net_device interfaces */
ATM_SKB(skb)->atm_options = vcc->atm_options;
if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) {
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 1f1e62095464..067cf0313449 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -997,13 +997,10 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
if (!elem)
return okfn(net, sk, skb);
- /* We may already have this, but read-locks nest anyway */
- rcu_read_lock();
nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
sk, net, okfn);
ret = nf_hook_slow(skb, &state, elem);
- rcu_read_unlock();
if (ret == 1)
ret = okfn(net, sk, skb);
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 98b9c8e8615e..707caea39743 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -62,10 +62,10 @@ print_ports(const struct sk_buff *skb, uint8_t protocol, int offset)
pptr = skb_header_pointer(skb, offset,
sizeof(_ports), &_ports);
if (pptr == NULL) {
- printk(" INCOMPLETE TCP/UDP header");
+ pr_cont(" INCOMPLETE TCP/UDP header");
return;
}
- printk(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst));
+ pr_cont(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst));
}
}
@@ -100,11 +100,11 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
if (ih == NULL) {
- printk(" INCOMPLETE IP header");
+ pr_cont(" INCOMPLETE IP header");
goto out;
}
- printk(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d",
- &ih->saddr, &ih->daddr, ih->tos, ih->protocol);
+ pr_cont(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d",
+ &ih->saddr, &ih->daddr, ih->tos, ih->protocol);
print_ports(skb, ih->protocol, ih->ihl*4);
goto out;
}
@@ -120,11 +120,11 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
if (ih == NULL) {
- printk(" INCOMPLETE IPv6 header");
+ pr_cont(" INCOMPLETE IPv6 header");
goto out;
}
- printk(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d",
- &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr);
+ pr_cont(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d",
+ &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr);
nexthdr = ih->nexthdr;
offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr, &frag_off);
if (offset_ph == -1)
@@ -142,12 +142,12 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
if (ah == NULL) {
- printk(" INCOMPLETE ARP header");
+ pr_cont(" INCOMPLETE ARP header");
goto out;
}
- printk(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d",
- ntohs(ah->ar_hrd), ntohs(ah->ar_pro),
- ntohs(ah->ar_op));
+ pr_cont(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d",
+ ntohs(ah->ar_hrd), ntohs(ah->ar_pro),
+ ntohs(ah->ar_op));
/* If it's for Ethernet and the lengths are OK,
* then log the ARP payload
@@ -161,17 +161,17 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
ap = skb_header_pointer(skb, sizeof(_arph),
sizeof(_arpp), &_arpp);
if (ap == NULL) {
- printk(" INCOMPLETE ARP payload");
+ pr_cont(" INCOMPLETE ARP payload");
goto out;
}
- printk(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4",
- ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst);
+ pr_cont(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4",
+ ap->mac_src, ap->ip_src,
+ ap->mac_dst, ap->ip_dst);
}
}
out:
- printk("\n");
+ pr_cont("\n");
spin_unlock_bh(&ebt_log_lock);
-
}
static unsigned int
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 206dc266ecd2..346ef6b00b8f 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -375,11 +375,7 @@ static int nft_reject_bridge_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_reject *priv = nft_expr_priv(expr);
- int icmp_code, err;
-
- err = nft_reject_bridge_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
+ int icmp_code;
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index fb55327dcfea..70ccda233bd1 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -412,9 +412,8 @@ static int __init init_net_drop_monitor(void)
for_each_possible_cpu(cpu) {
data = &per_cpu(dm_cpu_data, cpu);
INIT_WORK(&data->dm_alert_work, send_dm_alert);
- init_timer(&data->send_timer);
- data->send_timer.data = (unsigned long)data;
- data->send_timer.function = sched_send_work;
+ setup_timer(&data->send_timer, sched_send_work,
+ (unsigned long)data);
spin_lock_init(&data->lock);
reset_per_cpu_data(data);
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index aecb2c7241b6..905a88ad28e0 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -109,6 +109,7 @@ static const char
rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
[ETH_RSS_HASH_TOP_BIT] = "toeplitz",
[ETH_RSS_HASH_XOR_BIT] = "xor",
+ [ETH_RSS_HASH_CRC32_BIT] = "crc32",
};
static const char
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index b6791d94841d..816e3ccb0ec9 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -23,6 +23,20 @@ static const struct fib_kuid_range fib_kuid_range_unset = {
KUIDT_INIT(~0),
};
+bool fib_rule_matchall(const struct fib_rule *rule)
+{
+ if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id ||
+ rule->flags)
+ return false;
+ if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1)
+ return false;
+ if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
+ !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
+ return false;
+ return true;
+}
+EXPORT_SYMBOL_GPL(fib_rule_matchall);
+
int fib_default_rule_add(struct fib_rules_ops *ops,
u32 pref, u32 table, u32 flags)
{
diff --git a/net/core/filter.c b/net/core/filter.c
index ebaeaf2e46e8..c7f0ccd1c0d3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -928,7 +928,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu)
*/
static void sk_filter_release(struct sk_filter *fp)
{
- if (atomic_dec_and_test(&fp->refcnt))
+ if (refcount_dec_and_test(&fp->refcnt))
call_rcu(&fp->rcu, sk_filter_release_rcu);
}
@@ -943,20 +943,27 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
/* try to charge the socket memory if there is space available
* return true on success
*/
-bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
u32 filter_size = bpf_prog_size(fp->prog->len);
/* same check as in sock_kmalloc() */
if (filter_size <= sysctl_optmem_max &&
atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
- atomic_inc(&fp->refcnt);
atomic_add(filter_size, &sk->sk_omem_alloc);
return true;
}
return false;
}
+bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+{
+ bool ret = __sk_filter_charge(sk, fp);
+ if (ret)
+ refcount_inc(&fp->refcnt);
+ return ret;
+}
+
static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
{
struct sock_filter *old_prog;
@@ -1179,12 +1186,12 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
return -ENOMEM;
fp->prog = prog;
- atomic_set(&fp->refcnt, 0);
- if (!sk_filter_charge(sk, fp)) {
+ if (!__sk_filter_charge(sk, fp)) {
kfree(fp);
return -ENOMEM;
}
+ refcount_set(&fp->refcnt, 1);
old_fp = rcu_dereference_protected(sk->sk_filter,
lockdep_sock_is_held(sk));
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index c35aae13c8d2..5f3ae922fcd1 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -113,6 +113,216 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
}
EXPORT_SYMBOL(__skb_flow_get_ports);
+enum flow_dissect_ret {
+ FLOW_DISSECT_RET_OUT_GOOD,
+ FLOW_DISSECT_RET_OUT_BAD,
+ FLOW_DISSECT_RET_OUT_PROTO_AGAIN,
+};
+
+static enum flow_dissect_ret
+__skb_flow_dissect_mpls(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data, int nhoff, int hlen)
+{
+ struct flow_dissector_key_keyid *key_keyid;
+ struct mpls_label *hdr, _hdr[2];
+
+ if (!dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS_ENTROPY))
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
+ hlen, &_hdr);
+ if (!hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >>
+ MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) {
+ key_keyid = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
+ target_container);
+ key_keyid->keyid = hdr[1].entry & htonl(MPLS_LS_LABEL_MASK);
+ }
+ return FLOW_DISSECT_RET_OUT_GOOD;
+}
+
+static enum flow_dissect_ret
+__skb_flow_dissect_arp(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data, int nhoff, int hlen)
+{
+ struct flow_dissector_key_arp *key_arp;
+ struct {
+ unsigned char ar_sha[ETH_ALEN];
+ unsigned char ar_sip[4];
+ unsigned char ar_tha[ETH_ALEN];
+ unsigned char ar_tip[4];
+ } *arp_eth, _arp_eth;
+ const struct arphdr *arp;
+ struct arphdr *_arp;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP))
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
+ hlen, &_arp);
+ if (!arp)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
+ arp->ar_pro != htons(ETH_P_IP) ||
+ arp->ar_hln != ETH_ALEN ||
+ arp->ar_pln != 4 ||
+ (arp->ar_op != htons(ARPOP_REPLY) &&
+ arp->ar_op != htons(ARPOP_REQUEST)))
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
+ sizeof(_arp_eth), data,
+ hlen, &_arp_eth);
+ if (!arp_eth)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ key_arp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ARP,
+ target_container);
+
+ memcpy(&key_arp->sip, arp_eth->ar_sip, sizeof(key_arp->sip));
+ memcpy(&key_arp->tip, arp_eth->ar_tip, sizeof(key_arp->tip));
+
+ /* Only store the lower byte of the opcode;
+ * this covers ARPOP_REPLY and ARPOP_REQUEST.
+ */
+ key_arp->op = ntohs(arp->ar_op) & 0xff;
+
+ ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
+ ether_addr_copy(key_arp->tha, arp_eth->ar_tha);
+
+ return FLOW_DISSECT_RET_OUT_GOOD;
+}
+
+static enum flow_dissect_ret
+__skb_flow_dissect_gre(const struct sk_buff *skb,
+ struct flow_dissector_key_control *key_control,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data,
+ __be16 *p_proto, int *p_nhoff, int *p_hlen,
+ unsigned int flags)
+{
+ struct flow_dissector_key_keyid *key_keyid;
+ struct gre_base_hdr *hdr, _hdr;
+ int offset = 0;
+ u16 gre_ver;
+
+ hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr),
+ data, *p_hlen, &_hdr);
+ if (!hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ /* Only look inside GRE without routing */
+ if (hdr->flags & GRE_ROUTING)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ /* Only look inside GRE for version 0 and 1 */
+ gre_ver = ntohs(hdr->flags & GRE_VERSION);
+ if (gre_ver > 1)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ *p_proto = hdr->protocol;
+ if (gre_ver) {
+ /* Version1 must be PPTP, and check the flags */
+ if (!(*p_proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
+ return FLOW_DISSECT_RET_OUT_GOOD;
+ }
+
+ offset += sizeof(struct gre_base_hdr);
+
+ if (hdr->flags & GRE_CSUM)
+ offset += sizeof(((struct gre_full_hdr *) 0)->csum) +
+ sizeof(((struct gre_full_hdr *) 0)->reserved1);
+
+ if (hdr->flags & GRE_KEY) {
+ const __be32 *keyid;
+ __be32 _keyid;
+
+ keyid = __skb_header_pointer(skb, *p_nhoff + offset,
+ sizeof(_keyid),
+ data, *p_hlen, &_keyid);
+ if (!keyid)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_GRE_KEYID)) {
+ key_keyid = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_GRE_KEYID,
+ target_container);
+ if (gre_ver == 0)
+ key_keyid->keyid = *keyid;
+ else
+ key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
+ }
+ offset += sizeof(((struct gre_full_hdr *) 0)->key);
+ }
+
+ if (hdr->flags & GRE_SEQ)
+ offset += sizeof(((struct pptp_gre_header *) 0)->seq);
+
+ if (gre_ver == 0) {
+ if (*p_proto == htons(ETH_P_TEB)) {
+ const struct ethhdr *eth;
+ struct ethhdr _eth;
+
+ eth = __skb_header_pointer(skb, *p_nhoff + offset,
+ sizeof(_eth),
+ data, *p_hlen, &_eth);
+ if (!eth)
+ return FLOW_DISSECT_RET_OUT_BAD;
+ *p_proto = eth->h_proto;
+ offset += sizeof(*eth);
+
+ /* Cap headers that we access via pointers at the
+ * end of the Ethernet header as our maximum alignment
+ * at that point is only 2 bytes.
+ */
+ if (NET_IP_ALIGN)
+ *p_hlen = *p_nhoff + offset;
+ }
+ } else { /* version 1, must be PPTP */
+ u8 _ppp_hdr[PPP_HDRLEN];
+ u8 *ppp_hdr;
+
+ if (hdr->flags & GRE_ACK)
+ offset += sizeof(((struct pptp_gre_header *) 0)->ack);
+
+ ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset,
+ sizeof(_ppp_hdr),
+ data, *p_hlen, _ppp_hdr);
+ if (!ppp_hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ switch (PPP_PROTOCOL(ppp_hdr)) {
+ case PPP_IP:
+ *p_proto = htons(ETH_P_IP);
+ break;
+ case PPP_IPV6:
+ *p_proto = htons(ETH_P_IPV6);
+ break;
+ default:
+ /* Could probably catch some more like MPLS */
+ break;
+ }
+
+ offset += PPP_HDRLEN;
+ }
+
+ *p_nhoff += offset;
+ key_control->flags |= FLOW_DIS_ENCAPSULATION;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ return FLOW_DISSECT_RET_OUT_PROTO_AGAIN;
+}
+
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -138,12 +348,10 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_control *key_control;
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
- struct flow_dissector_key_arp *key_arp;
struct flow_dissector_key_ports *key_ports;
struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
- struct flow_dissector_key_keyid *key_keyid;
bool skip_vlan = false;
u8 ip_proto = 0;
bool ret;
@@ -181,7 +389,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
}
-again:
+proto_again:
switch (proto) {
case htons(ETH_P_IP): {
const struct iphdr *iph;
@@ -284,7 +492,7 @@ ipv6:
proto = vlan->h_vlan_encapsulated_proto;
nhoff += sizeof(*vlan);
if (skip_vlan)
- goto again;
+ goto proto_again;
}
skip_vlan = true;
@@ -307,7 +515,7 @@ ipv6:
}
}
- goto again;
+ goto proto_again;
}
case htons(ETH_P_PPP_SES): {
struct {
@@ -349,31 +557,17 @@ ipv6:
}
case htons(ETH_P_MPLS_UC):
- case htons(ETH_P_MPLS_MC): {
- struct mpls_label *hdr, _hdr[2];
+ case htons(ETH_P_MPLS_MC):
mpls:
- hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
- hlen, &_hdr);
- if (!hdr)
- goto out_bad;
-
- if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >>
- MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) {
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
- key_keyid = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
- target_container);
- key_keyid->keyid = hdr[1].entry &
- htonl(MPLS_LS_LABEL_MASK);
- }
-
+ switch (__skb_flow_dissect_mpls(skb, flow_dissector,
+ target_container, data,
+ nhoff, hlen)) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
goto out_good;
+ case FLOW_DISSECT_RET_OUT_BAD:
+ default:
+ goto out_bad;
}
-
- goto out_good;
- }
-
case htons(ETH_P_FCOE):
if ((hlen - nhoff) < FCOE_HEADER_LEN)
goto out_bad;
@@ -382,177 +576,33 @@ mpls:
goto out_good;
case htons(ETH_P_ARP):
- case htons(ETH_P_RARP): {
- struct {
- unsigned char ar_sha[ETH_ALEN];
- unsigned char ar_sip[4];
- unsigned char ar_tha[ETH_ALEN];
- unsigned char ar_tip[4];
- } *arp_eth, _arp_eth;
- const struct arphdr *arp;
- struct arphdr *_arp;
-
- arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
- hlen, &_arp);
- if (!arp)
- goto out_bad;
-
- if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
- arp->ar_pro != htons(ETH_P_IP) ||
- arp->ar_hln != ETH_ALEN ||
- arp->ar_pln != 4 ||
- (arp->ar_op != htons(ARPOP_REPLY) &&
- arp->ar_op != htons(ARPOP_REQUEST)))
- goto out_bad;
-
- arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
- sizeof(_arp_eth), data,
- hlen,
- &_arp_eth);
- if (!arp_eth)
+ case htons(ETH_P_RARP):
+ switch (__skb_flow_dissect_arp(skb, flow_dissector,
+ target_container, data,
+ nhoff, hlen)) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
+ goto out_good;
+ case FLOW_DISSECT_RET_OUT_BAD:
+ default:
goto out_bad;
-
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_ARP)) {
-
- key_arp = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_ARP,
- target_container);
-
- memcpy(&key_arp->sip, arp_eth->ar_sip,
- sizeof(key_arp->sip));
- memcpy(&key_arp->tip, arp_eth->ar_tip,
- sizeof(key_arp->tip));
-
- /* Only store the lower byte of the opcode;
- * this covers ARPOP_REPLY and ARPOP_REQUEST.
- */
- key_arp->op = ntohs(arp->ar_op) & 0xff;
-
- ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
- ether_addr_copy(key_arp->tha, arp_eth->ar_tha);
}
-
- goto out_good;
- }
-
default:
goto out_bad;
}
ip_proto_again:
switch (ip_proto) {
- case IPPROTO_GRE: {
- struct gre_base_hdr *hdr, _hdr;
- u16 gre_ver;
- int offset = 0;
-
- hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
- if (!hdr)
+ case IPPROTO_GRE:
+ switch (__skb_flow_dissect_gre(skb, key_control, flow_dissector,
+ target_container, data,
+ &proto, &nhoff, &hlen, flags)) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
+ goto out_good;
+ case FLOW_DISSECT_RET_OUT_BAD:
goto out_bad;
-
- /* Only look inside GRE without routing */
- if (hdr->flags & GRE_ROUTING)
- break;
-
- /* Only look inside GRE for version 0 and 1 */
- gre_ver = ntohs(hdr->flags & GRE_VERSION);
- if (gre_ver > 1)
- break;
-
- proto = hdr->protocol;
- if (gre_ver) {
- /* Version1 must be PPTP, and check the flags */
- if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
- break;
- }
-
- offset += sizeof(struct gre_base_hdr);
-
- if (hdr->flags & GRE_CSUM)
- offset += sizeof(((struct gre_full_hdr *)0)->csum) +
- sizeof(((struct gre_full_hdr *)0)->reserved1);
-
- if (hdr->flags & GRE_KEY) {
- const __be32 *keyid;
- __be32 _keyid;
-
- keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid),
- data, hlen, &_keyid);
- if (!keyid)
- goto out_bad;
-
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_GRE_KEYID)) {
- key_keyid = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_GRE_KEYID,
- target_container);
- if (gre_ver == 0)
- key_keyid->keyid = *keyid;
- else
- key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
- }
- offset += sizeof(((struct gre_full_hdr *)0)->key);
+ case FLOW_DISSECT_RET_OUT_PROTO_AGAIN:
+ goto proto_again;
}
-
- if (hdr->flags & GRE_SEQ)
- offset += sizeof(((struct pptp_gre_header *)0)->seq);
-
- if (gre_ver == 0) {
- if (proto == htons(ETH_P_TEB)) {
- const struct ethhdr *eth;
- struct ethhdr _eth;
-
- eth = __skb_header_pointer(skb, nhoff + offset,
- sizeof(_eth),
- data, hlen, &_eth);
- if (!eth)
- goto out_bad;
- proto = eth->h_proto;
- offset += sizeof(*eth);
-
- /* Cap headers that we access via pointers at the
- * end of the Ethernet header as our maximum alignment
- * at that point is only 2 bytes.
- */
- if (NET_IP_ALIGN)
- hlen = (nhoff + offset);
- }
- } else { /* version 1, must be PPTP */
- u8 _ppp_hdr[PPP_HDRLEN];
- u8 *ppp_hdr;
-
- if (hdr->flags & GRE_ACK)
- offset += sizeof(((struct pptp_gre_header *)0)->ack);
-
- ppp_hdr = __skb_header_pointer(skb, nhoff + offset,
- sizeof(_ppp_hdr),
- data, hlen, _ppp_hdr);
- if (!ppp_hdr)
- goto out_bad;
-
- switch (PPP_PROTOCOL(ppp_hdr)) {
- case PPP_IP:
- proto = htons(ETH_P_IP);
- break;
- case PPP_IPV6:
- proto = htons(ETH_P_IPV6);
- break;
- default:
- /* Could probably catch some more like MPLS */
- break;
- }
-
- offset += PPP_HDRLEN;
- }
-
- nhoff += offset;
- key_control->flags |= FLOW_DIS_ENCAPSULATION;
- if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
- goto out_good;
-
- goto again;
- }
case NEXTHDR_HOP:
case NEXTHDR_ROUTING:
case NEXTHDR_DEST: {
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 6df9f8fabf0c..b5888190223c 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -162,7 +162,6 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
struct rtnexthop *rtnh = (struct rtnexthop *)attr;
struct nlattr *nla_entype;
struct nlattr *attrs;
- struct nlattr *nla;
u16 encap_type;
int attrlen;
@@ -170,7 +169,6 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
attrlen = rtnh_attrlen(rtnh);
if (attrlen > 0) {
attrs = rtnh_attrs(rtnh);
- nla = nla_find(attrs, attrlen, RTA_ENCAP);
nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
if (nla_entype) {
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index e7c12caa20c8..7069f5e4a361 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -52,8 +52,9 @@ do { \
#define PNEIGH_HASHMASK 0xF
static void neigh_timer_handler(unsigned long arg);
-static void __neigh_notify(struct neighbour *n, int type, int flags);
-static void neigh_update_notify(struct neighbour *neigh);
+static void __neigh_notify(struct neighbour *n, int type, int flags,
+ u32 pid);
+static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
#ifdef CONFIG_PROC_FS
@@ -99,7 +100,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
if (neigh->parms->neigh_cleanup)
neigh->parms->neigh_cleanup(neigh);
- __neigh_notify(neigh, RTM_DELNEIGH, 0);
+ __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
neigh_release(neigh);
}
@@ -948,7 +949,7 @@ out:
}
if (notify)
- neigh_update_notify(neigh);
+ neigh_update_notify(neigh, 0);
neigh_release(neigh);
}
@@ -1072,7 +1073,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
*/
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
- u32 flags)
+ u32 flags, u32 nlmsg_pid)
{
u8 old;
int err;
@@ -1229,7 +1230,7 @@ out:
write_unlock_bh(&neigh->lock);
if (notify)
- neigh_update_notify(neigh);
+ neigh_update_notify(neigh, nlmsg_pid);
return err;
}
@@ -1260,7 +1261,7 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
lladdr || !dev->addr_len);
if (neigh)
neigh_update(neigh, lladdr, NUD_STALE,
- NEIGH_UPDATE_F_OVERRIDE);
+ NEIGH_UPDATE_F_OVERRIDE, 0);
return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
@@ -1638,7 +1639,8 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE |
- NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_ADMIN,
+ NETLINK_CB(skb).portid);
neigh_release(neigh);
out:
@@ -1729,7 +1731,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
neigh_event_send(neigh, NULL);
err = 0;
} else
- err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+ err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
+ NETLINK_CB(skb).portid);
neigh_release(neigh);
out:
@@ -2229,10 +2232,10 @@ nla_put_failure:
return -EMSGSIZE;
}
-static void neigh_update_notify(struct neighbour *neigh)
+static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
- __neigh_notify(neigh, RTM_NEWNEIGH, 0);
+ __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
static bool neigh_master_filtered(struct net_device *dev, int master_idx)
@@ -2830,7 +2833,8 @@ static inline size_t neigh_nlmsg_size(void)
+ nla_total_size(4); /* NDA_PROBES */
}
-static void __neigh_notify(struct neighbour *n, int type, int flags)
+static void __neigh_notify(struct neighbour *n, int type, int flags,
+ u32 pid)
{
struct net *net = dev_net(n->dev);
struct sk_buff *skb;
@@ -2840,7 +2844,7 @@ static void __neigh_notify(struct neighbour *n, int type, int flags)
if (skb == NULL)
goto errout;
- err = neigh_fill_info(skb, n, 0, 0, type, flags);
+ err = neigh_fill_info(skb, n, pid, 0, type, flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -2856,7 +2860,7 @@ errout:
void neigh_app_ns(struct neighbour *n)
{
- __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
+ __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c4e84c558240..9c3947a43eff 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4185,6 +4185,7 @@ void __init rtnetlink_init(void)
rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL);
rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, NULL);
rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 758f140b6bed..fb87e78a2cc7 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -45,8 +45,8 @@ static u32 seq_scale(u32 seq)
#endif
#if IS_ENABLED(CONFIG_IPV6)
-u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
- __be16 sport, __be16 dport, u32 *tsoff)
+u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr,
+ __be16 sport, __be16 dport, u32 *tsoff)
{
const struct {
struct in6_addr saddr;
@@ -66,7 +66,7 @@ u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
*tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
return seq_scale(hash);
}
-EXPORT_SYMBOL(secure_tcpv6_sequence_number);
+EXPORT_SYMBOL(secure_tcpv6_seq_and_tsoff);
u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
@@ -89,14 +89,13 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#ifdef CONFIG_INET
-/* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
+/* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
* but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
* it would be easy enough to have the former function use siphash_4u32, passing
* the arguments as separate u32.
*/
-
-u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
- __be16 sport, __be16 dport, u32 *tsoff)
+u32 secure_tcp_seq_and_tsoff(__be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport, u32 *tsoff)
{
u64 hash;
net_secret_init();
diff --git a/net/core/sock.c b/net/core/sock.c
index 2c4f574168fb..1b9030ee6f4b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -247,12 +247,66 @@ static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
_sock_locks("k-clock-")
};
+static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
+ "rlock-AF_UNSPEC", "rlock-AF_UNIX" , "rlock-AF_INET" ,
+ "rlock-AF_AX25" , "rlock-AF_IPX" , "rlock-AF_APPLETALK",
+ "rlock-AF_NETROM", "rlock-AF_BRIDGE" , "rlock-AF_ATMPVC" ,
+ "rlock-AF_X25" , "rlock-AF_INET6" , "rlock-AF_ROSE" ,
+ "rlock-AF_DECnet", "rlock-AF_NETBEUI" , "rlock-AF_SECURITY" ,
+ "rlock-AF_KEY" , "rlock-AF_NETLINK" , "rlock-AF_PACKET" ,
+ "rlock-AF_ASH" , "rlock-AF_ECONET" , "rlock-AF_ATMSVC" ,
+ "rlock-AF_RDS" , "rlock-AF_SNA" , "rlock-AF_IRDA" ,
+ "rlock-AF_PPPOX" , "rlock-AF_WANPIPE" , "rlock-AF_LLC" ,
+ "rlock-27" , "rlock-28" , "rlock-AF_CAN" ,
+ "rlock-AF_TIPC" , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV" ,
+ "rlock-AF_RXRPC" , "rlock-AF_ISDN" , "rlock-AF_PHONET" ,
+ "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG" ,
+ "rlock-AF_NFC" , "rlock-AF_VSOCK" , "rlock-AF_KCM" ,
+ "rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_MAX"
+};
+static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
+ "wlock-AF_UNSPEC", "wlock-AF_UNIX" , "wlock-AF_INET" ,
+ "wlock-AF_AX25" , "wlock-AF_IPX" , "wlock-AF_APPLETALK",
+ "wlock-AF_NETROM", "wlock-AF_BRIDGE" , "wlock-AF_ATMPVC" ,
+ "wlock-AF_X25" , "wlock-AF_INET6" , "wlock-AF_ROSE" ,
+ "wlock-AF_DECnet", "wlock-AF_NETBEUI" , "wlock-AF_SECURITY" ,
+ "wlock-AF_KEY" , "wlock-AF_NETLINK" , "wlock-AF_PACKET" ,
+ "wlock-AF_ASH" , "wlock-AF_ECONET" , "wlock-AF_ATMSVC" ,
+ "wlock-AF_RDS" , "wlock-AF_SNA" , "wlock-AF_IRDA" ,
+ "wlock-AF_PPPOX" , "wlock-AF_WANPIPE" , "wlock-AF_LLC" ,
+ "wlock-27" , "wlock-28" , "wlock-AF_CAN" ,
+ "wlock-AF_TIPC" , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV" ,
+ "wlock-AF_RXRPC" , "wlock-AF_ISDN" , "wlock-AF_PHONET" ,
+ "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG" ,
+ "wlock-AF_NFC" , "wlock-AF_VSOCK" , "wlock-AF_KCM" ,
+ "wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_MAX"
+};
+static const char *const af_family_elock_key_strings[AF_MAX+1] = {
+ "elock-AF_UNSPEC", "elock-AF_UNIX" , "elock-AF_INET" ,
+ "elock-AF_AX25" , "elock-AF_IPX" , "elock-AF_APPLETALK",
+ "elock-AF_NETROM", "elock-AF_BRIDGE" , "elock-AF_ATMPVC" ,
+ "elock-AF_X25" , "elock-AF_INET6" , "elock-AF_ROSE" ,
+ "elock-AF_DECnet", "elock-AF_NETBEUI" , "elock-AF_SECURITY" ,
+ "elock-AF_KEY" , "elock-AF_NETLINK" , "elock-AF_PACKET" ,
+ "elock-AF_ASH" , "elock-AF_ECONET" , "elock-AF_ATMSVC" ,
+ "elock-AF_RDS" , "elock-AF_SNA" , "elock-AF_IRDA" ,
+ "elock-AF_PPPOX" , "elock-AF_WANPIPE" , "elock-AF_LLC" ,
+ "elock-27" , "elock-28" , "elock-AF_CAN" ,
+ "elock-AF_TIPC" , "elock-AF_BLUETOOTH", "elock-AF_IUCV" ,
+ "elock-AF_RXRPC" , "elock-AF_ISDN" , "elock-AF_PHONET" ,
+ "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG" ,
+ "elock-AF_NFC" , "elock-AF_VSOCK" , "elock-AF_KCM" ,
+ "elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_MAX"
+};
/*
- * sk_callback_lock locking rules are per-address-family,
+ * sk_callback_lock and sk queues locking rules are per-address-family,
* so split the lock classes by using a per-AF key:
*/
static struct lock_class_key af_callback_keys[AF_MAX];
+static struct lock_class_key af_rlock_keys[AF_MAX];
+static struct lock_class_key af_wlock_keys[AF_MAX];
+static struct lock_class_key af_elock_keys[AF_MAX];
static struct lock_class_key af_kern_callback_keys[AF_MAX];
/* Take into consideration the size of the struct sk_buff overhead in the
@@ -1259,6 +1313,21 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_incoming_cpu;
break;
+ case SO_MEMINFO:
+ {
+ u32 meminfo[SK_MEMINFO_VARS];
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ sk_get_meminfo(sk, meminfo);
+
+ len = min_t(unsigned int, len, sizeof(meminfo));
+ if (copy_to_user(optval, &meminfo, len))
+ return -EFAULT;
+
+ goto lenout;
+ }
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -1483,6 +1552,27 @@ void sk_free(struct sock *sk)
}
EXPORT_SYMBOL(sk_free);
+static void sk_init_common(struct sock *sk)
+{
+ skb_queue_head_init(&sk->sk_receive_queue);
+ skb_queue_head_init(&sk->sk_write_queue);
+ skb_queue_head_init(&sk->sk_error_queue);
+
+ rwlock_init(&sk->sk_callback_lock);
+ lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
+ af_rlock_keys + sk->sk_family,
+ af_family_rlock_key_strings[sk->sk_family]);
+ lockdep_set_class_and_name(&sk->sk_write_queue.lock,
+ af_wlock_keys + sk->sk_family,
+ af_family_wlock_key_strings[sk->sk_family]);
+ lockdep_set_class_and_name(&sk->sk_error_queue.lock,
+ af_elock_keys + sk->sk_family,
+ af_family_elock_key_strings[sk->sk_family]);
+ lockdep_set_class_and_name(&sk->sk_callback_lock,
+ af_callback_keys + sk->sk_family,
+ af_family_clock_key_strings[sk->sk_family]);
+}
+
/**
* sk_clone_lock - clone a socket, and lock its clone
* @sk: the socket to clone
@@ -1516,13 +1606,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
*/
atomic_set(&newsk->sk_wmem_alloc, 1);
atomic_set(&newsk->sk_omem_alloc, 0);
- skb_queue_head_init(&newsk->sk_receive_queue);
- skb_queue_head_init(&newsk->sk_write_queue);
-
- rwlock_init(&newsk->sk_callback_lock);
- lockdep_set_class_and_name(&newsk->sk_callback_lock,
- af_callback_keys + newsk->sk_family,
- af_family_clock_key_strings[newsk->sk_family]);
+ sk_init_common(newsk);
newsk->sk_dst_cache = NULL;
newsk->sk_dst_pending_confirm = 0;
@@ -1533,7 +1617,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
sock_reset_flag(newsk, SOCK_DONE);
- skb_queue_head_init(&newsk->sk_error_queue);
filter = rcu_dereference_protected(newsk->sk_filter, 1);
if (filter != NULL)
@@ -2466,10 +2549,7 @@ EXPORT_SYMBOL(sk_stop_timer);
void sock_init_data(struct socket *sock, struct sock *sk)
{
- skb_queue_head_init(&sk->sk_receive_queue);
- skb_queue_head_init(&sk->sk_write_queue);
- skb_queue_head_init(&sk->sk_error_queue);
-
+ sk_init_common(sk);
sk->sk_send_head = NULL;
init_timer(&sk->sk_timer);
@@ -2802,6 +2882,21 @@ void sk_common_release(struct sock *sk)
}
EXPORT_SYMBOL(sk_common_release);
+void sk_get_meminfo(const struct sock *sk, u32 *mem)
+{
+ memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
+
+ mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
+ mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+ mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
+ mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+ mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+ mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+ mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
+ mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+ mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
+}
+
#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR 64 /* should be enough for the first time */
struct prot_inuse {
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 6b10573cc9fa..8d11ee75a100 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -59,15 +59,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
{
u32 mem[SK_MEMINFO_VARS];
- mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
- mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
- mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
- mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
- mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
- mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
- mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
- mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
- mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
+ sk_get_meminfo(sk, mem);
return nla_put(skb, attrtype, sizeof(mem), &mem);
}
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 7de5b40a5d0d..9afa2a5030b2 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -132,6 +132,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/fib_rules.h>
+#include <net/tcp.h>
#include <net/dn.h>
#include <net/dn_nsp.h>
#include <net/dn_dev.h>
@@ -1469,18 +1470,18 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us
case DSO_NODELAY:
if (optlen != sizeof(int))
return -EINVAL;
- if (scp->nonagle == 2)
+ if (scp->nonagle == TCP_NAGLE_CORK)
return -EINVAL;
- scp->nonagle = (u.val == 0) ? 0 : 1;
+ scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_OFF;
/* if (scp->nonagle == 1) { Push pending frames } */
break;
case DSO_CORK:
if (optlen != sizeof(int))
return -EINVAL;
- if (scp->nonagle == 1)
+ if (scp->nonagle == TCP_NAGLE_OFF)
return -EINVAL;
- scp->nonagle = (u.val == 0) ? 0 : 2;
+ scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_CORK;
/* if (scp->nonagle == 0) { Push pending frames } */
break;
@@ -1608,14 +1609,14 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
case DSO_NODELAY:
if (r_len > sizeof(int))
r_len = sizeof(int);
- val = (scp->nonagle == 1);
+ val = (scp->nonagle == TCP_NAGLE_OFF);
r_data = &val;
break;
case DSO_CORK:
if (r_len > sizeof(int))
r_len = sizeof(int);
- val = (scp->nonagle == 2);
+ val = (scp->nonagle == TCP_NAGLE_CORK);
r_data = &val;
break;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index c34872e1febc..78128acfbf63 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -419,8 +419,8 @@ static int dsa_slave_vlan_filtering(struct net_device *dev,
return 0;
}
-static int dsa_fastest_ageing_time(struct dsa_switch *ds,
- unsigned int ageing_time)
+static unsigned int dsa_fastest_ageing_time(struct dsa_switch *ds,
+ unsigned int ageing_time)
{
int i;
@@ -443,9 +443,13 @@ static int dsa_slave_ageing_time(struct net_device *dev,
unsigned long ageing_jiffies = clock_t_to_jiffies(attr->u.ageing_time);
unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
- /* bridge skips -EOPNOTSUPP, so skip the prepare phase */
- if (switchdev_trans_ph_prepare(trans))
+ if (switchdev_trans_ph_prepare(trans)) {
+ if (ds->ageing_time_min && ageing_time < ds->ageing_time_min)
+ return -ERANGE;
+ if (ds->ageing_time_max && ageing_time > ds->ageing_time_max)
+ return -ERANGE;
return 0;
+ }
/* Keep the fastest ageing time in case of multiple bridges */
p->dp->ageing_time = ageing_time;
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index c6d4238ff94a..f83de23a30e7 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \
tcp_rate.o tcp_recovery.o \
tcp_offload.o datagram.o raw.o udp.o udplite.o \
udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
- fib_frontend.o fib_semantics.o fib_trie.o \
+ fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o
obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 51b27ae09fbd..0937b34c27ca 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -872,7 +872,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->pkt_type != PACKET_HOST)
state = NUD_STALE;
neigh_update(n, sha, state,
- override ? NEIGH_UPDATE_F_OVERRIDE : 0);
+ override ? NEIGH_UPDATE_F_OVERRIDE : 0, 0);
neigh_release(n);
}
@@ -1033,7 +1033,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
r->arp_ha.sa_data : NULL, state,
NEIGH_UPDATE_F_OVERRIDE |
- NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_ADMIN, 0);
neigh_release(neigh);
}
return err;
@@ -1084,7 +1084,7 @@ static int arp_invalidate(struct net_device *dev, __be32 ip)
if (neigh->nud_state & ~NUD_NOARP)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE|
- NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_ADMIN, 0);
neigh_release(neigh);
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cebedd545e5e..927f1d4b8c80 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1192,6 +1192,18 @@ out:
return done;
}
+static __be32 in_dev_select_addr(const struct in_device *in_dev,
+ int scope)
+{
+ for_primary_ifa(in_dev) {
+ if (ifa->ifa_scope != RT_SCOPE_LINK &&
+ ifa->ifa_scope <= scope)
+ return ifa->ifa_local;
+ } endfor_ifa(in_dev);
+
+ return 0;
+}
+
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
__be32 addr = 0;
@@ -1228,13 +1240,9 @@ no_in_dev:
if (master_idx &&
(dev = dev_get_by_index_rcu(net, master_idx)) &&
(in_dev = __in_dev_get_rcu(dev))) {
- for_primary_ifa(in_dev) {
- if (ifa->ifa_scope != RT_SCOPE_LINK &&
- ifa->ifa_scope <= scope) {
- addr = ifa->ifa_local;
- goto out_unlock;
- }
- } endfor_ifa(in_dev);
+ addr = in_dev_select_addr(in_dev, scope);
+ if (addr)
+ goto out_unlock;
}
/* Not loopback addresses on loopback should be preferred
@@ -1249,13 +1257,9 @@ no_in_dev:
if (!in_dev)
continue;
- for_primary_ifa(in_dev) {
- if (ifa->ifa_scope != RT_SCOPE_LINK &&
- ifa->ifa_scope <= scope) {
- addr = ifa->ifa_local;
- goto out_unlock;
- }
- } endfor_ifa(in_dev);
+ addr = in_dev_select_addr(in_dev, scope);
+ if (addr)
+ goto out_unlock;
}
out_unlock:
rcu_read_unlock();
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
new file mode 100644
index 000000000000..e0714d975947
--- /dev/null
+++ b/net/ipv4/fib_notifier.c
@@ -0,0 +1,86 @@
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <net/net_namespace.h>
+#include <net/netns/ipv4.h>
+#include <net/ip_fib.h>
+
+static ATOMIC_NOTIFIER_HEAD(fib_chain);
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->net = net;
+ return nb->notifier_call(nb, event_type, info);
+}
+
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ net->ipv4.fib_seq++;
+ info->net = net;
+ return atomic_notifier_call_chain(&fib_chain, event_type, info);
+}
+
+static unsigned int fib_seq_sum(void)
+{
+ unsigned int fib_seq = 0;
+ struct net *net;
+
+ rtnl_lock();
+ for_each_net(net)
+ fib_seq += net->ipv4.fib_seq;
+ rtnl_unlock();
+
+ return fib_seq;
+}
+
+static bool fib_dump_is_consistent(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb),
+ unsigned int fib_seq)
+{
+ atomic_notifier_chain_register(&fib_chain, nb);
+ if (fib_seq == fib_seq_sum())
+ return true;
+ atomic_notifier_chain_unregister(&fib_chain, nb);
+ if (cb)
+ cb(nb);
+ return false;
+}
+
+#define FIB_DUMP_MAX_RETRIES 5
+int register_fib_notifier(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb))
+{
+ int retries = 0;
+
+ do {
+ unsigned int fib_seq = fib_seq_sum();
+ struct net *net;
+
+ /* Mutex semantics guarantee that every change done to
+ * FIB tries before we read the change sequence counter
+ * is now visible to us.
+ */
+ rcu_read_lock();
+ for_each_net_rcu(net) {
+ fib_rules_notify(net, nb);
+ fib_notify(net, nb);
+ }
+ rcu_read_unlock();
+
+ if (fib_dump_is_consistent(nb, cb, fib_seq))
+ return 0;
+ } while (++retries < FIB_DUMP_MAX_RETRIES);
+
+ return -EBUSY;
+}
+EXPORT_SYMBOL(register_fib_notifier);
+
+int unregister_fib_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&fib_chain, nb);
+}
+EXPORT_SYMBOL(unregister_fib_notifier);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 2e50062f642d..778ecf977eb2 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -47,6 +47,27 @@ struct fib4_rule {
#endif
};
+static bool fib4_rule_matchall(const struct fib_rule *rule)
+{
+ struct fib4_rule *r = container_of(rule, struct fib4_rule, common);
+
+ if (r->dst_len || r->src_len || r->tos)
+ return false;
+ return fib_rule_matchall(rule);
+}
+
+bool fib4_rule_default(const struct fib_rule *rule)
+{
+ if (!fib4_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL ||
+ rule->l3mdev)
+ return false;
+ if (rule->table != RT_TABLE_LOCAL && rule->table != RT_TABLE_MAIN &&
+ rule->table != RT_TABLE_DEFAULT)
+ return false;
+ return true;
+}
+EXPORT_SYMBOL_GPL(fib4_rule_default);
+
int __fib_lookup(struct net *net, struct flowi4 *flp,
struct fib_result *res, unsigned int flags)
{
@@ -164,12 +185,36 @@ static struct fib_table *fib_empty_table(struct net *net)
return NULL;
}
+static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_rule *rule)
+{
+ struct fib_rule_notifier_info info = {
+ .rule = rule,
+ };
+
+ return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
static int call_fib_rule_notifiers(struct net *net,
- enum fib_event_type event_type)
+ enum fib_event_type event_type,
+ struct fib_rule *rule)
+{
+ struct fib_rule_notifier_info info = {
+ .rule = rule,
+ };
+
+ return call_fib_notifiers(net, event_type, &info.info);
+}
+
+/* Called with rcu_read_lock() */
+void fib_rules_notify(struct net *net, struct notifier_block *nb)
{
- struct fib_notifier_info info;
+ struct fib_rules_ops *ops = net->ipv4.rules_ops;
+ struct fib_rule *rule;
- return call_fib_notifiers(net, event_type, &info);
+ list_for_each_entry_rcu(rule, &ops->rules_list, list)
+ call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule);
}
static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
@@ -228,7 +273,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule4->tos = frh->tos;
net->ipv4.fib_has_custom_rules = true;
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD);
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule);
err = 0;
errout:
@@ -250,7 +295,7 @@ static int fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL);
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule);
errout:
return err;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 317026a39cfa..da449ddb8cc1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -57,7 +57,6 @@ static unsigned int fib_info_cnt;
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-u32 fib_multipath_secret __read_mostly;
#define for_nexthops(fi) { \
int nhsel; const struct fib_nh *nh; \
@@ -576,9 +575,6 @@ static void fib_rebalance(struct fib_info *fi)
atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
} endfor_nexthops(fi);
-
- net_get_random_once(&fib_multipath_secret,
- sizeof(fib_multipath_secret));
}
static inline void fib_add_weight(struct fib_info *fi,
@@ -1641,7 +1637,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
#endif
void fib_select_path(struct net *net, struct fib_result *res,
- struct flowi4 *fl4, int mp_hash)
+ struct flowi4 *fl4, const struct sk_buff *skb)
{
bool oif_check;
@@ -1650,10 +1646,9 @@ void fib_select_path(struct net *net, struct fib_result *res,
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi->fib_nhs > 1 && oif_check) {
- if (mp_hash < 0)
- mp_hash = get_hash_from_flowi4(fl4) >> 1;
+ int h = fib_multipath_hash(res->fi, fl4, skb);
- fib_select_multipath(res, mp_hash);
+ fib_select_multipath(res, h);
}
else
#endif
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2f0d8233950f..1201409ba1dc 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -84,43 +84,6 @@
#include <trace/events/fib.h>
#include "fib_lookup.h"
-static unsigned int fib_seq_sum(void)
-{
- unsigned int fib_seq = 0;
- struct net *net;
-
- rtnl_lock();
- for_each_net(net)
- fib_seq += net->ipv4.fib_seq;
- rtnl_unlock();
-
- return fib_seq;
-}
-
-static ATOMIC_NOTIFIER_HEAD(fib_chain);
-
-static int call_fib_notifier(struct notifier_block *nb, struct net *net,
- enum fib_event_type event_type,
- struct fib_notifier_info *info)
-{
- info->net = net;
- return nb->notifier_call(nb, event_type, info);
-}
-
-static void fib_rules_notify(struct net *net, struct notifier_block *nb,
- enum fib_event_type event_type)
-{
-#ifdef CONFIG_IP_MULTIPLE_TABLES
- struct fib_notifier_info info;
-
- if (net->ipv4.fib_has_custom_rules)
- call_fib_notifier(nb, net, event_type, &info);
-#endif
-}
-
-static void fib_notify(struct net *net, struct notifier_block *nb,
- enum fib_event_type event_type);
-
static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type, u32 dst,
int dst_len, struct fib_info *fi,
@@ -137,62 +100,6 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
return call_fib_notifier(nb, net, event_type, &info.info);
}
-static bool fib_dump_is_consistent(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb),
- unsigned int fib_seq)
-{
- atomic_notifier_chain_register(&fib_chain, nb);
- if (fib_seq == fib_seq_sum())
- return true;
- atomic_notifier_chain_unregister(&fib_chain, nb);
- if (cb)
- cb(nb);
- return false;
-}
-
-#define FIB_DUMP_MAX_RETRIES 5
-int register_fib_notifier(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb))
-{
- int retries = 0;
-
- do {
- unsigned int fib_seq = fib_seq_sum();
- struct net *net;
-
- /* Mutex semantics guarantee that every change done to
- * FIB tries before we read the change sequence counter
- * is now visible to us.
- */
- rcu_read_lock();
- for_each_net_rcu(net) {
- fib_rules_notify(net, nb, FIB_EVENT_RULE_ADD);
- fib_notify(net, nb, FIB_EVENT_ENTRY_ADD);
- }
- rcu_read_unlock();
-
- if (fib_dump_is_consistent(nb, cb, fib_seq))
- return 0;
- } while (++retries < FIB_DUMP_MAX_RETRIES);
-
- return -EBUSY;
-}
-EXPORT_SYMBOL(register_fib_notifier);
-
-int unregister_fib_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_unregister(&fib_chain, nb);
-}
-EXPORT_SYMBOL(unregister_fib_notifier);
-
-int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
- struct fib_notifier_info *info)
-{
- net->ipv4.fib_seq++;
- info->net = net;
- return atomic_notifier_call_chain(&fib_chain, event_type, info);
-}
-
static int call_fib_entry_notifiers(struct net *net,
enum fib_event_type event_type, u32 dst,
int dst_len, struct fib_info *fi,
@@ -1995,8 +1902,7 @@ int fib_table_flush(struct net *net, struct fib_table *tb)
}
static void fib_leaf_notify(struct net *net, struct key_vector *l,
- struct fib_table *tb, struct notifier_block *nb,
- enum fib_event_type event_type)
+ struct fib_table *tb, struct notifier_block *nb)
{
struct fib_alias *fa;
@@ -2012,22 +1918,21 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l,
if (tb->tb_id != fa->tb_id)
continue;
- call_fib_entry_notifier(nb, net, event_type, l->key,
+ call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key,
KEYLENGTH - fa->fa_slen, fi, fa->fa_tos,
fa->fa_type, fa->tb_id);
}
}
static void fib_table_notify(struct net *net, struct fib_table *tb,
- struct notifier_block *nb,
- enum fib_event_type event_type)
+ struct notifier_block *nb)
{
struct trie *t = (struct trie *)tb->tb_data;
struct key_vector *l, *tp = t->kv;
t_key key = 0;
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
- fib_leaf_notify(net, l, tb, nb, event_type);
+ fib_leaf_notify(net, l, tb, nb);
key = l->key + 1;
/* stop in case of wrap around */
@@ -2036,8 +1941,7 @@ static void fib_table_notify(struct net *net, struct fib_table *tb,
}
}
-static void fib_notify(struct net *net, struct notifier_block *nb,
- enum fib_event_type event_type)
+void fib_notify(struct net *net, struct notifier_block *nb)
{
unsigned int h;
@@ -2046,7 +1950,7 @@ static void fib_notify(struct net *net, struct notifier_block *nb,
struct fib_table *tb;
hlist_for_each_entry_rcu(tb, head, tb_hlist)
- fib_table_notify(net, tb, nb, event_type);
+ fib_table_notify(net, tb, nb);
}
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fc310db2708b..43318b5f5647 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -464,22 +464,6 @@ out_bh_enable:
local_bh_enable();
}
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-
-/* Source and destination is swapped. See ip_multipath_icmp_hash */
-static int icmp_multipath_hash_skb(const struct sk_buff *skb)
-{
- const struct iphdr *iph = ip_hdr(skb);
-
- return fib_multipath_hash(iph->daddr, iph->saddr);
-}
-
-#else
-
-#define icmp_multipath_hash_skb(skb) (-1)
-
-#endif
-
static struct rtable *icmp_route_lookup(struct net *net,
struct flowi4 *fl4,
struct sk_buff *skb_in,
@@ -505,8 +489,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
- rt = __ip_route_output_key_hash(net, fl4,
- icmp_multipath_hash_skb(skb_in));
+ rt = __ip_route_output_key_hash(net, fl4, skb_in);
if (IS_ERR(rt))
return rt;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 6241a81fd7f5..f17dab1dee6e 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -562,8 +562,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
- if (ret != 0)
- goto out_free;
ret = -EINVAL;
if (i != repl->num_entries)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 52f26459efc3..fcbdc0c49b0e 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -22,6 +22,7 @@
#include <linux/icmp.h>
#include <linux/if_arp.h>
#include <linux/seq_file.h>
+#include <linux/refcount.h>
#include <linux/netfilter_arp.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
@@ -40,8 +41,8 @@ MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
struct clusterip_config {
struct list_head list; /* list of all configs */
- atomic_t refcount; /* reference count */
- atomic_t entries; /* number of entries/rules
+ refcount_t refcount; /* reference count */
+ refcount_t entries; /* number of entries/rules
* referencing us */
__be32 clusterip; /* the IP address */
@@ -77,7 +78,7 @@ struct clusterip_net {
static inline void
clusterip_config_get(struct clusterip_config *c)
{
- atomic_inc(&c->refcount);
+ refcount_inc(&c->refcount);
}
@@ -89,7 +90,7 @@ static void clusterip_config_rcu_free(struct rcu_head *head)
static inline void
clusterip_config_put(struct clusterip_config *c)
{
- if (atomic_dec_and_test(&c->refcount))
+ if (refcount_dec_and_test(&c->refcount))
call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
}
@@ -103,7 +104,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
local_bh_disable();
- if (atomic_dec_and_lock(&c->entries, &cn->lock)) {
+ if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
list_del_rcu(&c->list);
spin_unlock(&cn->lock);
local_bh_enable();
@@ -149,10 +150,10 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
c = NULL;
else
#endif
- if (unlikely(!atomic_inc_not_zero(&c->refcount)))
+ if (unlikely(!refcount_inc_not_zero(&c->refcount)))
c = NULL;
else if (entry)
- atomic_inc(&c->entries);
+ refcount_inc(&c->entries);
}
rcu_read_unlock_bh();
@@ -188,8 +189,8 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
clusterip_config_init_nodelist(c, i);
c->hash_mode = i->hash_mode;
c->hash_initval = i->hash_initval;
- atomic_set(&c->refcount, 1);
- atomic_set(&c->entries, 1);
+ refcount_set(&c->refcount, 1);
+ refcount_set(&c->entries, 1);
spin_lock_bh(&cn->lock);
if (__clusterip_config_find(net, ip)) {
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index c9b52c361da2..ef49989c93b1 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -998,18 +998,6 @@ err_id_free:
*
*****************************************************************************/
-static void hex_dump(const unsigned char *buf, size_t len)
-{
- size_t i;
-
- for (i = 0; i < len; i++) {
- if (i && !(i % 16))
- printk("\n");
- printk("%02x ", *(buf + i));
- }
- printk("\n");
-}
-
/*
* Parse and mangle SNMP message according to mapping.
* (And this is the fucking 'basic' method).
@@ -1026,7 +1014,8 @@ static int snmp_parse_mangle(unsigned char *msg,
struct snmp_object *obj;
if (debug > 1)
- hex_dump(msg, len);
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 1,
+ msg, len, 0);
asn1_open(&ctx, msg, len);
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 146d86105183..7cd8d0d918f8 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -104,7 +104,6 @@ EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put);
void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
- const struct iphdr *oiph;
struct iphdr *niph;
const struct tcphdr *oth;
struct tcphdr _oth;
@@ -116,8 +115,6 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
return;
- oiph = ip_hdr(oldskb);
-
nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
LL_MAX_HEADER, GFP_ATOMIC);
if (!nskb)
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 2981291910dd..f4e4462cb5bb 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -90,7 +90,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
- nft_fib_store_result(dest, priv->result, pkt,
+ nft_fib_store_result(dest, priv, pkt,
nft_in(pkt)->ifindex);
return;
}
@@ -99,7 +99,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (ipv4_is_zeronet(iph->saddr)) {
if (ipv4_is_lbcast(iph->daddr) ||
ipv4_is_local_multicast(iph->daddr)) {
- nft_fib_store_result(dest, priv->result, pkt,
+ nft_fib_store_result(dest, priv, pkt,
get_ifindex(pkt->skb->dev));
return;
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 69cf49e8356d..4ccbf464d1ac 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -199,7 +199,6 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TW", LINUX_MIB_TIMEWAITED),
SNMP_MIB_ITEM("TWRecycled", LINUX_MIB_TIMEWAITRECYCLED),
SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED),
- SNMP_MIB_ITEM("PAWSPassive", LINUX_MIB_PAWSPASSIVEREJECTED),
SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED),
SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED),
SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8471dd116771..5dda1ef81c7e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1734,45 +1734,97 @@ out:
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-
/* To make ICMP packets follow the right flow, the multipath hash is
- * calculated from the inner IP addresses in reverse order.
+ * calculated from the inner IP addresses.
*/
-static int ip_multipath_icmp_hash(struct sk_buff *skb)
+static void ip_multipath_l3_keys(const struct sk_buff *skb,
+ struct flow_keys *hash_keys)
{
const struct iphdr *outer_iph = ip_hdr(skb);
- struct icmphdr _icmph;
+ const struct iphdr *inner_iph;
const struct icmphdr *icmph;
struct iphdr _inner_iph;
- const struct iphdr *inner_iph;
+ struct icmphdr _icmph;
+
+ hash_keys->addrs.v4addrs.src = outer_iph->saddr;
+ hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
+ if (likely(outer_iph->protocol != IPPROTO_ICMP))
+ return;
if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
- goto standard_hash;
+ return;
icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
&_icmph);
if (!icmph)
- goto standard_hash;
+ return;
if (icmph->type != ICMP_DEST_UNREACH &&
icmph->type != ICMP_REDIRECT &&
icmph->type != ICMP_TIME_EXCEEDED &&
- icmph->type != ICMP_PARAMETERPROB) {
- goto standard_hash;
- }
+ icmph->type != ICMP_PARAMETERPROB)
+ return;
inner_iph = skb_header_pointer(skb,
outer_iph->ihl * 4 + sizeof(_icmph),
sizeof(_inner_iph), &_inner_iph);
if (!inner_iph)
- goto standard_hash;
+ return;
+ hash_keys->addrs.v4addrs.src = inner_iph->saddr;
+ hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
+}
- return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr);
+/* if skb is set it will be used and fl4 can be NULL */
+int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
+ const struct sk_buff *skb)
+{
+ struct net *net = fi->fib_net;
+ struct flow_keys hash_keys;
+ u32 mhash;
-standard_hash:
- return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr);
-}
+ switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+ case 0:
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ if (skb) {
+ ip_multipath_l3_keys(skb, &hash_keys);
+ } else {
+ hash_keys.addrs.v4addrs.src = fl4->saddr;
+ hash_keys.addrs.v4addrs.dst = fl4->daddr;
+ }
+ break;
+ case 1:
+ /* skb is currently provided only when forwarding */
+ if (skb) {
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+ struct flow_keys keys;
+
+ /* short-circuit if we already have L4 hash present */
+ if (skb->l4_hash)
+ return skb_get_hash_raw(skb) >> 1;
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+ hash_keys.ports.src = keys.ports.src;
+ hash_keys.ports.dst = keys.ports.dst;
+ hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ } else {
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ hash_keys.addrs.v4addrs.src = fl4->saddr;
+ hash_keys.addrs.v4addrs.dst = fl4->daddr;
+ hash_keys.ports.src = fl4->fl4_sport;
+ hash_keys.ports.dst = fl4->fl4_dport;
+ hash_keys.basic.ip_proto = fl4->flowi4_proto;
+ }
+ break;
+ }
+ mhash = flow_hash_from_keys(&hash_keys);
+ return mhash >> 1;
+}
+EXPORT_SYMBOL_GPL(fib_multipath_hash);
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
static int ip_mkroute_input(struct sk_buff *skb,
@@ -1782,12 +1834,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1) {
- int h;
+ int h = fib_multipath_hash(res->fi, NULL, skb);
- if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP))
- h = ip_multipath_icmp_hash(skb);
- else
- h = fib_multipath_hash(saddr, daddr);
fib_select_multipath(res, h);
}
#endif
@@ -2203,7 +2251,7 @@ add:
*/
struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
- int mp_hash)
+ const struct sk_buff *skb)
{
struct net_device *dev_out = NULL;
__u8 tos = RT_FL_TOS(fl4);
@@ -2365,7 +2413,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
goto make_route;
}
- fib_select_path(net, &res, fl4, mp_hash);
+ fib_select_path(net, &res, fl4, skb);
dev_out = FIB_RES_DEV(res);
fl4->flowi4_oif = dev_out->ifindex;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d6880a6149ee..711c3e2e17b1 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -981,13 +981,6 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_tw_recycle",
- .data = &init_net.ipv4.tcp_death_row.sysctl_tw_recycle,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_max_syn_backlog",
.data = &init_net.ipv4.sysctl_max_syn_backlog,
.maxlen = sizeof(int),
@@ -1004,6 +997,15 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = &zero,
.extra2 = &one,
},
+ {
+ .procname = "fib_multipath_hash_policy",
+ .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
#endif
{
.procname = "ip_unprivileged_port_start",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1e319a525d51..1665948dff8c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2470,7 +2470,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
/* Values greater than interface MTU won't take effect. However
* at the point when this call is done we typically don't yet
* know which interface is going to be used */
- if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
+ if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) {
err = -EINVAL;
break;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c43119726a62..a75c48f62e27 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6324,36 +6324,14 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop_and_free;
if (isn && tmp_opt.tstamp_ok)
- af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
+ af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off);
if (!want_cookie && !isn) {
- /* VJ's idea. We save last timestamp seen
- * from the destination in peer table, when entering
- * state TIME-WAIT, and check against it before
- * accepting new connection request.
- *
- * If "isn" is not zero, this request hit alive
- * timewait bucket, so that all the necessary checks
- * are made in the function processing timewait state.
- */
- if (net->ipv4.tcp_death_row.sysctl_tw_recycle) {
- bool strict;
-
- dst = af_ops->route_req(sk, &fl, req, &strict);
-
- if (dst && strict &&
- !tcp_peer_is_proven(req, dst, true,
- tmp_opt.saw_tstamp)) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
- goto drop_and_release;
- }
- }
/* Kill the following clause, if you dislike this way. */
- else if (!net->ipv4.sysctl_tcp_syncookies &&
- (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
- (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
- !tcp_peer_is_proven(req, dst, false,
- tmp_opt.saw_tstamp)) {
+ if (!net->ipv4.sysctl_tcp_syncookies &&
+ (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+ (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
+ !tcp_peer_is_proven(req, dst)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
* proven to be alive.
@@ -6366,10 +6344,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop_and_release;
}
- isn = af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
+ isn = af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off);
}
if (!dst) {
- dst = af_ops->route_req(sk, &fl, req, NULL);
+ dst = af_ops->route_req(sk, &fl, req);
if (!dst)
goto drop_and_free;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 575e19dcc017..7482b5d11861 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -94,12 +94,12 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
-static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff)
+static u32 tcp_v4_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff)
{
- return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
- ip_hdr(skb)->saddr,
- tcp_hdr(skb)->dest,
- tcp_hdr(skb)->source, tsoff);
+ return secure_tcp_seq_and_tsoff(ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source, tsoff);
}
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -198,10 +198,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
tp->write_seq = 0;
}
- if (tcp_death_row->sysctl_tw_recycle &&
- !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
- tcp_fetch_timewait_stamp(sk, &rt->dst);
-
inet->inet_dport = usin->sin_port;
sk_daddr_set(sk, daddr);
@@ -236,11 +232,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
rt = NULL;
if (likely(!tp->repair)) {
- seq = secure_tcp_sequence_number(inet->inet_saddr,
- inet->inet_daddr,
- inet->inet_sport,
- usin->sin_port,
- &tp->tsoffset);
+ seq = secure_tcp_seq_and_tsoff(inet->inet_saddr,
+ inet->inet_daddr,
+ inet->inet_sport,
+ usin->sin_port,
+ &tp->tsoffset);
if (!tp->write_seq)
tp->write_seq = seq;
}
@@ -1217,19 +1213,9 @@ static void tcp_v4_init_req(struct request_sock *req,
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
struct flowi *fl,
- const struct request_sock *req,
- bool *strict)
+ const struct request_sock *req)
{
- struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
-
- if (strict) {
- if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
- *strict = true;
- else
- *strict = false;
- }
-
- return dst;
+ return inet_csk_route_req(sk, &fl->u.ip4, req);
}
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
@@ -1253,7 +1239,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.cookie_init_seq = cookie_v4_init_sequence,
#endif
.route_req = tcp_v4_route_req,
- .init_seq = tcp_v4_init_sequence,
+ .init_seq_tsoff = tcp_v4_init_seq_and_tsoff,
.send_synack = tcp_v4_send_synack,
};
@@ -2466,7 +2452,6 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_tw_reuse = 0;
cnt = tcp_hashinfo.ehash_mask + 1;
- net->ipv4.tcp_death_row.sysctl_tw_recycle = 0;
net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 0f46e5fe31ad..9d0d4f39e42b 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -45,8 +45,6 @@ struct tcp_metrics_block {
struct inetpeer_addr tcpm_saddr;
struct inetpeer_addr tcpm_daddr;
unsigned long tcpm_stamp;
- u32 tcpm_ts;
- u32 tcpm_ts_stamp;
u32 tcpm_lock;
u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
struct tcp_fastopen_metrics tcpm_fastopen;
@@ -123,8 +121,6 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
- tm->tcpm_ts = 0;
- tm->tcpm_ts_stamp = 0;
if (fastopen_clear) {
tm->tcpm_fastopen.mss = 0;
tm->tcpm_fastopen.syn_loss = 0;
@@ -273,48 +269,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
return tm;
}
-static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
-{
- struct tcp_metrics_block *tm;
- struct inetpeer_addr saddr, daddr;
- unsigned int hash;
- struct net *net;
-
- if (tw->tw_family == AF_INET) {
- inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
- inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
- hash = ipv4_addr_hash(tw->tw_daddr);
- }
-#if IS_ENABLED(CONFIG_IPV6)
- else if (tw->tw_family == AF_INET6) {
- if (ipv6_addr_v4mapped(&tw->tw_v6_daddr)) {
- inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
- inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
- hash = ipv4_addr_hash(tw->tw_daddr);
- } else {
- inetpeer_set_addr_v6(&saddr, &tw->tw_v6_rcv_saddr);
- inetpeer_set_addr_v6(&daddr, &tw->tw_v6_daddr);
- hash = ipv6_addr_hash(&tw->tw_v6_daddr);
- }
- }
-#endif
- else
- return NULL;
-
- net = twsk_net(tw);
- hash ^= net_hash_mix(net);
- hash = hash_32(hash, tcp_metrics_hash_log);
-
- for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
- tm = rcu_dereference(tm->tcpm_next)) {
- if (addr_same(&tm->tcpm_saddr, &saddr) &&
- addr_same(&tm->tcpm_daddr, &daddr) &&
- net_eq(tm_net(tm), net))
- break;
- }
- return tm;
-}
-
static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
struct dst_entry *dst,
bool create)
@@ -573,8 +527,7 @@ reset:
tp->snd_cwnd_stamp = tcp_time_stamp;
}
-bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
- bool paws_check, bool timestamps)
+bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
{
struct tcp_metrics_block *tm;
bool ret;
@@ -584,94 +537,10 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
rcu_read_lock();
tm = __tcp_get_metrics_req(req, dst);
- if (paws_check) {
- if (tm &&
- (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL &&
- ((s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW ||
- !timestamps))
- ret = false;
- else
- ret = true;
- } else {
- if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp)
- ret = true;
- else
- ret = false;
- }
- rcu_read_unlock();
-
- return ret;
-}
-
-void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst)
-{
- struct tcp_metrics_block *tm;
-
- rcu_read_lock();
- tm = tcp_get_metrics(sk, dst, true);
- if (tm) {
- struct tcp_sock *tp = tcp_sk(sk);
-
- if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) {
- tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp;
- tp->rx_opt.ts_recent = tm->tcpm_ts;
- }
- }
- rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp);
-
-/* VJ's idea. Save last timestamp seen from this destination and hold
- * it at least for normal timewait interval to use for duplicate
- * segment detection in subsequent connections, before they enter
- * synchronized state.
- */
-bool tcp_remember_stamp(struct sock *sk)
-{
- struct dst_entry *dst = __sk_dst_get(sk);
- bool ret = false;
-
- if (dst) {
- struct tcp_metrics_block *tm;
-
- rcu_read_lock();
- tm = tcp_get_metrics(sk, dst, true);
- if (tm) {
- struct tcp_sock *tp = tcp_sk(sk);
-
- if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 ||
- ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
- tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
- tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
- tm->tcpm_ts = tp->rx_opt.ts_recent;
- }
- ret = true;
- }
- rcu_read_unlock();
- }
- return ret;
-}
-
-bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
-{
- struct tcp_metrics_block *tm;
- bool ret = false;
-
- rcu_read_lock();
- tm = __tcp_get_metrics_tw(tw);
- if (tm) {
- const struct tcp_timewait_sock *tcptw;
- struct sock *sk = (struct sock *) tw;
-
- tcptw = tcp_twsk(sk);
- if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 ||
- ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
- tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
- tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
- tm->tcpm_ts = tcptw->tw_ts_recent;
- }
+ if (tm && tcp_metric_get(tm, TCP_METRIC_RTT))
ret = true;
- }
+ else
+ ret = false;
rcu_read_unlock();
return ret;
@@ -791,14 +660,6 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
jiffies - tm->tcpm_stamp,
TCP_METRICS_ATTR_PAD) < 0)
goto nla_put_failure;
- if (tm->tcpm_ts_stamp) {
- if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP,
- (s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0)
- goto nla_put_failure;
- if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL,
- tm->tcpm_ts) < 0)
- goto nla_put_failure;
- }
{
int n = 0;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 65c0f3d13eca..1e217948be62 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -94,7 +94,6 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
struct tcp_options_received tmp_opt;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
bool paws_reject = false;
- struct inet_timewait_death_row *tcp_death_row = &sock_net((struct sock*)tw)->ipv4.tcp_death_row;
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
@@ -149,12 +148,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
}
- if (tcp_death_row->sysctl_tw_recycle &&
- tcptw->tw_ts_recent_stamp &&
- tcp_tw_remember_stamp(tw))
- inet_twsk_reschedule(tw, tw->tw_timeout);
- else
- inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
+ inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
return TCP_TW_ACK;
}
@@ -259,12 +253,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
struct inet_timewait_sock *tw;
- bool recycle_ok = false;
struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
- if (tcp_death_row->sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
- recycle_ok = tcp_remember_stamp(sk);
-
tw = inet_twsk_alloc(sk, tcp_death_row, state);
if (tw) {
@@ -317,13 +307,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (timeo < rto)
timeo = rto;
- if (recycle_ok) {
- tw->tw_timeout = rto;
- } else {
- tw->tw_timeout = TCP_TIMEWAIT_LEN;
- if (state == TCP_TIME_WAIT)
- timeo = TCP_TIMEWAIT_LEN;
- }
+ tw->tw_timeout = TCP_TIMEWAIT_LEN;
+ if (state == TCP_TIME_WAIT)
+ timeo = TCP_TIMEWAIT_LEN;
inet_twsk_schedule(tw, timeo);
/* Linkage updates. */
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index fed66dc0e0f5..9775453b8d17 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -265,8 +265,8 @@ static size_t tcp_westwood_info(struct sock *sk, u32 ext, int *attr,
if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
info->vegas.tcpv_enabled = 1;
info->vegas.tcpv_rttcnt = 0;
- info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt),
- info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
+ info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt);
+ info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min);
*attr = INET_DIAG_VEGASINFO;
return sizeof(struct tcpvegas_info);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 363172527e43..dff5beb26a01 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -224,6 +224,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.accept_ra_rtr_pref = 1,
.rtr_probe_interval = 60 * HZ,
#ifdef CONFIG_IPV6_ROUTE_INFO
+ .accept_ra_rt_info_min_plen = 0,
.accept_ra_rt_info_max_plen = 0,
#endif
#endif
@@ -245,6 +246,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
#endif
.enhanced_dad = 1,
.addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64,
+ .disable_policy = 0,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -276,6 +278,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.accept_ra_rtr_pref = 1,
.rtr_probe_interval = 60 * HZ,
#ifdef CONFIG_IPV6_ROUTE_INFO
+ .accept_ra_rt_info_min_plen = 0,
.accept_ra_rt_info_max_plen = 0,
#endif
#endif
@@ -297,6 +300,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
#endif
.enhanced_dad = 1,
.addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64,
+ .disable_policy = 0,
};
/* Check if a valid qdisc is available */
@@ -944,6 +948,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
const struct in6_addr *peer_addr, int pfxlen,
int scope, u32 flags, u32 valid_lft, u32 prefered_lft)
{
+ struct net *net = dev_net(idev->dev);
struct inet6_ifaddr *ifa = NULL;
struct rt6_info *rt;
unsigned int hash;
@@ -990,6 +995,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
goto out;
}
+ if (net->ipv6.devconf_all->disable_policy ||
+ idev->cnf.disable_policy)
+ rt->dst.flags |= DST_NOPOLICY;
+
neigh_parms_data_state_setall(idev->nd_parms);
ifa->addr = *addr;
@@ -4972,6 +4981,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_RTR_PROBE_INTERVAL] =
jiffies_to_msecs(cnf->rtr_probe_interval);
#ifdef CONFIG_IPV6_ROUTE_INFO
+ array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen;
array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
#endif
#endif
@@ -5003,6 +5013,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
#endif
array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode;
+ array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
}
static inline size_t inet6_ifla6_size(void)
@@ -5827,6 +5838,105 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl,
return ret;
}
+static
+void addrconf_set_nopolicy(struct rt6_info *rt, int action)
+{
+ if (rt) {
+ if (action)
+ rt->dst.flags |= DST_NOPOLICY;
+ else
+ rt->dst.flags &= ~DST_NOPOLICY;
+ }
+}
+
+static
+void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
+{
+ struct inet6_ifaddr *ifa;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ spin_lock(&ifa->lock);
+ if (ifa->rt) {
+ struct rt6_info *rt = ifa->rt;
+ struct fib6_table *table = rt->rt6i_table;
+ int cpu;
+
+ read_lock(&table->tb6_lock);
+ addrconf_set_nopolicy(ifa->rt, val);
+ if (rt->rt6i_pcpu) {
+ for_each_possible_cpu(cpu) {
+ struct rt6_info **rtp;
+
+ rtp = per_cpu_ptr(rt->rt6i_pcpu, cpu);
+ addrconf_set_nopolicy(*rtp, val);
+ }
+ }
+ read_unlock(&table->tb6_lock);
+ }
+ spin_unlock(&ifa->lock);
+ }
+ read_unlock_bh(&idev->lock);
+}
+
+static
+int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val)
+{
+ struct inet6_dev *idev;
+ struct net *net;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ *valp = val;
+
+ net = (struct net *)ctl->extra2;
+ if (valp == &net->ipv6.devconf_dflt->disable_policy) {
+ rtnl_unlock();
+ return 0;
+ }
+
+ if (valp == &net->ipv6.devconf_all->disable_policy) {
+ struct net_device *dev;
+
+ for_each_netdev(net, dev) {
+ idev = __in6_dev_get(dev);
+ if (idev)
+ addrconf_disable_policy_idev(idev, val);
+ }
+ } else {
+ idev = (struct inet6_dev *)ctl->extra1;
+ addrconf_disable_policy_idev(idev, val);
+ }
+
+ rtnl_unlock();
+ return 0;
+}
+
+static
+int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int *valp = ctl->data;
+ int val = *valp;
+ loff_t pos = *ppos;
+ struct ctl_table lctl;
+ int ret;
+
+ lctl = *ctl;
+ lctl.data = &val;
+ ret = proc_dointvec(&lctl, write, buffer, lenp, ppos);
+
+ if (write && (*valp != val))
+ ret = addrconf_disable_policy(ctl, valp, val);
+
+ if (ret)
+ *ppos = pos;
+
+ return ret;
+}
+
static int minus_one = -1;
static const int one = 1;
static const int two_five_five = 255;
@@ -6015,6 +6125,13 @@ static const struct ctl_table addrconf_sysctl[] = {
},
#ifdef CONFIG_IPV6_ROUTE_INFO
{
+ .procname = "accept_ra_rt_info_min_plen",
+ .data = &ipv6_devconf.accept_ra_rt_info_min_plen,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "accept_ra_rt_info_max_plen",
.data = &ipv6_devconf.accept_ra_rt_info_max_plen,
.maxlen = sizeof(int),
@@ -6185,6 +6302,13 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = addrconf_sysctl_addr_gen_mode,
},
{
+ .procname = "disable_policy",
+ .data = &ipv6_devconf.disable_policy,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = addrconf_sysctl_disable_policy,
+ },
+ {
/* sentinel */
}
};
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 7ebac630d3c6..b5812b3f7539 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -732,7 +732,7 @@ void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type,
struct ndisc_options *ndopts)
{
- neigh_update(neigh, lladdr, new, flags);
+ neigh_update(neigh, lladdr, new, flags, 0);
/* report ndisc ops about neighbour update */
ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts);
}
@@ -1418,6 +1418,8 @@ skip_linkparms:
if (ri->prefix_len == 0 &&
!in6_dev->cnf.accept_ra_defrtr)
continue;
+ if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen)
+ continue;
if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
continue;
rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3,
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 765facf03d45..e8d88d82636b 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -159,7 +159,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
- nft_fib_store_result(dest, priv->result, pkt,
+ nft_fib_store_result(dest, priv, pkt,
nft_in(pkt)->ifindex);
return;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 49fa2e8c3fa9..0f08d718a002 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -101,12 +101,12 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
}
}
-static u32 tcp_v6_init_sequence(const struct sk_buff *skb, u32 *tsoff)
+static u32 tcp_v6_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff)
{
- return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
- ipv6_hdr(skb)->saddr.s6_addr32,
- tcp_hdr(skb)->dest,
- tcp_hdr(skb)->source, tsoff);
+ return secure_tcpv6_seq_and_tsoff(ipv6_hdr(skb)->daddr.s6_addr32,
+ ipv6_hdr(skb)->saddr.s6_addr32,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source, tsoff);
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -265,11 +265,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_gso_type = SKB_GSO_TCPV6;
ip6_dst_store(sk, dst, NULL, NULL);
- if (tcp_death_row->sysctl_tw_recycle &&
- !tp->rx_opt.ts_recent_stamp &&
- ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr))
- tcp_fetch_timewait_stamp(sk, dst);
-
icsk->icsk_ext_hdr_len = 0;
if (opt)
icsk->icsk_ext_hdr_len = opt->opt_flen +
@@ -287,11 +282,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk_set_txhash(sk);
if (likely(!tp->repair)) {
- seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
- sk->sk_v6_daddr.s6_addr32,
- inet->inet_sport,
- inet->inet_dport,
- &tp->tsoffset);
+ seq = secure_tcpv6_seq_and_tsoff(np->saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
+ inet->inet_sport,
+ inet->inet_dport,
+ &tp->tsoffset);
if (!tp->write_seq)
tp->write_seq = seq;
}
@@ -727,11 +722,8 @@ static void tcp_v6_init_req(struct request_sock *req,
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
struct flowi *fl,
- const struct request_sock *req,
- bool *strict)
+ const struct request_sock *req)
{
- if (strict)
- *strict = true;
return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}
@@ -757,7 +749,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.cookie_init_seq = cookie_v6_init_sequence,
#endif
.route_req = tcp_v6_route_req,
- .init_seq = tcp_v6_init_sequence,
+ .init_seq_tsoff = tcp_v6_init_seq_and_tsoff,
.send_synack = tcp_v6_send_synack,
};
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e28082f0a307..b793ed1d2a36 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -864,6 +864,64 @@ discard:
return 0;
}
+static struct sock *__udp6_lib_demux_lookup(struct net *net,
+ __be16 loc_port, const struct in6_addr *loc_addr,
+ __be16 rmt_port, const struct in6_addr *rmt_addr,
+ int dif)
+{
+ struct sock *sk;
+
+ rcu_read_lock();
+ sk = __udp6_lib_lookup(net, rmt_addr, rmt_port, loc_addr, loc_port,
+ dif, &udp_table, NULL);
+ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
+ rcu_read_unlock();
+
+ return sk;
+}
+
+static void udp_v6_early_demux(struct sk_buff *skb)
+{
+ struct net *net = dev_net(skb->dev);
+ const struct udphdr *uh;
+ struct sock *sk;
+ struct dst_entry *dst;
+ int dif = skb->dev->ifindex;
+
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) +
+ sizeof(struct udphdr)))
+ return;
+
+ uh = udp_hdr(skb);
+
+ if (skb->pkt_type == PACKET_HOST)
+ sk = __udp6_lib_demux_lookup(net, uh->dest,
+ &ipv6_hdr(skb)->daddr,
+ uh->source, &ipv6_hdr(skb)->saddr,
+ dif);
+ else
+ return;
+
+ if (!sk)
+ return;
+
+ skb->sk = sk;
+ skb->destructor = sock_efree;
+ dst = READ_ONCE(sk->sk_rx_dst);
+
+ if (dst)
+ dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
+ if (dst) {
+ if (dst->flags & DST_NOCACHE) {
+ if (likely(atomic_inc_not_zero(&dst->__refcnt)))
+ skb_dst_set(skb, dst);
+ } else {
+ skb_dst_set_noref(skb, dst);
+ }
+ }
+}
+
static __inline__ int udpv6_rcv(struct sk_buff *skb)
{
return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
@@ -1379,6 +1437,7 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
#endif
static const struct inet6_protocol udpv6_protocol = {
+ .early_demux = udp_v6_early_demux,
.handler = udpv6_rcv,
.err_handler = udpv6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 6414079aa729..82589b2abf3c 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -32,7 +32,9 @@
#define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)
static int zero = 0;
+static int one = 1;
static int label_limit = (1 << 20) - 1;
+static int ttl_max = 255;
static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
struct nlmsghdr *nlh, struct net *net, u32 portid,
@@ -220,8 +222,8 @@ out:
return &rt->rt_nh[nh_index];
}
-static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
- struct mpls_entry_decoded dec)
+static bool mpls_egress(struct net *net, struct mpls_route *rt,
+ struct sk_buff *skb, struct mpls_entry_decoded dec)
{
enum mpls_payload_type payload_type;
bool success = false;
@@ -246,22 +248,46 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
switch (payload_type) {
case MPT_IPV4: {
struct iphdr *hdr4 = ip_hdr(skb);
+ u8 new_ttl;
skb->protocol = htons(ETH_P_IP);
+
+ /* If propagating TTL, take the decremented TTL from
+ * the incoming MPLS header, otherwise decrement the
+ * TTL, but only if not 0 to avoid underflow.
+ */
+ if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
+ (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+ net->mpls.ip_ttl_propagate))
+ new_ttl = dec.ttl;
+ else
+ new_ttl = hdr4->ttl ? hdr4->ttl - 1 : 0;
+
csum_replace2(&hdr4->check,
htons(hdr4->ttl << 8),
- htons(dec.ttl << 8));
- hdr4->ttl = dec.ttl;
+ htons(new_ttl << 8));
+ hdr4->ttl = new_ttl;
success = true;
break;
}
case MPT_IPV6: {
struct ipv6hdr *hdr6 = ipv6_hdr(skb);
skb->protocol = htons(ETH_P_IPV6);
- hdr6->hop_limit = dec.ttl;
+
+ /* If propagating TTL, take the decremented TTL from
+ * the incoming MPLS header, otherwise decrement the
+ * hop limit, but only if not 0 to avoid underflow.
+ */
+ if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
+ (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+ net->mpls.ip_ttl_propagate))
+ hdr6->hop_limit = dec.ttl;
+ else if (hdr6->hop_limit)
+ hdr6->hop_limit = hdr6->hop_limit - 1;
success = true;
break;
}
case MPT_UNSPEC:
+ /* Should have decided which protocol it is by now */
break;
}
@@ -361,7 +387,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
if (unlikely(!new_header_size && dec.bos)) {
/* Penultimate hop popping */
- if (!mpls_egress(rt, skb, dec))
+ if (!mpls_egress(dev_net(out_dev), rt, skb, dec))
goto err;
} else {
bool bos;
@@ -412,6 +438,7 @@ static struct packet_type mpls_packet_type __read_mostly = {
static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
[RTA_DST] = { .type = NLA_U32 },
[RTA_OIF] = { .type = NLA_U32 },
+ [RTA_TTL_PROPAGATE] = { .type = NLA_U8 },
};
struct mpls_route_config {
@@ -421,6 +448,7 @@ struct mpls_route_config {
u8 rc_via_alen;
u8 rc_via[MAX_VIA_ALEN];
u32 rc_label;
+ u8 rc_ttl_propagate;
u8 rc_output_labels;
u32 rc_output_label[MAX_NEW_LABELS];
u32 rc_nlflags;
@@ -856,6 +884,7 @@ static int mpls_route_add(struct mpls_route_config *cfg)
rt->rt_protocol = cfg->rc_protocol;
rt->rt_payload_type = cfg->rc_payload_type;
+ rt->rt_ttl_propagate = cfg->rc_ttl_propagate;
if (cfg->rc_mp)
err = mpls_nh_build_multi(cfg, rt);
@@ -1584,6 +1613,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->rc_label = LABEL_NOT_SPECIFIED;
cfg->rc_protocol = rtm->rtm_protocol;
cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC;
+ cfg->rc_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
cfg->rc_nlflags = nlh->nlmsg_flags;
cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->rc_nlinfo.nlh = nlh;
@@ -1630,6 +1660,17 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->rc_mp_len = nla_len(nla);
break;
}
+ case RTA_TTL_PROPAGATE:
+ {
+ u8 ttl_propagate = nla_get_u8(nla);
+
+ if (ttl_propagate > 1)
+ goto errout;
+ cfg->rc_ttl_propagate = ttl_propagate ?
+ MPLS_TTL_PROP_ENABLED :
+ MPLS_TTL_PROP_DISABLED;
+ break;
+ }
default:
/* Unsupported attribute */
goto errout;
@@ -1690,6 +1731,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
if (nla_put_labels(skb, RTA_DST, 1, &label))
goto nla_put_failure;
+
+ if (rt->rt_ttl_propagate != MPLS_TTL_PROP_DEFAULT) {
+ bool ttl_propagate =
+ rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED;
+
+ if (nla_put_u8(skb, RTA_TTL_PROPAGATE,
+ ttl_propagate))
+ goto nla_put_failure;
+ }
if (rt->rt_nhn == 1) {
const struct mpls_nh *nh = rt->rt_nh;
@@ -1800,7 +1850,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
{
size_t payload =
NLMSG_ALIGN(sizeof(struct rtmsg))
- + nla_total_size(4); /* RTA_DST */
+ + nla_total_size(4) /* RTA_DST */
+ + nla_total_size(1); /* RTA_TTL_PROPAGATE */
if (rt->rt_nhn == 1) {
struct mpls_nh *nh = rt->rt_nh;
@@ -1884,6 +1935,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
rt0->rt_protocol = RTPROT_KERNEL;
rt0->rt_payload_type = MPT_IPV4;
+ rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
rt0->rt_nh->nh_via_alen = lo->addr_len;
memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
@@ -1897,6 +1949,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
rt2->rt_protocol = RTPROT_KERNEL;
rt2->rt_payload_type = MPT_IPV6;
+ rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
rt2->rt_nh->nh_via_alen = lo->addr_len;
memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
@@ -1978,6 +2031,9 @@ static int mpls_platform_labels(struct ctl_table *table, int write,
return ret;
}
+#define MPLS_NS_SYSCTL_OFFSET(field) \
+ (&((struct net *)0)->field)
+
static const struct ctl_table mpls_table[] = {
{
.procname = "platform_labels",
@@ -1986,21 +2042,47 @@ static const struct ctl_table mpls_table[] = {
.mode = 0644,
.proc_handler = mpls_platform_labels,
},
+ {
+ .procname = "ip_ttl_propagate",
+ .data = MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate),
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .procname = "default_ttl",
+ .data = MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl),
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &ttl_max,
+ },
{ }
};
static int mpls_net_init(struct net *net)
{
struct ctl_table *table;
+ int i;
net->mpls.platform_labels = 0;
net->mpls.platform_label = NULL;
+ net->mpls.ip_ttl_propagate = 1;
+ net->mpls.default_ttl = 255;
table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
if (table == NULL)
return -ENOMEM;
- table[0].data = net;
+ /* Table data contains only offsets relative to the base of
+ * the struct net at this point, so make them absolute.
+ */
+ for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++)
+ table[i].data = (char *)net + (uintptr_t)table[i].data;
+
net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
if (net->mpls.ctl == NULL) {
kfree(table);
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 76360d8b9579..62928d8fabd1 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -90,6 +90,12 @@ struct mpls_nh { /* next hop label forwarding entry */
u8 nh_via_table;
};
+enum mpls_ttl_propagation {
+ MPLS_TTL_PROP_DEFAULT,
+ MPLS_TTL_PROP_ENABLED,
+ MPLS_TTL_PROP_DISABLED,
+};
+
/* The route, nexthops and vias are stored together in the same memory
* block:
*
@@ -116,6 +122,7 @@ struct mpls_route { /* next hop label forwarding entry */
u8 rt_protocol;
u8 rt_payload_type;
u8 rt_max_alen;
+ u8 rt_ttl_propagate;
unsigned int rt_nhn;
unsigned int rt_nhn_alive;
struct mpls_nh rt_nh[0];
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index e4e4424f9eb1..22f71fce0bfb 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -29,6 +29,7 @@
static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = {
[MPLS_IPTUNNEL_DST] = { .type = NLA_U32 },
+ [MPLS_IPTUNNEL_TTL] = { .type = NLA_U8 },
};
static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en)
@@ -49,6 +50,7 @@ static int mpls_xmit(struct sk_buff *skb)
struct rtable *rt = NULL;
struct rt6_info *rt6 = NULL;
struct mpls_dev *out_mdev;
+ struct net *net;
int err = 0;
bool bos;
int i;
@@ -56,17 +58,7 @@ static int mpls_xmit(struct sk_buff *skb)
/* Find the output device */
out_dev = dst->dev;
-
- /* Obtain the ttl */
- if (dst->ops->family == AF_INET) {
- ttl = ip_hdr(skb)->ttl;
- rt = (struct rtable *)dst;
- } else if (dst->ops->family == AF_INET6) {
- ttl = ipv6_hdr(skb)->hop_limit;
- rt6 = (struct rt6_info *)dst;
- } else {
- goto drop;
- }
+ net = dev_net(out_dev);
skb_orphan(skb);
@@ -78,6 +70,38 @@ static int mpls_xmit(struct sk_buff *skb)
tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate);
+ /* Obtain the ttl using the following set of rules.
+ *
+ * LWT ttl propagation setting:
+ * - disabled => use default TTL value from LWT
+ * - enabled => use TTL value from IPv4/IPv6 header
+ * - default =>
+ * Global ttl propagation setting:
+ * - disabled => use default TTL value from global setting
+ * - enabled => use TTL value from IPv4/IPv6 header
+ */
+ if (dst->ops->family == AF_INET) {
+ if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
+ ttl = tun_encap_info->default_ttl;
+ else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+ !net->mpls.ip_ttl_propagate)
+ ttl = net->mpls.default_ttl;
+ else
+ ttl = ip_hdr(skb)->ttl;
+ rt = (struct rtable *)dst;
+ } else if (dst->ops->family == AF_INET6) {
+ if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
+ ttl = tun_encap_info->default_ttl;
+ else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+ !net->mpls.ip_ttl_propagate)
+ ttl = net->mpls.default_ttl;
+ else
+ ttl = ipv6_hdr(skb)->hop_limit;
+ rt6 = (struct rt6_info *)dst;
+ } else {
+ goto drop;
+ }
+
/* Verify the destination can hold the packet */
new_header_size = mpls_encap_size(tun_encap_info);
mtu = mpls_dev_mtu(out_dev);
@@ -160,6 +184,17 @@ static int mpls_build_state(struct nlattr *nla,
&tun_encap_info->labels, tun_encap_info->label);
if (ret)
goto errout;
+
+ tun_encap_info->ttl_propagate = MPLS_TTL_PROP_DEFAULT;
+
+ if (tb[MPLS_IPTUNNEL_TTL]) {
+ tun_encap_info->default_ttl = nla_get_u8(tb[MPLS_IPTUNNEL_TTL]);
+ /* TTL 0 implies propagate from IP header */
+ tun_encap_info->ttl_propagate = tun_encap_info->default_ttl ?
+ MPLS_TTL_PROP_DISABLED :
+ MPLS_TTL_PROP_ENABLED;
+ }
+
newts->type = LWTUNNEL_ENCAP_MPLS;
newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
newts->headroom = mpls_encap_size(tun_encap_info);
@@ -186,6 +221,10 @@ static int mpls_fill_encap_info(struct sk_buff *skb,
tun_encap_info->label))
goto nla_put_failure;
+ if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT &&
+ nla_put_u8(skb, MPLS_IPTUNNEL_TTL, tun_encap_info->default_ttl))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -195,10 +234,16 @@ nla_put_failure:
static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate)
{
struct mpls_iptunnel_encap *tun_encap_info;
+ int nlsize;
tun_encap_info = mpls_lwtunnel_encap(lwtstate);
- return nla_total_size(tun_encap_info->labels * 4);
+ nlsize = nla_total_size(tun_encap_info->labels * 4);
+
+ if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT)
+ nlsize += nla_total_size(1);
+
+ return nlsize;
}
static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
@@ -207,7 +252,9 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(b);
int l;
- if (a_hdr->labels != b_hdr->labels)
+ if (a_hdr->labels != b_hdr->labels ||
+ a_hdr->ttl_propagate != b_hdr->ttl_propagate ||
+ a_hdr->default_ttl != b_hdr->default_ttl)
return 1;
for (l = 0; l < MAX_NEW_LABELS; l++)
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index e6a2753dff9e..3d2ac71a83ec 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -181,7 +181,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
cp->flags |= IP_VS_CONN_F_HASHED;
- atomic_inc(&cp->refcnt);
+ refcount_inc(&cp->refcnt);
hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
ret = 1;
} else {
@@ -215,7 +215,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
if (cp->flags & IP_VS_CONN_F_HASHED) {
hlist_del_rcu(&cp->c_list);
cp->flags &= ~IP_VS_CONN_F_HASHED;
- atomic_dec(&cp->refcnt);
+ refcount_dec(&cp->refcnt);
ret = 1;
} else
ret = 0;
@@ -242,13 +242,13 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
if (cp->flags & IP_VS_CONN_F_HASHED) {
ret = false;
/* Decrease refcnt and unlink conn only if we are last user */
- if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) {
+ if (refcount_dec_if_one(&cp->refcnt)) {
hlist_del_rcu(&cp->c_list);
cp->flags &= ~IP_VS_CONN_F_HASHED;
ret = true;
}
} else
- ret = atomic_read(&cp->refcnt) ? false : true;
+ ret = refcount_read(&cp->refcnt) ? false : true;
spin_unlock(&cp->lock);
ct_write_unlock_bh(hash);
@@ -475,7 +475,7 @@ static void __ip_vs_conn_put_timer(struct ip_vs_conn *cp)
void ip_vs_conn_put(struct ip_vs_conn *cp)
{
if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) &&
- (atomic_read(&cp->refcnt) == 1) &&
+ (refcount_read(&cp->refcnt) == 1) &&
!timer_pending(&cp->timer))
/* expire connection immediately */
__ip_vs_conn_put_notimer(cp);
@@ -617,8 +617,8 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
- cp->flags, atomic_read(&cp->refcnt),
- atomic_read(&dest->refcnt));
+ cp->flags, refcount_read(&cp->refcnt),
+ refcount_read(&dest->refcnt));
/* Update the connection counters */
if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -714,8 +714,8 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
- cp->flags, atomic_read(&cp->refcnt),
- atomic_read(&dest->refcnt));
+ cp->flags, refcount_read(&cp->refcnt),
+ refcount_read(&dest->refcnt));
/* Update the connection counters */
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -863,10 +863,10 @@ static void ip_vs_conn_expire(unsigned long data)
expire_later:
IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n",
- atomic_read(&cp->refcnt),
+ refcount_read(&cp->refcnt),
atomic_read(&cp->n_control));
- atomic_inc(&cp->refcnt);
+ refcount_inc(&cp->refcnt);
cp->timeout = 60*HZ;
if (ipvs->sync_state & IP_VS_STATE_MASTER)
@@ -941,7 +941,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
* it in the table, so that other thread run ip_vs_random_dropentry
* but cannot drop this entry.
*/
- atomic_set(&cp->refcnt, 1);
+ refcount_set(&cp->refcnt, 1);
cp->control = NULL;
atomic_set(&cp->n_control, 0);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index db40050f8785..b4a746d0e39b 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -542,7 +542,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
- cp->flags, atomic_read(&cp->refcnt));
+ cp->flags, refcount_read(&cp->refcnt));
ip_vs_conn_stats(cp, svc);
return cp;
@@ -1193,7 +1193,7 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
- cp->flags, atomic_read(&cp->refcnt));
+ cp->flags, refcount_read(&cp->refcnt));
LeaveFunction(12);
return cp;
}
@@ -2231,8 +2231,6 @@ static int __net_init __ip_vs_init(struct net *net)
if (ip_vs_sync_net_init(ipvs) < 0)
goto sync_fail;
- printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
- sizeof(struct netns_ipvs), ipvs->gen);
return 0;
/*
* Error handling
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 5aeb0dde6ccc..541aa7694775 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -699,7 +699,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
dest->vfwmark,
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
- atomic_read(&dest->refcnt));
+ refcount_read(&dest->refcnt));
if (dest->af == dest_af &&
ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
dest->port == dport &&
@@ -934,7 +934,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
atomic_set(&dest->activeconns, 0);
atomic_set(&dest->inactconns, 0);
atomic_set(&dest->persistconns, 0);
- atomic_set(&dest->refcnt, 1);
+ refcount_set(&dest->refcnt, 1);
INIT_HLIST_NODE(&dest->d_list);
spin_lock_init(&dest->dst_lock);
@@ -998,7 +998,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
"dest->refcnt=%d, service %u/%s:%u\n",
IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
- atomic_read(&dest->refcnt),
+ refcount_read(&dest->refcnt),
dest->vfwmark,
IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
ntohs(dest->vport));
@@ -1074,7 +1074,7 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
spin_lock_bh(&ipvs->dest_trash_lock);
IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
- atomic_read(&dest->refcnt));
+ refcount_read(&dest->refcnt));
if (list_empty(&ipvs->dest_trash) && !cleanup)
mod_timer(&ipvs->dest_trash_timer,
jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
@@ -1157,7 +1157,7 @@ static void ip_vs_dest_trash_expire(unsigned long data)
spin_lock(&ipvs->dest_trash_lock);
list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
- if (atomic_read(&dest->refcnt) > 1)
+ if (refcount_read(&dest->refcnt) > 1)
continue;
if (dest->idle_start) {
if (time_before(now, dest->idle_start +
@@ -1545,7 +1545,7 @@ ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
dev->name,
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
- atomic_read(&dest->refcnt));
+ refcount_read(&dest->refcnt));
__ip_vs_dst_cache_reset(dest);
}
spin_unlock_bh(&dest->dst_lock);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 5824927cf8e0..b6aa4a970c6e 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -448,7 +448,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 703f11877bee..c13ff575f9f7 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -204,7 +204,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
}
@@ -249,7 +249,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
__func__,
IP_VS_DBG_ADDR(most->af, &most->addr), ntohs(most->port),
atomic_read(&most->activeconns),
- atomic_read(&most->refcnt),
+ refcount_read(&most->refcnt),
atomic_read(&most->weight), moh);
return most;
}
@@ -612,7 +612,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index a8b63401e773..7d9d4ac596ca 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -110,7 +110,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index d952d67f904d..56f8e4b204ff 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -447,7 +447,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
ntohs(cp->cport),
sctp_state_name(cp->state),
sctp_state_name(next_state),
- atomic_read(&cp->refcnt));
+ refcount_read(&cp->refcnt));
if (dest) {
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
(next_state != IP_VS_SCTP_S_ESTABLISHED)) {
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 5117bcb7d2f0..12dc8d5bc37d 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -557,7 +557,7 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
ntohs(cp->cport),
tcp_state_name(cp->state),
tcp_state_name(new_state),
- atomic_read(&cp->refcnt));
+ refcount_read(&cp->refcnt));
if (dest) {
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index 58bacfc461ee..ee0530d14c5f 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -97,7 +97,7 @@ stop:
"activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
- atomic_read(&dest->refcnt), atomic_read(&dest->weight));
+ refcount_read(&dest->refcnt), atomic_read(&dest->weight));
return dest;
}
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index f8e2d00f528b..ab23cf203437 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -111,7 +111,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index 6b366fd90554..6add39e0ec20 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -83,7 +83,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
- atomic_read(&least->refcnt),
+ refcount_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 17e6d4406ca7..62258dd457ac 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -218,7 +218,7 @@ found:
"activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
- atomic_read(&dest->refcnt),
+ refcount_read(&dest->refcnt),
atomic_read(&dest->weight));
mark->cl = dest;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ffb78e5f7b70..3d621b8d7b8a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1133,7 +1133,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
/* Allocate a new conntrack: we return -ENOMEM if classification
failed due to stress. Otherwise it really is unclassifiable. */
-static struct nf_conntrack_tuple_hash *
+static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_l3proto *l3proto,
@@ -1241,21 +1241,20 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
}
-/* On success, returns conntrack ptr, sets skb->_nfct | ctinfo */
-static inline struct nf_conn *
+/* On success, returns 0, sets skb->_nfct | ctinfo */
+static int
resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto,
- int *set_reply,
- enum ip_conntrack_info *ctinfo)
+ struct nf_conntrack_l4proto *l4proto)
{
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
+ enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
struct nf_conn *ct;
u32 hash;
@@ -1264,7 +1263,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
dataoff, l3num, protonum, net, &tuple, l3proto,
l4proto)) {
pr_debug("Can't get tuple\n");
- return NULL;
+ return 0;
}
/* look for tuple match */
@@ -1275,33 +1274,30 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
skb, dataoff, hash);
if (!h)
- return NULL;
+ return 0;
if (IS_ERR(h))
- return (void *)h;
+ return PTR_ERR(h);
}
ct = nf_ct_tuplehash_to_ctrack(h);
/* It exists; we have (non-exclusive) reference. */
if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
- *ctinfo = IP_CT_ESTABLISHED_REPLY;
- /* Please set reply bit if this packet OK */
- *set_reply = 1;
+ ctinfo = IP_CT_ESTABLISHED_REPLY;
} else {
/* Once we've had two way comms, always ESTABLISHED. */
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
pr_debug("normal packet for %p\n", ct);
- *ctinfo = IP_CT_ESTABLISHED;
+ ctinfo = IP_CT_ESTABLISHED;
} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
pr_debug("related packet for %p\n", ct);
- *ctinfo = IP_CT_RELATED;
+ ctinfo = IP_CT_RELATED;
} else {
pr_debug("new packet for %p\n", ct);
- *ctinfo = IP_CT_NEW;
+ ctinfo = IP_CT_NEW;
}
- *set_reply = 0;
}
- nf_ct_set(skb, ct, *ctinfo);
- return ct;
+ nf_ct_set(skb, ct, ctinfo);
+ return 0;
}
unsigned int
@@ -1315,7 +1311,6 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
unsigned int *timeouts;
unsigned int dataoff;
u_int8_t protonum;
- int set_reply = 0;
int ret;
tmpl = nf_ct_get(skb, &ctinfo);
@@ -1358,23 +1353,22 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
goto out;
}
repeat:
- ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
- l3proto, l4proto, &set_reply, &ctinfo);
- if (!ct) {
- /* Not valid part of a connection */
- NF_CT_STAT_INC_ATOMIC(net, invalid);
- ret = NF_ACCEPT;
- goto out;
- }
-
- if (IS_ERR(ct)) {
+ ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
+ l3proto, l4proto);
+ if (ret < 0) {
/* Too stressed to deal. */
NF_CT_STAT_INC_ATOMIC(net, drop);
ret = NF_DROP;
goto out;
}
- NF_CT_ASSERT(skb_nfct(skb));
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct) {
+ /* Not valid part of a connection */
+ NF_CT_STAT_INC_ATOMIC(net, invalid);
+ ret = NF_ACCEPT;
+ goto out;
+ }
/* Decide what timeout policy we want to apply to this flow. */
timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
@@ -1399,7 +1393,8 @@ repeat:
goto out;
}
- if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
+ if (ctinfo == IP_CT_ESTABLISHED_REPLY &&
+ !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_REPLY, ct);
out:
if (tmpl)
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 4b2e1fb28bb4..cb29e598605f 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -133,7 +133,7 @@ nf_ct_expect_find_get(struct net *net,
rcu_read_lock();
i = __nf_ct_expect_find(net, zone, tuple);
- if (i && !atomic_inc_not_zero(&i->use))
+ if (i && !refcount_inc_not_zero(&i->use))
i = NULL;
rcu_read_unlock();
@@ -186,7 +186,7 @@ nf_ct_find_expectation(struct net *net,
return NULL;
if (exp->flags & NF_CT_EXPECT_PERMANENT) {
- atomic_inc(&exp->use);
+ refcount_inc(&exp->use);
return exp;
} else if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
@@ -275,7 +275,7 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
return NULL;
new->master = me;
- atomic_set(&new->use, 1);
+ refcount_set(&new->use, 1);
return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
@@ -348,7 +348,7 @@ static void nf_ct_expect_free_rcu(struct rcu_head *head)
void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
- if (atomic_dec_and_test(&exp->use))
+ if (refcount_dec_and_test(&exp->use))
call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);
@@ -361,7 +361,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);
/* two references : one for hash insert, one for the timer */
- atomic_add(2, &exp->use);
+ refcount_add(2, &exp->use);
hlist_add_head(&exp->lnode, &master_help->expectations);
master_help->expecting[exp->class]++;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6806b5e73567..d49cc1e03c5b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2693,7 +2693,7 @@ restart:
cb->nlh->nlmsg_seq,
IPCTNL_MSG_EXP_NEW,
exp) < 0) {
- if (!atomic_inc_not_zero(&exp->use))
+ if (!refcount_inc_not_zero(&exp->use))
continue;
cb->args[1] = (unsigned long)exp;
goto out;
@@ -2739,7 +2739,7 @@ restart:
cb->nlh->nlmsg_seq,
IPCTNL_MSG_EXP_NEW,
exp) < 0) {
- if (!atomic_inc_not_zero(&exp->use))
+ if (!refcount_inc_not_zero(&exp->use))
continue;
cb->args[1] = (unsigned long)exp;
goto out;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 434c739dfeca..2d822d2fd830 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1772,8 +1772,19 @@ static int nf_tables_newexpr(const struct nft_ctx *ctx,
goto err1;
}
+ if (ops->validate) {
+ const struct nft_data *data = NULL;
+
+ err = ops->validate(ctx, expr, &data);
+ if (err < 0)
+ goto err2;
+ }
+
return 0;
+err2:
+ if (ops->destroy)
+ ops->destroy(ctx, expr);
err1:
expr->ops = NULL;
return err;
@@ -2523,8 +2534,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
return 0;
}
-struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
- const struct nlattr *nla, u8 genmask)
+static struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
{
struct nft_set *set;
@@ -2538,11 +2549,10 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
}
return ERR_PTR(-ENOENT);
}
-EXPORT_SYMBOL_GPL(nf_tables_set_lookup);
-struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
- const struct nlattr *nla,
- u8 genmask)
+static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
+ const struct nlattr *nla,
+ u8 genmask)
{
struct nft_trans *trans;
u32 id = ntohl(nla_get_be32(nla));
@@ -2557,7 +2567,25 @@ struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
}
return ERR_PTR(-ENOENT);
}
-EXPORT_SYMBOL_GPL(nf_tables_set_lookup_byid);
+
+struct nft_set *nft_set_lookup(const struct net *net,
+ const struct nft_table *table,
+ const struct nlattr *nla_set_name,
+ const struct nlattr *nla_set_id,
+ u8 genmask)
+{
+ struct nft_set *set;
+
+ set = nf_tables_set_lookup(table, nla_set_name, genmask);
+ if (IS_ERR(set)) {
+ if (!nla_set_id)
+ return set;
+
+ set = nf_tables_set_lookup_byid(net, nla_set_id, genmask);
+ }
+ return set;
+}
+EXPORT_SYMBOL_GPL(nft_set_lookup);
static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
const char *name)
@@ -4064,7 +4092,8 @@ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
[NFTA_OBJ_DATA] = { .type = NLA_NESTED },
};
-static struct nft_object *nft_obj_init(const struct nft_object_type *type,
+static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
+ const struct nft_object_type *type,
const struct nlattr *attr)
{
struct nlattr *tb[type->maxattr + 1];
@@ -4084,7 +4113,7 @@ static struct nft_object *nft_obj_init(const struct nft_object_type *type,
if (obj == NULL)
goto err1;
- err = type->init((const struct nlattr * const *)tb, obj);
+ err = type->init(ctx, (const struct nlattr * const *)tb, obj);
if (err < 0)
goto err2;
@@ -4192,7 +4221,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
if (IS_ERR(type))
return PTR_ERR(type);
- obj = nft_obj_init(type, nla[NFTA_OBJ_DATA]);
+ obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto err1;
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index d44d89b56127..c86da174a5fc 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/slab.h>
@@ -32,7 +33,7 @@ struct nf_acct {
atomic64_t bytes;
unsigned long flags;
struct list_head head;
- atomic_t refcnt;
+ refcount_t refcnt;
char name[NFACCT_NAME_MAX];
struct rcu_head rcu_head;
char data[0];
@@ -123,7 +124,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl,
atomic64_set(&nfacct->pkts,
be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
}
- atomic_set(&nfacct->refcnt, 1);
+ refcount_set(&nfacct->refcnt, 1);
list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list);
return 0;
}
@@ -166,7 +167,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
NFACCT_PAD) ||
nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes),
NFACCT_PAD) ||
- nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
+ nla_put_be32(skb, NFACCT_USE, htonl(refcount_read(&acct->refcnt))))
goto nla_put_failure;
if (acct->flags & NFACCT_F_QUOTA) {
u64 *quota = (u64 *)acct->data;
@@ -329,7 +330,7 @@ static int nfnl_acct_try_del(struct nf_acct *cur)
/* We want to avoid races with nfnl_acct_put. So only when the current
* refcnt is 1, we decrease it to 0.
*/
- if (atomic_cmpxchg(&cur->refcnt, 1, 0) == 1) {
+ if (refcount_dec_if_one(&cur->refcnt)) {
/* We are protected by nfnl mutex. */
list_del_rcu(&cur->head);
kfree_rcu(cur, rcu_head);
@@ -413,7 +414,7 @@ struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name)
if (!try_module_get(THIS_MODULE))
goto err;
- if (!atomic_inc_not_zero(&cur->refcnt)) {
+ if (!refcount_inc_not_zero(&cur->refcnt)) {
module_put(THIS_MODULE);
goto err;
}
@@ -429,7 +430,7 @@ EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
void nfnl_acct_put(struct nf_acct *acct)
{
- if (atomic_dec_and_test(&acct->refcnt))
+ if (refcount_dec_and_test(&acct->refcnt))
kfree_rcu(acct, rcu_head);
module_put(THIS_MODULE);
@@ -502,7 +503,7 @@ static void __net_exit nfnl_acct_net_exit(struct net *net)
list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) {
list_del_rcu(&cur->head);
- if (atomic_dec_and_test(&cur->refcnt))
+ if (refcount_dec_and_test(&cur->refcnt))
kfree_rcu(cur, rcu_head);
}
}
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 139e0867e56e..baa75f3ab7e7 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -138,7 +138,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME]));
timeout->l3num = l3num;
timeout->l4proto = l4proto;
- atomic_set(&timeout->refcnt, 1);
+ refcount_set(&timeout->refcnt, 1);
list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list);
return 0;
@@ -172,7 +172,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)) ||
nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) ||
nla_put_be32(skb, CTA_TIMEOUT_USE,
- htonl(atomic_read(&timeout->refcnt))))
+ htonl(refcount_read(&timeout->refcnt))))
goto nla_put_failure;
if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
@@ -339,7 +339,7 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
/* We want to avoid races with ctnl_timeout_put. So only when the
* current refcnt is 1, we decrease it to 0.
*/
- if (atomic_cmpxchg(&timeout->refcnt, 1, 0) == 1) {
+ if (refcount_dec_if_one(&timeout->refcnt)) {
/* We are protected by nfnl mutex. */
list_del_rcu(&timeout->head);
nf_ct_l4proto_put(timeout->l4proto);
@@ -536,7 +536,7 @@ ctnl_timeout_find_get(struct net *net, const char *name)
if (!try_module_get(THIS_MODULE))
goto err;
- if (!atomic_inc_not_zero(&timeout->refcnt)) {
+ if (!refcount_inc_not_zero(&timeout->refcnt)) {
module_put(THIS_MODULE);
goto err;
}
@@ -550,7 +550,7 @@ err:
static void ctnl_timeout_put(struct ctnl_timeout *timeout)
{
- if (atomic_dec_and_test(&timeout->refcnt))
+ if (refcount_dec_and_test(&timeout->refcnt))
kfree_rcu(timeout, rcu_head);
module_put(THIS_MODULE);
@@ -601,7 +601,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
list_del_rcu(&cur->head);
nf_ct_l4proto_put(cur->l4proto);
- if (atomic_dec_and_test(&cur->refcnt))
+ if (refcount_dec_and_test(&cur->refcnt))
kfree_rcu(cur, rcu_head);
}
}
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 08247bf7d7b8..ecd857b75ffe 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -40,6 +40,8 @@
#include <net/netfilter/nfnetlink_log.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
+
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
#include "../bridge/br_private.h"
@@ -57,7 +59,7 @@
struct nfulnl_instance {
struct hlist_node hlist; /* global list of instances */
spinlock_t lock;
- atomic_t use; /* use count */
+ refcount_t use; /* use count */
unsigned int qlen; /* number of nlmsgs in skb */
struct sk_buff *skb; /* pre-allocatd skb */
@@ -115,7 +117,7 @@ __instance_lookup(struct nfnl_log_net *log, u_int16_t group_num)
static inline void
instance_get(struct nfulnl_instance *inst)
{
- atomic_inc(&inst->use);
+ refcount_inc(&inst->use);
}
static struct nfulnl_instance *
@@ -125,7 +127,7 @@ instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num)
rcu_read_lock_bh();
inst = __instance_lookup(log, group_num);
- if (inst && !atomic_inc_not_zero(&inst->use))
+ if (inst && !refcount_inc_not_zero(&inst->use))
inst = NULL;
rcu_read_unlock_bh();
@@ -145,7 +147,7 @@ static void nfulnl_instance_free_rcu(struct rcu_head *head)
static void
instance_put(struct nfulnl_instance *inst)
{
- if (inst && atomic_dec_and_test(&inst->use))
+ if (inst && refcount_dec_and_test(&inst->use))
call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu);
}
@@ -180,7 +182,7 @@ instance_create(struct net *net, u_int16_t group_num,
INIT_HLIST_NODE(&inst->hlist);
spin_lock_init(&inst->lock);
/* needs to be two, since we _put() after creation */
- atomic_set(&inst->use, 2);
+ refcount_set(&inst->use, 2);
setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
@@ -1031,7 +1033,7 @@ static int seq_show(struct seq_file *s, void *v)
inst->group_num,
inst->peer_portid, inst->qlen,
inst->copy_mode, inst->copy_range,
- inst->flushtimeout, atomic_read(&inst->use));
+ inst->flushtimeout, refcount_read(&inst->use));
return 0;
}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index c21e7eb8dce0..fab6bf3f955e 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -230,10 +230,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
union nft_entry e = {};
int ret;
- ret = nft_compat_chain_validate_dependency(target->table, ctx->chain);
- if (ret < 0)
- goto err;
-
target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info);
if (ctx->nla[NFTA_RULE_COMPAT]) {
@@ -419,10 +415,6 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
union nft_entry e = {};
int ret;
- ret = nft_compat_chain_validate_dependency(match->table, ctx->chain);
- if (ret < 0)
- goto err;
-
match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info);
if (ctx->nla[NFTA_RULE_COMPAT]) {
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 7f8422213341..67a710ebde09 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -82,7 +82,8 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
return 0;
}
-static int nft_counter_obj_init(const struct nlattr * const tb[],
+static int nft_counter_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
struct nft_object *obj)
{
struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 0264258c46fe..640fe5a5865e 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -32,6 +32,12 @@ struct nft_ct {
};
};
+/* Private data of a ct helper object: one conntrack helper pointer per
+ * L3 family (either may be NULL) plus the L4 protocol number that a
+ * connection must match before a helper is assigned.
+ */
+struct nft_ct_helper_obj {
+ struct nf_conntrack_helper *helper4;
+ struct nf_conntrack_helper *helper6;
+ u8 l4proto;
+};
+
#ifdef CONFIG_NF_CONNTRACK_ZONES
static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
@@ -733,6 +739,162 @@ static struct nft_expr_type nft_notrack_type __read_mostly = {
.owner = THIS_MODULE,
};
+/* Initialise a ct helper object from netlink attributes.
+ *
+ * NFTA_CT_HELPER_NAME and NFTA_CT_HELPER_L4PROTO are mandatory (-EINVAL
+ * otherwise); a zero L4 protocol yields -ENOENT.  The L3 family defaults
+ * to the family of the table context and may be overridden by
+ * NFTA_CT_HELPER_L3PROTO.  Returns 0 on success, -EAFNOSUPPORT for an
+ * unhandled family and -ENOENT when no matching helper was found.
+ */
+static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+ struct nf_conntrack_helper *help4, *help6;
+ char name[NF_CT_HELPER_NAME_LEN];
+ int family = ctx->afi->family;
+
+ if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO])
+ return -EINVAL;
+
+ priv->l4proto = nla_get_u8(tb[NFTA_CT_HELPER_L4PROTO]);
+ if (!priv->l4proto)
+ return -ENOENT;
+
+ nla_strlcpy(name, tb[NFTA_CT_HELPER_NAME], sizeof(name));
+
+ if (tb[NFTA_CT_HELPER_L3PROTO])
+ family = ntohs(nla_get_be16(tb[NFTA_CT_HELPER_L3PROTO]));
+
+ help4 = NULL;
+ help6 = NULL;
+
+ switch (family) {
+ case NFPROTO_IPV4:
+ /* an IPv4 helper cannot be requested from an IPv6 table */
+ if (ctx->afi->family == NFPROTO_IPV6)
+ return -EINVAL;
+
+ help4 = nf_conntrack_helper_try_module_get(name, family,
+ priv->l4proto);
+ break;
+ case NFPROTO_IPV6:
+ /* an IPv6 helper cannot be requested from an IPv4 table */
+ if (ctx->afi->family == NFPROTO_IPV4)
+ return -EINVAL;
+
+ help6 = nf_conntrack_helper_try_module_get(name, family,
+ priv->l4proto);
+ break;
+ case NFPROTO_NETDEV: /* fallthrough */
+ case NFPROTO_BRIDGE: /* same */
+ case NFPROTO_INET:
+ /* these families may see both IPv4 and IPv6: look up both */
+ help4 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV4,
+ priv->l4proto);
+ help6 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV6,
+ priv->l4proto);
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ /* && is intentional; only error if INET found neither ipv4 nor ipv6 */
+ if (!help4 && !help6)
+ return -ENOENT;
+
+ /* NOTE(review): the lookups presumably hold a module reference that
+ * the object's destroy callback drops - confirm against the helper API.
+ */
+ priv->helper4 = help4;
+ priv->helper6 = help6;
+
+ return 0;
+}
+
+/* Release the object: drop the module reference of each helper that is
+ * set (either pointer may be NULL).
+ */
+static void nft_ct_helper_obj_destroy(struct nft_object *obj)
+{
+ struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+
+ if (priv->helper4)
+ module_put(priv->helper4->me);
+ if (priv->helper6)
+ module_put(priv->helper6->me);
+}
+
+/* Per-packet evaluation: attach the configured helper to the packet's
+ * conntrack entry.
+ *
+ * Nothing is done when the packet has no conntrack entry, the entry is
+ * already confirmed, it is a template, its L4 protocol differs from the
+ * object's, no helper is configured for its L3 family, or a helper is
+ * already assigned (IPS_HELPER_BIT set).
+ */
+static void nft_ct_helper_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+ struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb);
+ struct nf_conntrack_helper *to_assign = NULL;
+ struct nf_conn_help *help;
+
+ if (!ct ||
+ nf_ct_is_confirmed(ct) ||
+ nf_ct_is_template(ct) ||
+ priv->l4proto != nf_ct_protonum(ct))
+ return;
+
+ /* pick the helper matching the connection's L3 protocol */
+ switch (nf_ct_l3num(ct)) {
+ case NFPROTO_IPV4:
+ to_assign = priv->helper4;
+ break;
+ case NFPROTO_IPV6:
+ to_assign = priv->helper6;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ if (!to_assign)
+ return;
+
+ if (test_bit(IPS_HELPER_BIT, &ct->status))
+ return;
+
+ /* only publish the helper if the extension could be allocated */
+ help = nf_ct_helper_ext_add(ct, to_assign, GFP_ATOMIC);
+ if (help) {
+ rcu_assign_pointer(help->helper, to_assign);
+ set_bit(IPS_HELPER_BIT, &ct->status);
+ }
+}
+
+/* Dump the object's attributes to @skb: helper name, L4 protocol and the
+ * L3 family derived from which helpers are set (INET when both are).
+ * Returns 0 on success or -1 if an attribute could not be added.
+ */
+static int nft_ct_helper_obj_dump(struct sk_buff *skb,
+				  struct nft_object *obj, bool reset)
+{
+	const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
+	const struct nf_conntrack_helper *helper;
+	u16 family;
+
+	/* An IPv6-only object has helper4 == NULL, so choose the helper
+	 * that supplies the name together with the family instead of
+	 * unconditionally dereferencing helper4.
+	 */
+	if (priv->helper4 && priv->helper6) {
+		family = NFPROTO_INET;
+		helper = priv->helper4;
+	} else if (priv->helper6) {
+		family = NFPROTO_IPV6;
+		helper = priv->helper6;
+	} else {
+		family = NFPROTO_IPV4;
+		helper = priv->helper4;
+	}
+
+	if (nla_put_string(skb, NFTA_CT_HELPER_NAME, helper->name))
+		return -1;
+
+	if (nla_put_u8(skb, NFTA_CT_HELPER_L4PROTO, priv->l4proto))
+		return -1;
+
+	if (nla_put_be16(skb, NFTA_CT_HELPER_L3PROTO, htons(family)))
+		return -1;
+
+	return 0;
+}
+
+/* Netlink attribute policy for ct helper objects; the name is bounded so
+ * that it fits a NF_CT_HELPER_NAME_LEN buffer including the terminator.
+ */
+static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = {
+ [NFTA_CT_HELPER_NAME] = { .type = NLA_STRING,
+ .len = NF_CT_HELPER_NAME_LEN - 1 },
+ [NFTA_CT_HELPER_L3PROTO] = { .type = NLA_U16 },
+ [NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 },
+};
+
+/* Object type descriptor tying the NFT_OBJECT_CT_HELPER type to its
+ * private data size, attribute policy and init/eval/destroy/dump callbacks.
+ */
+static struct nft_object_type nft_ct_helper_obj __read_mostly = {
+ .type = NFT_OBJECT_CT_HELPER,
+ .size = sizeof(struct nft_ct_helper_obj),
+ .maxattr = NFTA_CT_HELPER_MAX,
+ .policy = nft_ct_helper_policy,
+ .eval = nft_ct_helper_obj_eval,
+ .init = nft_ct_helper_obj_init,
+ .destroy = nft_ct_helper_obj_destroy,
+ .dump = nft_ct_helper_obj_dump,
+ .owner = THIS_MODULE,
+};
+
static int __init nft_ct_module_init(void)
{
int err;
@@ -747,7 +909,14 @@ static int __init nft_ct_module_init(void)
if (err < 0)
goto err1;
+ err = nft_register_obj(&nft_ct_helper_obj);
+ if (err < 0)
+ goto err2;
+
return 0;
+
+err2:
+ nft_unregister_expr(&nft_notrack_type);
err1:
nft_unregister_expr(&nft_ct_type);
return err;
@@ -755,6 +924,7 @@ err1:
static void __exit nft_ct_module_exit(void)
{
+ nft_unregister_obj(&nft_ct_helper_obj);
nft_unregister_expr(&nft_notrack_type);
nft_unregister_expr(&nft_ct_type);
}
@@ -766,3 +936,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("ct");
MODULE_ALIAS_NFT_EXPR("notrack");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER);
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 049ad2d9ee66..3948da380259 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -133,16 +133,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
priv->invert = true;
}
- set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME],
- genmask);
- if (IS_ERR(set)) {
- if (tb[NFTA_DYNSET_SET_ID])
- set = nf_tables_set_lookup_byid(ctx->net,
- tb[NFTA_DYNSET_SET_ID],
- genmask);
- if (IS_ERR(set))
- return PTR_ERR(set);
- }
+ set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_DYNSET_SET_NAME],
+ tb[NFTA_DYNSET_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
if (set->ops->update == NULL)
return -EOPNOTSUPP;
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index c308920b194c..d212a85d2f33 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -98,14 +98,21 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
goto err;
offset = i + priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- memcpy(dest, opt + offset, priv->len);
+ if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+ *dest = 1;
+ } else {
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+ memcpy(dest, opt + offset, priv->len);
+ }
return;
}
err:
- regs->verdict.code = NFT_BREAK;
+ if (priv->flags & NFT_EXTHDR_F_PRESENT)
+ *dest = 0;
+ else
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 29a4906adc27..21df8cccea65 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -24,7 +24,8 @@ const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
EXPORT_SYMBOL(nft_fib_policy);
#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
- NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)
+ NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \
+ NFTA_FIB_F_PRESENT)
int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nft_data **data)
@@ -112,7 +113,7 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (err < 0)
return err;
- return nft_fib_validate(ctx, expr, NULL);
+ return 0;
}
EXPORT_SYMBOL_GPL(nft_fib_init);
@@ -133,19 +134,22 @@ int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr)
}
EXPORT_SYMBOL_GPL(nft_fib_dump);
-void nft_fib_store_result(void *reg, enum nft_fib_result r,
+void nft_fib_store_result(void *reg, const struct nft_fib *priv,
const struct nft_pktinfo *pkt, int index)
{
struct net_device *dev;
u32 *dreg = reg;
- switch (r) {
+ switch (priv->result) {
case NFT_FIB_RESULT_OIF:
- *dreg = index;
+ *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index;
break;
case NFT_FIB_RESULT_OIFNAME:
dev = dev_get_by_index_rcu(nft_net(pkt), index);
- strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
+ if (priv->flags & NFTA_FIB_F_PRESENT)
+ *dreg = !!dev;
+ else
+ strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
break;
default:
WARN_ON_ONCE(1);
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index eb2721af898d..a6a4633725bb 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -17,7 +17,7 @@
#include <net/netfilter/nf_tables_core.h>
#include <linux/jhash.h>
-struct nft_hash {
+struct nft_jhash {
enum nft_registers sreg:8;
enum nft_registers dreg:8;
u8 len;
@@ -26,11 +26,11 @@ struct nft_hash {
u32 offset;
};
-static void nft_hash_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static void nft_jhash_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
- struct nft_hash *priv = nft_expr_priv(expr);
+ struct nft_jhash *priv = nft_expr_priv(expr);
const void *data = &regs->data[priv->sreg];
u32 h;
@@ -38,6 +38,25 @@ static void nft_hash_eval(const struct nft_expr *expr,
regs->data[priv->dreg] = h + priv->offset;
}
+/* Private data of the symmetric hash expression: destination register,
+ * the modulus the hash is scaled by, and an offset added to the result.
+ */
+struct nft_symhash {
+ enum nft_registers dreg:8;
+ u32 modulus;
+ u32 offset;
+};
+
+/* Evaluate the symmetric hash expression: take the packet's symmetric
+ * flow hash, scale it by the configured modulus, add the offset and
+ * store the result in the destination register.
+ */
+static void nft_symhash_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_symhash *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ u32 h;
+
+ /* NOTE(review): reciprocal_scale() presumably maps the hash into
+ * [0, modulus) - confirm against its definition.
+ */
+ h = reciprocal_scale(__skb_get_hash_symmetric(skb), priv->modulus);
+
+ regs->data[priv->dreg] = h + priv->offset;
+}
+
static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
[NFTA_HASH_SREG] = { .type = NLA_U32 },
[NFTA_HASH_DREG] = { .type = NLA_U32 },
@@ -45,13 +64,14 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
[NFTA_HASH_MODULUS] = { .type = NLA_U32 },
[NFTA_HASH_SEED] = { .type = NLA_U32 },
[NFTA_HASH_OFFSET] = { .type = NLA_U32 },
+ [NFTA_HASH_TYPE] = { .type = NLA_U32 },
};
-static int nft_hash_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_jhash_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
- struct nft_hash *priv = nft_expr_priv(expr);
+ struct nft_jhash *priv = nft_expr_priv(expr);
u32 len;
int err;
@@ -92,10 +112,36 @@ static int nft_hash_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, sizeof(u32));
}
-static int nft_hash_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+static int nft_symhash_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
- const struct nft_hash *priv = nft_expr_priv(expr);
+ struct nft_symhash *priv = nft_expr_priv(expr);
+
+ if (!tb[NFTA_HASH_DREG] ||
+ !tb[NFTA_HASH_MODULUS])
+ return -EINVAL;
+
+ if (tb[NFTA_HASH_OFFSET])
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
+
+ priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
+
+ priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
+ if (priv->modulus <= 1)
+ return -ERANGE;
+
+ if (priv->offset + priv->modulus - 1 < priv->offset)
+ return -EOVERFLOW;
+
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_jhash_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_jhash *priv = nft_expr_priv(expr);
if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg))
goto nla_put_failure;
@@ -110,6 +156,28 @@ static int nft_hash_dump(struct sk_buff *skb,
if (priv->offset != 0)
if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_JENKINS)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static int nft_symhash_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_symhash *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus)))
+ goto nla_put_failure;
+ if (priv->offset != 0)
+ if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_SYM)))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -117,17 +185,46 @@ nla_put_failure:
}
static struct nft_expr_type nft_hash_type;
-static const struct nft_expr_ops nft_hash_ops = {
+static const struct nft_expr_ops nft_jhash_ops = {
.type = &nft_hash_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)),
- .eval = nft_hash_eval,
- .init = nft_hash_init,
- .dump = nft_hash_dump,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_jhash)),
+ .eval = nft_jhash_eval,
+ .init = nft_jhash_init,
+ .dump = nft_jhash_dump,
};
+/* Expression operations for the symmetric hash variant of "hash". */
+static const struct nft_expr_ops nft_symhash_ops = {
+ .type = &nft_hash_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)),
+ .eval = nft_symhash_eval,
+ .init = nft_symhash_init,
+ .dump = nft_symhash_dump,
+};
+
+/* Choose the expression ops from NFTA_HASH_TYPE.
+ *
+ * Without the attribute the Jenkins hash ops are used; NFT_HASH_SYM
+ * selects the symmetric hash ops and NFT_HASH_JENKINS the Jenkins ops.
+ * Any other value returns ERR_PTR(-EOPNOTSUPP).
+ */
+static const struct nft_expr_ops *
+nft_hash_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ u32 type;
+
+ /* attribute absent: fall back to the Jenkins hash ops */
+ if (!tb[NFTA_HASH_TYPE])
+ return &nft_jhash_ops;
+
+ type = ntohl(nla_get_be32(tb[NFTA_HASH_TYPE]));
+ switch (type) {
+ case NFT_HASH_SYM:
+ return &nft_symhash_ops;
+ case NFT_HASH_JENKINS:
+ return &nft_jhash_ops;
+ default:
+ break;
+ }
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static struct nft_expr_type nft_hash_type __read_mostly = {
.name = "hash",
- .ops = &nft_hash_ops,
+ .select_ops = &nft_hash_select_ops,
.policy = nft_hash_policy,
.maxattr = NFTA_HASH_MAX,
.owner = THIS_MODULE,
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index c6baf412236d..18dd57a52651 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -17,9 +17,8 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-static DEFINE_SPINLOCK(limit_lock);
-
struct nft_limit {
+ spinlock_t lock;
u64 last;
u64 tokens;
u64 tokens_max;
@@ -34,7 +33,7 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
u64 now, tokens;
s64 delta;
- spin_lock_bh(&limit_lock);
+ spin_lock_bh(&limit->lock);
now = ktime_get_ns();
tokens = limit->tokens + now - limit->last;
if (tokens > limit->tokens_max)
@@ -44,11 +43,11 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
delta = tokens - cost;
if (delta >= 0) {
limit->tokens = delta;
- spin_unlock_bh(&limit_lock);
+ spin_unlock_bh(&limit->lock);
return limit->invert;
}
limit->tokens = tokens;
- spin_unlock_bh(&limit_lock);
+ spin_unlock_bh(&limit->lock);
return !limit->invert;
}
@@ -86,6 +85,7 @@ static int nft_limit_init(struct nft_limit *limit,
limit->invert = true;
}
limit->last = ktime_get_ns();
+ spin_lock_init(&limit->lock);
return 0;
}
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index e21aea7e5ec8..475570e89ede 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -71,16 +71,10 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
tb[NFTA_LOOKUP_SREG] == NULL)
return -EINVAL;
- set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET], genmask);
- if (IS_ERR(set)) {
- if (tb[NFTA_LOOKUP_SET_ID]) {
- set = nf_tables_set_lookup_byid(ctx->net,
- tb[NFTA_LOOKUP_SET_ID],
- genmask);
- }
- if (IS_ERR(set))
- return PTR_ERR(set);
- }
+ set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET],
+ tb[NFTA_LOOKUP_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
if (set->flags & NFT_SET_EVAL)
return -EOPNOTSUPP;
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 11ce016cd479..6ac03d4266c9 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -46,10 +46,6 @@ int nft_masq_init(const struct nft_ctx *ctx,
struct nft_masq *priv = nft_expr_priv(expr);
int err;
- err = nft_masq_validate(ctx, expr, NULL);
- if (err)
- return err;
-
if (tb[NFTA_MASQ_FLAGS]) {
priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
if (priv->flags & ~NF_NAT_RANGE_MASK)
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 7b60e01f38ff..9563ce3c23aa 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -372,10 +372,6 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- err = nft_meta_set_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
-
priv->sreg = nft_parse_register(tb[NFTA_META_SREG]);
err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 439e0bd152a0..ed548d06b6dd 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -138,10 +138,6 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return -EINVAL;
}
- err = nft_nat_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
-
if (tb[NFTA_NAT_FAMILY] == NULL)
return -EINVAL;
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 1ae8c49ca4a1..1dd428fbaaa3 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -116,16 +116,10 @@ static int nft_objref_map_init(const struct nft_ctx *ctx,
struct nft_set *set;
int err;
- set = nf_tables_set_lookup(ctx->table, tb[NFTA_OBJREF_SET_NAME], genmask);
- if (IS_ERR(set)) {
- if (tb[NFTA_OBJREF_SET_ID]) {
- set = nf_tables_set_lookup_byid(ctx->net,
- tb[NFTA_OBJREF_SET_ID],
- genmask);
- }
- if (IS_ERR(set))
- return PTR_ERR(set);
- }
+ set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_OBJREF_SET_NAME],
+ tb[NFTA_OBJREF_SET_ID], genmask);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
if (!(set->flags & NFT_SET_OBJECT))
return -EINVAL;
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 2d6fe3559912..25e33159be57 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -99,7 +99,8 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
return 0;
}
-static int nft_quota_obj_init(const struct nlattr * const tb[],
+static int nft_quota_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
struct nft_object *obj)
{
struct nft_quota *priv = nft_obj_data(obj);
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 40dcd05146d5..1e66538bf0ff 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -47,10 +47,6 @@ int nft_redir_init(const struct nft_ctx *ctx,
unsigned int plen;
int err;
- err = nft_redir_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
-
plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
priv->sreg_proto_min =
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index c64de3f7379d..29f5bd2377b0 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -42,11 +42,6 @@ int nft_reject_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_reject *priv = nft_expr_priv(expr);
- int err;
-
- err = nft_reject_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 9e90a02cb104..5a7fb5ff867d 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -66,11 +66,7 @@ static int nft_reject_inet_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_reject *priv = nft_expr_priv(expr);
- int icmp_code, err;
-
- err = nft_reject_validate(ctx, expr, NULL);
- if (err < 0)
- return err;
+ int icmp_code;
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 78dfbf9588b3..e97e2fb53f0a 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -18,9 +18,8 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-static DEFINE_SPINLOCK(nft_rbtree_lock);
-
struct nft_rbtree {
+ rwlock_t lock;
struct rb_root root;
};
@@ -44,14 +43,14 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
- const struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
u8 genmask = nft_genmask_cur(net);
const struct rb_node *parent;
const void *this;
int d;
- spin_lock_bh(&nft_rbtree_lock);
+ read_lock_bh(&priv->lock);
parent = priv->root.rb_node;
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
@@ -75,7 +74,7 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
}
if (nft_rbtree_interval_end(rbe))
goto out;
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
*ext = &rbe->ext;
return true;
@@ -85,12 +84,12 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
!nft_rbtree_interval_end(interval)) {
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
*ext = &interval->ext;
return true;
}
out:
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
return false;
}
@@ -140,12 +139,13 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
struct nft_set_ext **ext)
{
+ struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe = elem->priv;
int err;
- spin_lock_bh(&nft_rbtree_lock);
+ write_lock_bh(&priv->lock);
err = __nft_rbtree_insert(net, set, rbe, ext);
- spin_unlock_bh(&nft_rbtree_lock);
+ write_unlock_bh(&priv->lock);
return err;
}
@@ -157,9 +157,9 @@ static void nft_rbtree_remove(const struct net *net,
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe = elem->priv;
- spin_lock_bh(&nft_rbtree_lock);
+ write_lock_bh(&priv->lock);
rb_erase(&rbe->node, &priv->root);
- spin_unlock_bh(&nft_rbtree_lock);
+ write_unlock_bh(&priv->lock);
}
static void nft_rbtree_activate(const struct net *net,
@@ -224,12 +224,12 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
struct nft_set *set,
struct nft_set_iter *iter)
{
- const struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
struct nft_set_elem elem;
struct rb_node *node;
- spin_lock_bh(&nft_rbtree_lock);
+ read_lock_bh(&priv->lock);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
rbe = rb_entry(node, struct nft_rbtree_elem, node);
@@ -242,13 +242,13 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
iter->err = iter->fn(ctx, set, iter, &elem);
if (iter->err < 0) {
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
return;
}
cont:
iter->count++;
}
- spin_unlock_bh(&nft_rbtree_lock);
+ read_unlock_bh(&priv->lock);
}
static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
@@ -262,6 +262,7 @@ static int nft_rbtree_init(const struct nft_set *set,
{
struct nft_rbtree *priv = nft_set_priv(set);
+ rwlock_init(&priv->lock);
priv->root = RB_ROOT;
return 0;
}
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index dab962df1787..d27b5f1ea619 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -18,6 +18,7 @@
#include <linux/netfilter/xt_limit.h>
struct xt_limit_priv {
+ spinlock_t lock;
unsigned long prev;
uint32_t credit;
};
@@ -32,8 +33,6 @@ MODULE_ALIAS("ip6t_limit");
* see net/sched/sch_tbf.c in the linux source tree
*/
-static DEFINE_SPINLOCK(limit_lock);
-
/* Rusty: This is my (non-mathematically-inclined) understanding of
this algorithm. The `average rate' in jiffies becomes your initial
amount of credit `credit' and the most credit you can ever have
@@ -72,7 +71,7 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
struct xt_limit_priv *priv = r->master;
unsigned long now = jiffies;
- spin_lock_bh(&limit_lock);
+ spin_lock_bh(&priv->lock);
priv->credit += (now - xchg(&priv->prev, now)) * CREDITS_PER_JIFFY;
if (priv->credit > r->credit_cap)
priv->credit = r->credit_cap;
@@ -80,11 +79,11 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (priv->credit >= r->cost) {
/* We're not limited. */
priv->credit -= r->cost;
- spin_unlock_bh(&limit_lock);
+ spin_unlock_bh(&priv->lock);
return true;
}
- spin_unlock_bh(&limit_lock);
+ spin_unlock_bh(&priv->lock);
return false;
}
@@ -126,6 +125,8 @@ static int limit_mt_check(const struct xt_mtchk_param *par)
r->credit_cap = priv->credit; /* Credits full. */
r->cost = user2credits(r->avg);
}
+ spin_lock_init(&priv->lock);
+
return 0;
}
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index c82301ce3fff..e4610676299b 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2014 Nicira, Inc.
+ * Copyright (c) 2007-2017 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -44,13 +44,10 @@
#include "conntrack.h"
#include "vport.h"
-static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key,
- const struct nlattr *attr, int len);
-
struct deferred_action {
struct sk_buff *skb;
const struct nlattr *actions;
+ int actions_len;
/* Store pkt_key clone when creating deferred action. */
struct sw_flow_key pkt_key;
@@ -82,14 +79,31 @@ struct action_fifo {
struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
};
-struct recirc_keys {
+struct action_flow_keys {
struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
};
static struct action_fifo __percpu *action_fifos;
-static struct recirc_keys __percpu *recirc_keys;
+static struct action_flow_keys __percpu *flow_keys;
static DEFINE_PER_CPU(int, exec_actions_level);
+/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
+ * space. Return NULL if out of key spaces.
+ */
+static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
+{
+ struct action_flow_keys *keys = this_cpu_ptr(flow_keys);
+ int level = this_cpu_read(exec_actions_level);
+ struct sw_flow_key *key = NULL;
+
+ if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
+ key = &keys->key[level - 1];
+ *key = *key_;
+ }
+
+ return key;
+}
+
static void action_fifo_init(struct action_fifo *fifo)
{
fifo->head = 0;
@@ -119,8 +133,9 @@ static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
/* Return true if fifo is not full */
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
- const struct sw_flow_key *key,
- const struct nlattr *attr)
+ const struct sw_flow_key *key,
+ const struct nlattr *actions,
+ const int actions_len)
{
struct action_fifo *fifo;
struct deferred_action *da;
@@ -129,7 +144,8 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
da = action_fifo_put(fifo);
if (da) {
da->skb = skb;
- da->actions = attr;
+ da->actions = actions;
+ da->actions_len = actions_len;
da->pkt_key = *key;
}
@@ -146,6 +162,12 @@ static bool is_flow_key_valid(const struct sw_flow_key *key)
return !(key->mac_proto & SW_FLOW_KEY_INVALID);
}
+static int clone_execute(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *key,
+ u32 recirc_id,
+ const struct nlattr *actions, int len,
+ bool last, bool clone_flow_key);
+
static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
__be16 ethertype)
{
@@ -908,72 +930,35 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
}
+/* When 'last' is true, sample() should always consume the 'skb'.
+ * Otherwise, sample() should keep 'skb' intact regardless of what
+ * actions are executed within sample().
+ */
static int sample(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, const struct nlattr *attr,
- const struct nlattr *actions, int actions_len)
+ bool last)
{
- const struct nlattr *acts_list = NULL;
- const struct nlattr *a;
- int rem;
- u32 cutlen = 0;
-
- for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
- a = nla_next(a, &rem)) {
- u32 probability;
-
- switch (nla_type(a)) {
- case OVS_SAMPLE_ATTR_PROBABILITY:
- probability = nla_get_u32(a);
- if (!probability || prandom_u32() > probability)
- return 0;
- break;
-
- case OVS_SAMPLE_ATTR_ACTIONS:
- acts_list = a;
- break;
- }
- }
-
- rem = nla_len(acts_list);
- a = nla_data(acts_list);
-
- /* Actions list is empty, do nothing */
- if (unlikely(!rem))
+ struct nlattr *actions;
+ struct nlattr *sample_arg;
+ int rem = nla_len(attr);
+ const struct sample_arg *arg;
+ bool clone_flow_key;
+
+ /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
+ sample_arg = nla_data(attr);
+ arg = nla_data(sample_arg);
+ actions = nla_next(sample_arg, &rem);
+
+ if ((arg->probability != U32_MAX) &&
+ (!arg->probability || prandom_u32() > arg->probability)) {
+ if (last)
+ consume_skb(skb);
return 0;
-
- /* The only known usage of sample action is having a single user-space
- * action, or having a truncate action followed by a single user-space
- * action. Treat this usage as a special case.
- * The output_userspace() should clone the skb to be sent to the
- * user space. This skb will be consumed by its caller.
- */
- if (unlikely(nla_type(a) == OVS_ACTION_ATTR_TRUNC)) {
- struct ovs_action_trunc *trunc = nla_data(a);
-
- if (skb->len > trunc->max_len)
- cutlen = skb->len - trunc->max_len;
-
- a = nla_next(a, &rem);
}
- if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
- nla_is_last(a, rem)))
- return output_userspace(dp, skb, key, a, actions,
- actions_len, cutlen);
-
- skb = skb_clone(skb, GFP_ATOMIC);
- if (!skb)
- /* Skip the sample action when out of memory. */
- return 0;
-
- if (!add_deferred_actions(skb, key, a)) {
- if (net_ratelimit())
- pr_warn("%s: deferred actions limit reached, dropping sample action\n",
- ovs_dp_name(dp));
-
- kfree_skb(skb);
- }
- return 0;
+ clone_flow_key = !arg->exec;
+ return clone_execute(dp, skb, key, 0, actions, rem, last,
+ clone_flow_key);
}
static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
@@ -1084,10 +1069,9 @@ static int execute_masked_set_action(struct sk_buff *skb,
static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
- const struct nlattr *a, int rem)
+ const struct nlattr *a, bool last)
{
- struct deferred_action *da;
- int level;
+ u32 recirc_id;
if (!is_flow_key_valid(key)) {
int err;
@@ -1098,43 +1082,8 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
}
BUG_ON(!is_flow_key_valid(key));
- if (!nla_is_last(a, rem)) {
- /* Recirc action is the not the last action
- * of the action list, need to clone the skb.
- */
- skb = skb_clone(skb, GFP_ATOMIC);
-
- /* Skip the recirc action when out of memory, but
- * continue on with the rest of the action list.
- */
- if (!skb)
- return 0;
- }
-
- level = this_cpu_read(exec_actions_level);
- if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
- struct recirc_keys *rks = this_cpu_ptr(recirc_keys);
- struct sw_flow_key *recirc_key = &rks->key[level - 1];
-
- *recirc_key = *key;
- recirc_key->recirc_id = nla_get_u32(a);
- ovs_dp_process_packet(skb, recirc_key);
-
- return 0;
- }
-
- da = add_deferred_actions(skb, key, NULL);
- if (da) {
- da->pkt_key.recirc_id = nla_get_u32(a);
- } else {
- kfree_skb(skb);
-
- if (net_ratelimit())
- pr_warn("%s: deferred action limit reached, drop recirc action\n",
- ovs_dp_name(dp));
- }
-
- return 0;
+ recirc_id = nla_get_u32(a);
+ return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true);
}
/* Execute a list of actions against 'skb'. */
@@ -1206,9 +1155,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
err = pop_vlan(skb, key);
break;
- case OVS_ACTION_ATTR_RECIRC:
- err = execute_recirc(dp, skb, key, a, rem);
- if (nla_is_last(a, rem)) {
+ case OVS_ACTION_ATTR_RECIRC: {
+ bool last = nla_is_last(a, rem);
+
+ err = execute_recirc(dp, skb, key, a, last);
+ if (last) {
/* If this is the last action, the skb has
* been consumed or freed.
* Return immediately.
@@ -1216,6 +1167,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
return err;
}
break;
+ }
case OVS_ACTION_ATTR_SET:
err = execute_set_action(skb, key, nla_data(a));
@@ -1226,9 +1178,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
err = execute_masked_set_action(skb, key, nla_data(a));
break;
- case OVS_ACTION_ATTR_SAMPLE:
- err = sample(dp, skb, key, a, attr, len);
+ case OVS_ACTION_ATTR_SAMPLE: {
+ bool last = nla_is_last(a, rem);
+
+ err = sample(dp, skb, key, a, last);
+ if (last)
+ return err;
+
break;
+ }
case OVS_ACTION_ATTR_CT:
if (!is_flow_key_valid(key)) {
@@ -1264,6 +1222,79 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
return 0;
}
+/* Execute the actions on the clone of the packet. The effect of the
+ * execution does not affect the original 'skb' nor the original 'key'.
+ *
+ * The execution may be deferred in case the actions can not be executed
+ * immediately.
+ */
+static int clone_execute(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *key, u32 recirc_id,
+ const struct nlattr *actions, int len,
+ bool last, bool clone_flow_key)
+{
+ struct deferred_action *da;
+ struct sw_flow_key *clone;
+
+ skb = last ? skb : skb_clone(skb, GFP_ATOMIC);
+ if (!skb) {
+ /* Out of memory, skip this action.
+ */
+ return 0;
+ }
+
+ /* When clone_flow_key is false, the 'key' will not be changed
+ * by the actions, then the 'key' can be used directly.
+ * Otherwise, try to clone key from the next recursion level of
+ * 'flow_keys'. If clone is successful, execute the actions
+ * without deferring.
+ */
+ clone = clone_flow_key ? clone_key(key) : key;
+ if (clone) {
+ int err = 0;
+
+ if (actions) { /* Sample action */
+ if (clone_flow_key)
+ __this_cpu_inc(exec_actions_level);
+
+ err = do_execute_actions(dp, skb, clone,
+ actions, len);
+
+ if (clone_flow_key)
+ __this_cpu_dec(exec_actions_level);
+ } else { /* Recirc action */
+ clone->recirc_id = recirc_id;
+ ovs_dp_process_packet(skb, clone);
+ }
+ return err;
+ }
+
+ /* Out of 'flow_keys' space. Defer actions */
+ da = add_deferred_actions(skb, key, actions, len);
+ if (da) {
+ if (!actions) { /* Recirc action */
+ key = &da->pkt_key;
+ key->recirc_id = recirc_id;
+ }
+ } else {
+ /* Out of per CPU action FIFO space. Drop the 'skb' and
+ * log an error.
+ */
+ kfree_skb(skb);
+
+ if (net_ratelimit()) {
+ if (actions) { /* Sample action */
+ pr_warn("%s: deferred action limit reached, drop sample action\n",
+ ovs_dp_name(dp));
+ } else { /* Recirc action */
+ pr_warn("%s: deferred action limit reached, drop recirc action\n",
+ ovs_dp_name(dp));
+ }
+ }
+ }
+ return 0;
+}
+
static void process_deferred_actions(struct datapath *dp)
{
struct action_fifo *fifo = this_cpu_ptr(action_fifos);
@@ -1278,10 +1309,10 @@ static void process_deferred_actions(struct datapath *dp)
struct sk_buff *skb = da->skb;
struct sw_flow_key *key = &da->pkt_key;
const struct nlattr *actions = da->actions;
+ int actions_len = da->actions_len;
if (actions)
- do_execute_actions(dp, skb, key, actions,
- nla_len(actions));
+ do_execute_actions(dp, skb, key, actions, actions_len);
else
ovs_dp_process_packet(skb, key);
} while (!action_fifo_is_empty(fifo));
@@ -1323,8 +1354,8 @@ int action_fifos_init(void)
if (!action_fifos)
return -ENOMEM;
- recirc_keys = alloc_percpu(struct recirc_keys);
- if (!recirc_keys) {
+ flow_keys = alloc_percpu(struct action_flow_keys);
+ if (!flow_keys) {
free_percpu(action_fifos);
return -ENOMEM;
}
@@ -1335,5 +1366,5 @@ int action_fifos_init(void)
void action_fifos_exit(void)
{
free_percpu(action_fifos);
- free_percpu(recirc_keys);
+ free_percpu(flow_keys);
}
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 1c6e9377436d..da931bdef8a7 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -34,8 +34,6 @@
#define DP_MAX_PORTS USHRT_MAX
#define DP_VPORT_HASH_BUCKETS 1024
-#define SAMPLE_ACTION_DEPTH 3
-
/**
* struct dp_stats_percpu - per-cpu packet processing statistics for a given
* datapath.
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 1105a838bab8..df82b81a9b35 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2014 Nicira, Inc.
+ * Copyright (c) 2007-2017 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -59,6 +59,39 @@ struct ovs_len_tbl {
#define OVS_ATTR_NESTED -1
#define OVS_ATTR_VARIABLE -2
+static bool actions_may_change_flow(const struct nlattr *actions)
+{
+ struct nlattr *nla;
+ int rem;
+
+ nla_for_each_nested(nla, actions, rem) {
+ u16 action = nla_type(nla);
+
+ switch (action) {
+ case OVS_ACTION_ATTR_OUTPUT:
+ case OVS_ACTION_ATTR_RECIRC:
+ case OVS_ACTION_ATTR_TRUNC:
+ case OVS_ACTION_ATTR_USERSPACE:
+ break;
+
+ case OVS_ACTION_ATTR_CT:
+ case OVS_ACTION_ATTR_HASH:
+ case OVS_ACTION_ATTR_POP_ETH:
+ case OVS_ACTION_ATTR_POP_MPLS:
+ case OVS_ACTION_ATTR_POP_VLAN:
+ case OVS_ACTION_ATTR_PUSH_ETH:
+ case OVS_ACTION_ATTR_PUSH_MPLS:
+ case OVS_ACTION_ATTR_PUSH_VLAN:
+ case OVS_ACTION_ATTR_SAMPLE:
+ case OVS_ACTION_ATTR_SET:
+ case OVS_ACTION_ATTR_SET_MASKED:
+ default:
+ return true;
+ }
+ }
+ return false;
+}
+
static void update_range(struct sw_flow_match *match,
size_t offset, size_t size, bool is_mask)
{
@@ -2023,18 +2056,20 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
- int depth, struct sw_flow_actions **sfa,
+ struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log);
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
- const struct sw_flow_key *key, int depth,
+ const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
- __be16 eth_type, __be16 vlan_tci, bool log)
+ __be16 eth_type, __be16 vlan_tci,
+ bool log, bool last)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
const struct nlattr *a;
- int rem, start, err, st_acts;
+ int rem, start, err;
+ struct sample_arg arg;
memset(attrs, 0, sizeof(attrs));
nla_for_each_nested(a, attr, rem) {
@@ -2058,20 +2093,32 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
if (start < 0)
return start;
- err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
- nla_data(probability), sizeof(u32), log);
+
+ /* When both skb and flow may be changed, put the sample
+ * into a deferred fifo. On the other hand, if only skb
+ * may be modified, the actions can be executed in place.
+ *
+ * Do this analysis at the flow installation time.
+ * Set 'arg.exec' to true if the actions can be
+ * executed without being deferred.
+ *
+ * If the sample is the last action, it can always be executed
+ * rather than deferred.
+ */
+ arg.exec = last || !actions_may_change_flow(actions);
+ arg.probability = nla_get_u32(probability);
+
+ err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
+ log);
if (err)
return err;
- st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log);
- if (st_acts < 0)
- return st_acts;
- err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa,
+ err = __ovs_nla_copy_actions(net, actions, key, sfa,
eth_type, vlan_tci, log);
+
if (err)
return err;
- add_nested_action_end(*sfa, st_acts);
add_nested_action_end(*sfa, start);
return 0;
@@ -2408,16 +2455,13 @@ static int copy_action(const struct nlattr *from,
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
- int depth, struct sw_flow_actions **sfa,
+ struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log)
{
u8 mac_proto = ovs_key_mac_proto(key);
const struct nlattr *a;
int rem, err;
- if (depth >= SAMPLE_ACTION_DEPTH)
- return -EOVERFLOW;
-
nla_for_each_nested(a, attr, rem) {
/* Expected argument lengths, (u32)-1 for variable length. */
static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
@@ -2555,13 +2599,17 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return err;
break;
- case OVS_ACTION_ATTR_SAMPLE:
- err = validate_and_copy_sample(net, a, key, depth, sfa,
- eth_type, vlan_tci, log);
+ case OVS_ACTION_ATTR_SAMPLE: {
+ bool last = nla_is_last(a, rem);
+
+ err = validate_and_copy_sample(net, a, key, sfa,
+ eth_type, vlan_tci,
+ log, last);
if (err)
return err;
skip_copy = true;
break;
+ }
case OVS_ACTION_ATTR_CT:
err = ovs_ct_copy_action(net, a, key, sfa, log);
@@ -2615,7 +2663,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return PTR_ERR(*sfa);
(*sfa)->orig_len = nla_len(attr);
- err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type,
+ err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
key->eth.vlan.tci, log);
if (err)
ovs_nla_free_flow_actions(*sfa);
@@ -2623,39 +2671,44 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return err;
}
-static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
+static int sample_action_to_attr(const struct nlattr *attr,
+ struct sk_buff *skb)
{
- const struct nlattr *a;
- struct nlattr *start;
- int err = 0, rem;
+ struct nlattr *start, *ac_start = NULL, *sample_arg;
+ int err = 0, rem = nla_len(attr);
+ const struct sample_arg *arg;
+ struct nlattr *actions;
start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
if (!start)
return -EMSGSIZE;
- nla_for_each_nested(a, attr, rem) {
- int type = nla_type(a);
- struct nlattr *st_sample;
+ sample_arg = nla_data(attr);
+ arg = nla_data(sample_arg);
+ actions = nla_next(sample_arg, &rem);
- switch (type) {
- case OVS_SAMPLE_ATTR_PROBABILITY:
- if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY,
- sizeof(u32), nla_data(a)))
- return -EMSGSIZE;
- break;
- case OVS_SAMPLE_ATTR_ACTIONS:
- st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
- if (!st_sample)
- return -EMSGSIZE;
- err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
- if (err)
- return err;
- nla_nest_end(skb, st_sample);
- break;
- }
+ if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
+ err = -EMSGSIZE;
+ goto out;
+ }
+
+ ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
+ if (!ac_start) {
+ err = -EMSGSIZE;
+ goto out;
+ }
+
+ err = ovs_nla_put_actions(actions, rem, skb);
+
+out:
+ if (err) {
+ nla_nest_cancel(skb, ac_start);
+ nla_nest_cancel(skb, start);
+ } else {
+ nla_nest_end(skb, ac_start);
+ nla_nest_end(skb, start);
}
- nla_nest_end(skb, start);
return err;
}
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 1c38d2c7caa8..80fb6f63e768 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -702,9 +702,8 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
event->param.conn.initiator_depth);
/* rdma_accept() calls rdma_reject() internally if it fails */
- err = rdma_accept(cm_id, &conn_param);
- if (err)
- rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err);
+ if (rdma_accept(cm_id, &conn_param))
+ rds_ib_conn_error(conn, "rdma_accept failed\n");
out:
if (conn)
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
index 4fe8f4fec4ee..86ef907067bb 100644
--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -78,17 +78,15 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages)
return ibmr;
out_no_cigar:
- if (ibmr) {
- if (fmr->fmr)
- ib_dealloc_fmr(fmr->fmr);
- kfree(ibmr);
- }
+ kfree(ibmr);
atomic_dec(&pool->item_count);
+
return ERR_PTR(err);
}
-int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
- struct scatterlist *sg, unsigned int nents)
+static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
+ struct rds_ib_mr *ibmr, struct scatterlist *sg,
+ unsigned int nents)
{
struct ib_device *dev = rds_ibdev->dev;
struct rds_ib_fmr *fmr = &ibmr->u.fmr;
@@ -114,29 +112,39 @@ int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
if (dma_addr & ~PAGE_MASK) {
- if (i > 0)
+ if (i > 0) {
+ ib_dma_unmap_sg(dev, sg, nents,
+ DMA_BIDIRECTIONAL);
return -EINVAL;
- else
+ } else {
++page_cnt;
+ }
}
if ((dma_addr + dma_len) & ~PAGE_MASK) {
- if (i < sg_dma_len - 1)
+ if (i < sg_dma_len - 1) {
+ ib_dma_unmap_sg(dev, sg, nents,
+ DMA_BIDIRECTIONAL);
return -EINVAL;
- else
+ } else {
++page_cnt;
+ }
}
len += dma_len;
}
page_cnt += len >> PAGE_SHIFT;
- if (page_cnt > ibmr->pool->fmr_attr.max_pages)
+ if (page_cnt > ibmr->pool->fmr_attr.max_pages) {
+ ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
return -EINVAL;
+ }
dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
rdsibdev_to_node(rds_ibdev));
- if (!dma_pages)
+ if (!dma_pages) {
+ ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
return -ENOMEM;
+ }
page_cnt = 0;
for (i = 0; i < sg_dma_len; ++i) {
@@ -149,8 +157,10 @@ int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
}
ret = ib_map_phys_fmr(fmr->fmr, dma_pages, page_cnt, io_addr);
- if (ret)
+ if (ret) {
+ ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
goto out;
+ }
/* Success - we successfully remapped the MR, so we can
* safely tear down the old mapping.
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index 5d6e98a79a5e..0ea4ab017a8c 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -125,8 +125,6 @@ void rds_ib_mr_exit(void);
void __rds_ib_teardown_mr(struct rds_ib_mr *);
void rds_ib_teardown_mr(struct rds_ib_mr *);
struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *, int);
-int rds_ib_map_fmr(struct rds_ib_device *, struct rds_ib_mr *,
- struct scatterlist *, unsigned int);
struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *);
int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *, int, struct rds_ib_mr **);
struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *, struct scatterlist *,
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 71e7ff22f7c9..c75ea5c9102c 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -603,8 +603,8 @@ nla_put_failure:
return -1;
}
-int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
- u16 metaid, u16 mlen, void *mdata)
+static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
+ u16 metaid, u16 mlen, void *mdata)
{
struct tcf_meta_info *e;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index bcf49cd22786..62567bfe52c7 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -274,7 +274,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
return NULL;
}
-void qdisc_hash_add(struct Qdisc *q)
+void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
struct Qdisc *root = qdisc_dev(q)->qdisc;
@@ -282,6 +282,8 @@ void qdisc_hash_add(struct Qdisc *q)
WARN_ON_ONCE(root == &noop_qdisc);
ASSERT_RTNL();
hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
+ if (invisible)
+ q->flags |= TCQ_F_INVISIBLE;
}
}
EXPORT_SYMBOL(qdisc_hash_add);
@@ -1003,7 +1005,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
goto err_out4;
}
- qdisc_hash_add(sch);
+ qdisc_hash_add(sch, false);
return sch;
}
@@ -1401,9 +1403,14 @@ nla_put_failure:
return -1;
}
-static bool tc_qdisc_dump_ignore(struct Qdisc *q)
+static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
- return (q->flags & TCQ_F_BUILTIN) ? true : false;
+ if (q->flags & TCQ_F_BUILTIN)
+ return true;
+ if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
+ return true;
+
+ return false;
}
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
@@ -1417,12 +1424,12 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (old && !tc_qdisc_dump_ignore(old)) {
+ if (old && !tc_qdisc_dump_ignore(old, false)) {
if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
0, RTM_DELQDISC) < 0)
goto err_out;
}
- if (new && !tc_qdisc_dump_ignore(new)) {
+ if (new && !tc_qdisc_dump_ignore(new, false)) {
if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
goto err_out;
@@ -1439,7 +1446,8 @@ err_out:
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
struct netlink_callback *cb,
- int *q_idx_p, int s_q_idx, bool recur)
+ int *q_idx_p, int s_q_idx, bool recur,
+ bool dump_invisible)
{
int ret = 0, q_idx = *q_idx_p;
struct Qdisc *q;
@@ -1452,7 +1460,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
if (q_idx < s_q_idx) {
q_idx++;
} else {
- if (!tc_qdisc_dump_ignore(q) &&
+ if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWQDISC) <= 0)
@@ -1474,7 +1482,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
q_idx++;
continue;
}
- if (!tc_qdisc_dump_ignore(q) &&
+ if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWQDISC) <= 0)
@@ -1496,12 +1504,21 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
int idx, q_idx;
int s_idx, s_q_idx;
struct net_device *dev;
+ const struct nlmsghdr *nlh = cb->nlh;
+ struct tcmsg *tcm = nlmsg_data(nlh);
+ struct nlattr *tca[TCA_MAX + 1];
+ int err;
s_idx = cb->args[0];
s_q_idx = q_idx = cb->args[1];
idx = 0;
ASSERT_RTNL();
+
+ err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL);
+ if (err < 0)
+ return err;
+
for_each_netdev(net, dev) {
struct netdev_queue *dev_queue;
@@ -1512,13 +1529,14 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
q_idx = 0;
if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
- true) < 0)
+ true, tca[TCA_DUMP_INVISIBLE]) < 0)
goto done;
dev_queue = dev_ingress_queue(dev);
if (dev_queue &&
tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
- &q_idx, s_q_idx, false) < 0)
+ &q_idx, s_q_idx, false,
+ tca[TCA_DUMP_INVISIBLE]) < 0)
goto done;
cont:
@@ -1762,7 +1780,7 @@ static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
{
struct qdisc_dump_args arg;
- if (tc_qdisc_dump_ignore(q) ||
+ if (tc_qdisc_dump_ignore(q, false) ||
*t_p < s_t || !q->ops->cl_ops ||
(tcm->tcm_parent &&
TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index d6ca18dc04c3..cf93e5ff3d63 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1161,6 +1161,8 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
sch->handle);
if (!q->link.q)
q->link.q = &noop_qdisc;
+ else
+ qdisc_hash_add(q->link.q, true);
q->link.priority = TC_CBQ_MAXPRIO - 1;
q->link.priority2 = TC_CBQ_MAXPRIO - 1;
@@ -1600,6 +1602,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
if (!cl->q)
cl->q = &noop_qdisc;
+ else
+ qdisc_hash_add(cl->q, true);
+
cl->common.classid = classid;
cl->tparent = parent;
cl->qdisc = sch;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index bb4cbdf75004..9fe67e257dfa 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -117,6 +117,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
&pfifo_qdisc_ops, classid);
if (cl->qdisc == NULL)
cl->qdisc = &noop_qdisc;
+ else
+ qdisc_hash_add(cl->qdisc, true);
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 5334e309f17f..cfa1f2cdbaf7 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -374,6 +374,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle);
if (p->q == NULL)
p->q = &noop_qdisc;
+ else
+ qdisc_hash_add(p->q, true);
pr_debug("%s: qdisc %p\n", __func__, p->q);
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 9f3a884d1590..097bbe9857a5 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -288,7 +288,6 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
struct fq_codel_flow *flow;
struct list_head *head;
u32 prev_drop_count, prev_ecn_mark;
- unsigned int prev_backlog;
begin:
head = &q->new_flows;
@@ -307,7 +306,6 @@ begin:
prev_drop_count = q->cstats.drop_count;
prev_ecn_mark = q->cstats.ecn_mark;
- prev_backlog = sch->qstats.backlog;
skb = codel_dequeue(sch, &sch->qstats.backlog, &q->cparams,
&flow->cvars, &q->cstats, qdisc_pkt_len,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index b052b27a984e..3e64d23e098c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -795,7 +795,7 @@ static void attach_default_qdiscs(struct net_device *dev)
}
#ifdef CONFIG_NET_SCHED
if (dev->qdisc)
- qdisc_hash_add(dev->qdisc);
+ qdisc_hash_add(dev->qdisc, false);
#endif
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3ffaa6fb0990..0198c6cdda49 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1066,6 +1066,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
&pfifo_qdisc_ops, classid);
if (cl->qdisc == NULL)
cl->qdisc = &noop_qdisc;
+ else
+ qdisc_hash_add(cl->qdisc, true);
INIT_LIST_HEAD(&cl->children);
cl->vt_tree = RB_ROOT;
cl->cf_tree = RB_ROOT;
@@ -1425,6 +1427,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
sch->handle);
if (q->root.qdisc == NULL)
q->root.qdisc = &noop_qdisc;
+ else
+ qdisc_hash_add(q->root.qdisc, true);
INIT_LIST_HEAD(&q->root.children);
q->root.vt_tree = RB_ROOT;
q->root.cf_tree = RB_ROOT;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 4cd5fb134bc9..95867033542e 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1460,6 +1460,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
qdisc_class_hash_insert(&q->clhash, &cl->common);
if (parent)
parent->children++;
+ if (cl->un.leaf.q != &noop_qdisc)
+ qdisc_hash_add(cl->un.leaf.q, true);
} else {
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 20b7f1646f69..cadfdd4f1e52 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -84,7 +84,7 @@ static void mq_attach(struct Qdisc *sch)
qdisc_destroy(old);
#ifdef CONFIG_NET_SCHED
if (ntx < dev->real_num_tx_queues)
- qdisc_hash_add(qdisc);
+ qdisc_hash_add(qdisc, false);
#endif
}
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 922683418e53..0a4cf27ea54b 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -21,14 +21,13 @@
struct mqprio_sched {
struct Qdisc **qdiscs;
- int hw_owned;
+ int hw_offload;
};
static void mqprio_destroy(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct mqprio_sched *priv = qdisc_priv(sch);
- struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO};
unsigned int ntx;
if (priv->qdiscs) {
@@ -39,10 +38,15 @@ static void mqprio_destroy(struct Qdisc *sch)
kfree(priv->qdiscs);
}
- if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
+ if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
+ struct tc_mqprio_qopt offload = { 0 };
+ struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
+ { .mqprio = &offload } };
+
dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
- else
+ } else {
netdev_set_num_tc(dev, 0);
+ }
}
static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
@@ -59,15 +63,20 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
return -EINVAL;
}
- /* net_device does not support requested operation */
- if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
- return -EINVAL;
+ /* Limit qopt->hw to maximum supported offload value. Drivers have
+ * the option of overriding this later if they don't support a
+ * given offload type.
+ */
+ if (qopt->hw > TC_MQPRIO_HW_OFFLOAD_MAX)
+ qopt->hw = TC_MQPRIO_HW_OFFLOAD_MAX;
- /* if hw owned qcount and qoffset are taken from LLD so
- * no reason to verify them here
+ /* If hardware offload is requested we will leave it to the device
+ * to either populate the queue counts itself or to validate the
+ * provided queue counts. If ndo_setup_tc is not present then
+ * hardware doesn't support offload and we should return an error.
*/
if (qopt->hw)
- return 0;
+ return dev->netdev_ops->ndo_setup_tc ? 0 : -EINVAL;
for (i = 0; i < qopt->num_tc; i++) {
unsigned int last = qopt->offset[i] + qopt->count[i];
@@ -139,13 +148,15 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
* supplied and verified mapping
*/
if (qopt->hw) {
- struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO,
- { .tc = qopt->num_tc }};
+ struct tc_mqprio_qopt offload = *qopt;
+ struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
+ { .mqprio = &offload } };
- priv->hw_owned = 1;
err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
if (err)
return err;
+
+ priv->hw_offload = offload.hw;
} else {
netdev_set_num_tc(dev, qopt->num_tc);
for (i = 0; i < qopt->num_tc; i++)
@@ -175,7 +186,7 @@ static void mqprio_attach(struct Qdisc *sch)
if (old)
qdisc_destroy(old);
if (ntx < dev->real_num_tx_queues)
- qdisc_hash_add(qdisc);
+ qdisc_hash_add(qdisc, false);
}
kfree(priv->qdiscs);
priv->qdiscs = NULL;
@@ -243,7 +254,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.num_tc = netdev_get_num_tc(dev);
memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
- opt.hw = priv->hw_owned;
+ opt.hw = priv->hw_offload;
for (i = 0; i < netdev_get_num_tc(dev); i++) {
opt.count[i] = dev->tc_to_txq[i].count;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index e7839a0d0eaa..43a3a10b3c81 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -217,6 +217,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
sch_tree_lock(sch);
old = q->queues[i];
q->queues[i] = child;
+ if (child != &noop_qdisc)
+ qdisc_hash_add(child, true);
if (old != &noop_qdisc) {
qdisc_tree_reduce_backlog(old,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index c8bb62a1e744..94b4928ad413 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -462,7 +462,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* If a delay is expected, orphan the skb. (orphaning usually takes
* place at TX completion time, so _before_ the link transit delay)
*/
- if (q->latency || q->jitter)
+ if (q->latency || q->jitter || q->rate)
skb_orphan_partial(skb);
/*
@@ -530,21 +530,31 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
now = psched_get_time();
if (q->rate) {
- struct sk_buff *last;
+ struct netem_skb_cb *last = NULL;
+
+ if (sch->q.tail)
+ last = netem_skb_cb(sch->q.tail);
+ if (q->t_root.rb_node) {
+ struct sk_buff *t_skb;
+ struct netem_skb_cb *t_last;
+
+ t_skb = netem_rb_to_skb(rb_last(&q->t_root));
+ t_last = netem_skb_cb(t_skb);
+ if (!last ||
+ t_last->time_to_send > last->time_to_send) {
+ last = t_last;
+ }
+ }
- if (sch->q.qlen)
- last = sch->q.tail;
- else
- last = netem_rb_to_skb(rb_last(&q->t_root));
if (last) {
/*
* Last packet in queue is reference point (now),
* calculate this time bonus and subtract
* from delay.
*/
- delay -= netem_skb_cb(last)->time_to_send - now;
+ delay -= last->time_to_send - now;
delay = max_t(psched_tdiff_t, 0, delay);
- now = netem_skb_cb(last)->time_to_send;
+ now = last->time_to_send;
}
delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index d4d7db267b6e..92c2e6d448d7 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -192,8 +192,11 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
qdisc_destroy(child);
}
- for (i = oldbands; i < q->bands; i++)
+ for (i = oldbands; i < q->bands; i++) {
q->queues[i] = queues[i];
+ if (q->queues[i] != &noop_qdisc)
+ qdisc_hash_add(q->queues[i], true);
+ }
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index f9e712ce2d15..6c85f3e9239b 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -494,6 +494,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
goto destroy_class;
}
+ if (cl->qdisc != &noop_qdisc)
+ qdisc_hash_add(cl->qdisc, true);
sch_tree_lock(sch);
qdisc_class_hash_insert(&q->clhash, &cl->common);
sch_tree_unlock(sch);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 249b2a18acbd..799ea6dd69b2 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -191,6 +191,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
return PTR_ERR(child);
}
+ if (child != &noop_qdisc)
+ qdisc_hash_add(child, true);
sch_tree_lock(sch);
q->flags = ctl->flags;
q->limit = ctl->limit;
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index fe6963d21519..ae862f172c94 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -513,6 +513,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
if (IS_ERR(child))
return PTR_ERR(child);
+ if (child != &noop_qdisc)
+ qdisc_hash_add(child, true);
sch_tree_lock(sch);
qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 303355c449ab..9850126129a3 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -396,6 +396,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
q->qdisc->qstats.backlog);
qdisc_destroy(q->qdisc);
q->qdisc = child;
+ if (child != &noop_qdisc)
+ qdisc_hash_add(child, true);
}
q->limit = qopt->limit;
if (tb[TCA_TBF_PBURST])
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index e03bb1aab4d0..ab1374fa5ab0 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3872,9 +3872,18 @@ sctp_disposition_t sctp_sf_do_reconf(struct net *net,
else if (param.p->type == SCTP_PARAM_RESET_IN_REQUEST)
reply = sctp_process_strreset_inreq(
(struct sctp_association *)asoc, param, &ev);
- /* More handles for other types will be added here, by now it
- * just ignores other types.
- */
+ else if (param.p->type == SCTP_PARAM_RESET_TSN_REQUEST)
+ reply = sctp_process_strreset_tsnreq(
+ (struct sctp_association *)asoc, param, &ev);
+ else if (param.p->type == SCTP_PARAM_RESET_ADD_OUT_STREAMS)
+ reply = sctp_process_strreset_addstrm_out(
+ (struct sctp_association *)asoc, param, &ev);
+ else if (param.p->type == SCTP_PARAM_RESET_ADD_IN_STREAMS)
+ reply = sctp_process_strreset_addstrm_in(
+ (struct sctp_association *)asoc, param, &ev);
+ else if (param.p->type == SCTP_PARAM_RESET_RESPONSE)
+ reply = sctp_process_strreset_resp(
+ (struct sctp_association *)asoc, param, &ev);
if (ev)
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 0f378ea2ae38..72cc3ecf6516 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3758,6 +3758,39 @@ out:
return retval;
}
+static int sctp_setsockopt_reconfig_supported(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EINVAL;
+
+ if (optlen != sizeof(params))
+ goto out;
+
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (asoc) {
+ asoc->reconf_enable = !!params.assoc_value;
+ } else if (!params.assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ sp->ep->reconf_enable = !!params.assoc_value;
+ } else {
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_setsockopt_enable_strreset(struct sock *sk,
char __user *optval,
unsigned int optlen)
@@ -4038,6 +4071,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_DEFAULT_PRINFO:
retval = sctp_setsockopt_default_prinfo(sk, optval, optlen);
break;
+ case SCTP_RECONFIG_SUPPORTED:
+ retval = sctp_setsockopt_reconfig_supported(sk, optval, optlen);
+ break;
case SCTP_ENABLE_STREAM_RESET:
retval = sctp_setsockopt_enable_strreset(sk, optval, optlen);
break;
@@ -6540,6 +6576,47 @@ out:
return retval;
}
+static int sctp_getsockopt_reconfig_supported(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (asoc) {
+ params.assoc_value = asoc->reconf_enable;
+ } else if (!params.assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ params.assoc_value = sp->ep->reconf_enable;
+ } else {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &params, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_getsockopt_enable_strreset(struct sock *sk, int len,
char __user *optval,
int __user *optlen)
@@ -6748,6 +6825,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
retval = sctp_getsockopt_pr_assocstatus(sk, len, optval,
optlen);
break;
+ case SCTP_RECONFIG_SUPPORTED:
+ retval = sctp_getsockopt_reconfig_supported(sk, len, optval,
+ optlen);
+ break;
case SCTP_ENABLE_STREAM_RESET:
retval = sctp_getsockopt_enable_strreset(sk, len, optval,
optlen);
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 1c6cc04fa3a4..961d0a1e99d1 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -267,18 +267,6 @@ int sctp_send_add_streams(struct sctp_association *asoc,
stream->out = streamout;
}
- if (in) {
- struct sctp_stream_in *streamin;
-
- streamin = krealloc(stream->in, incnt * sizeof(*streamin),
- GFP_KERNEL);
- if (!streamin)
- goto out;
-
- memset(streamin + stream->incnt, 0, in * sizeof(*streamin));
- stream->in = streamin;
- }
-
chunk = sctp_make_strreset_addstrm(asoc, out, in);
if (!chunk)
goto out;
@@ -303,13 +291,14 @@ out:
}
static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param(
- struct sctp_association *asoc, __u32 resp_seq)
+ struct sctp_association *asoc, __u32 resp_seq,
+ __be16 type)
{
struct sctp_chunk *chunk = asoc->strreset_chunk;
struct sctp_reconf_chunk *hdr;
union sctp_params param;
- if (ntohl(resp_seq) != asoc->strreset_outseq || !chunk)
+ if (!chunk)
return NULL;
hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr;
@@ -320,7 +309,8 @@ static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param(
*/
struct sctp_strreset_tsnreq *req = param.v;
- if (req->request_seq == resp_seq)
+ if ((!resp_seq || req->request_seq == resp_seq) &&
+ (!type || type == req->param_hdr.type))
return param.v;
}
@@ -361,13 +351,9 @@ struct sctp_chunk *sctp_process_strreset_outreq(
goto out;
if (asoc->strreset_chunk) {
- sctp_paramhdr_t *param_hdr;
- struct sctp_transport *t;
-
- param_hdr = sctp_chunk_lookup_strreset_param(
- asoc, outreq->response_seq);
- if (!param_hdr || param_hdr->type !=
- SCTP_PARAM_RESET_IN_REQUEST) {
+ if (!sctp_chunk_lookup_strreset_param(
+ asoc, outreq->response_seq,
+ SCTP_PARAM_RESET_IN_REQUEST)) {
/* same process with outstanding isn't 0 */
result = SCTP_STRRESET_ERR_IN_PROGRESS;
goto out;
@@ -377,6 +363,8 @@ struct sctp_chunk *sctp_process_strreset_outreq(
asoc->strreset_outseq++;
if (!asoc->strreset_outstanding) {
+ struct sctp_transport *t;
+
t = asoc->strreset_chunk->transport;
if (del_timer(&t->reconf_timer))
sctp_transport_put(t);
@@ -477,3 +465,367 @@ out:
return chunk;
}
+
+struct sctp_chunk *sctp_process_strreset_tsnreq(
+ struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_ulpevent **evp)
+{
+ __u32 init_tsn = 0, next_tsn = 0, max_tsn_seen;
+ struct sctp_strreset_tsnreq *tsnreq = param.v;
+ struct sctp_stream *stream = asoc->stream;
+ __u32 result = SCTP_STRRESET_DENIED;
+ __u32 request_seq;
+ __u16 i;
+
+ request_seq = ntohl(tsnreq->request_seq);
+ if (request_seq > asoc->strreset_inseq) {
+ result = SCTP_STRRESET_ERR_BAD_SEQNO;
+ goto out;
+ } else if (request_seq == asoc->strreset_inseq) {
+ asoc->strreset_inseq++;
+ }
+
+ if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ))
+ goto out;
+
+ if (asoc->strreset_outstanding) {
+ result = SCTP_STRRESET_ERR_IN_PROGRESS;
+ goto out;
+ }
+
+ /* G3: The same processing as though a SACK chunk with no gap report
+ * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were
+ * received MUST be performed.
+ */
+ max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map);
+ sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen);
+ sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+
+ /* G1: Compute an appropriate value for the Receiver's Next TSN -- the
+ * TSN that the peer should use to send the next DATA chunk. The
+ * value SHOULD be the smallest TSN not acknowledged by the
+ * receiver of the request plus 2^31.
+ */
+ init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1 << 31);
+ sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
+ init_tsn, GFP_ATOMIC);
+
+ /* G4: The same processing as though a FWD-TSN chunk (as defined in
+ * [RFC3758]) with all streams affected and a new cumulative TSN
+ * ACK of the Receiver's Next TSN minus 1 were received MUST be
+ * performed.
+ */
+ sctp_outq_free(&asoc->outqueue);
+
+ /* G2: Compute an appropriate value for the local endpoint's next TSN,
+ * i.e., the next TSN assigned by the receiver of the SSN/TSN reset
+ * chunk. The value SHOULD be the highest TSN sent by the receiver
+ * of the request plus 1.
+ */
+ next_tsn = asoc->next_tsn;
+ asoc->ctsn_ack_point = next_tsn - 1;
+ asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
+
+ /* G5: The next expected and outgoing SSNs MUST be reset to 0 for all
+ * incoming and outgoing streams.
+ */
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].ssn = 0;
+ for (i = 0; i < stream->incnt; i++)
+ stream->in[i].ssn = 0;
+
+ result = SCTP_STRRESET_PERFORMED;
+
+ *evp = sctp_ulpevent_make_assoc_reset_event(asoc, 0, init_tsn,
+ next_tsn, GFP_ATOMIC);
+
+out:
+ return sctp_make_strreset_tsnresp(asoc, result, request_seq,
+ next_tsn, init_tsn);
+}
+
+struct sctp_chunk *sctp_process_strreset_addstrm_out(
+ struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_ulpevent **evp)
+{
+ struct sctp_strreset_addstrm *addstrm = param.v;
+ struct sctp_stream *stream = asoc->stream;
+ __u32 result = SCTP_STRRESET_DENIED;
+ struct sctp_stream_in *streamin;
+ __u32 request_seq, incnt;
+ __u16 in;
+
+ request_seq = ntohl(addstrm->request_seq);
+ if (request_seq > asoc->strreset_inseq) {
+ result = SCTP_STRRESET_ERR_BAD_SEQNO;
+ goto out;
+ } else if (request_seq == asoc->strreset_inseq) {
+ asoc->strreset_inseq++;
+ }
+
+ if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ))
+ goto out;
+
+ if (asoc->strreset_chunk) {
+ if (!sctp_chunk_lookup_strreset_param(
+ asoc, 0, SCTP_PARAM_RESET_ADD_IN_STREAMS)) {
+ /* same process with outstanding isn't 0 */
+ result = SCTP_STRRESET_ERR_IN_PROGRESS;
+ goto out;
+ }
+
+ asoc->strreset_outstanding--;
+ asoc->strreset_outseq++;
+
+ if (!asoc->strreset_outstanding) {
+ struct sctp_transport *t;
+
+ t = asoc->strreset_chunk->transport;
+ if (del_timer(&t->reconf_timer))
+ sctp_transport_put(t);
+
+ sctp_chunk_put(asoc->strreset_chunk);
+ asoc->strreset_chunk = NULL;
+ }
+ }
+
+ in = ntohs(addstrm->number_of_streams);
+ incnt = stream->incnt + in;
+ if (!in || incnt > SCTP_MAX_STREAM)
+ goto out;
+
+ streamin = krealloc(stream->in, incnt * sizeof(*streamin),
+ GFP_ATOMIC);
+ if (!streamin)
+ goto out;
+
+ memset(streamin + stream->incnt, 0, in * sizeof(*streamin));
+ stream->in = streamin;
+ stream->incnt = incnt;
+
+ result = SCTP_STRRESET_PERFORMED;
+
+ *evp = sctp_ulpevent_make_stream_change_event(asoc,
+ 0, ntohs(addstrm->number_of_streams), 0, GFP_ATOMIC);
+
+out:
+ return sctp_make_strreset_resp(asoc, result, request_seq);
+}
+
+struct sctp_chunk *sctp_process_strreset_addstrm_in(
+ struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_ulpevent **evp)
+{
+ struct sctp_strreset_addstrm *addstrm = param.v;
+ struct sctp_stream *stream = asoc->stream;
+ __u32 result = SCTP_STRRESET_DENIED;
+ struct sctp_stream_out *streamout;
+ struct sctp_chunk *chunk = NULL;
+ __u32 request_seq, outcnt;
+ __u16 out;
+
+ request_seq = ntohl(addstrm->request_seq);
+ if (request_seq > asoc->strreset_inseq) {
+ result = SCTP_STRRESET_ERR_BAD_SEQNO;
+ goto out;
+ } else if (request_seq == asoc->strreset_inseq) {
+ asoc->strreset_inseq++;
+ }
+
+ if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ))
+ goto out;
+
+ if (asoc->strreset_outstanding) {
+ result = SCTP_STRRESET_ERR_IN_PROGRESS;
+ goto out;
+ }
+
+ out = ntohs(addstrm->number_of_streams);
+ outcnt = stream->outcnt + out;
+ if (!out || outcnt > SCTP_MAX_STREAM)
+ goto out;
+
+ streamout = krealloc(stream->out, outcnt * sizeof(*streamout),
+ GFP_ATOMIC);
+ if (!streamout)
+ goto out;
+
+ memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
+ stream->out = streamout;
+
+ chunk = sctp_make_strreset_addstrm(asoc, out, 0);
+ if (!chunk)
+ goto out;
+
+ asoc->strreset_chunk = chunk;
+ asoc->strreset_outstanding = 1;
+ sctp_chunk_hold(asoc->strreset_chunk);
+
+ stream->outcnt = outcnt;
+
+ *evp = sctp_ulpevent_make_stream_change_event(asoc,
+ 0, 0, ntohs(addstrm->number_of_streams), GFP_ATOMIC);
+
+out:
+ if (!chunk)
+ chunk = sctp_make_strreset_resp(asoc, result, request_seq);
+
+ return chunk;
+}
+
+struct sctp_chunk *sctp_process_strreset_resp(
+ struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_ulpevent **evp)
+{
+ struct sctp_strreset_resp *resp = param.v;
+ struct sctp_stream *stream = asoc->stream;
+ struct sctp_transport *t;
+ __u16 i, nums, flags = 0;
+ sctp_paramhdr_t *req;
+ __u32 result;
+
+ req = sctp_chunk_lookup_strreset_param(asoc, resp->response_seq, 0);
+ if (!req)
+ return NULL;
+
+ result = ntohl(resp->result);
+ if (result != SCTP_STRRESET_PERFORMED) {
+ /* if in progress, do nothing but retransmit */
+ if (result == SCTP_STRRESET_IN_PROGRESS)
+ return NULL;
+ else if (result == SCTP_STRRESET_DENIED)
+ flags = SCTP_STREAM_RESET_DENIED;
+ else
+ flags = SCTP_STREAM_RESET_FAILED;
+ }
+
+ if (req->type == SCTP_PARAM_RESET_OUT_REQUEST) {
+ struct sctp_strreset_outreq *outreq;
+ __u16 *str_p = NULL;
+
+ outreq = (struct sctp_strreset_outreq *)req;
+ nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / 2;
+
+ if (result == SCTP_STRRESET_PERFORMED) {
+ if (nums) {
+ str_p = outreq->list_of_streams;
+ for (i = 0; i < nums; i++)
+ stream->out[ntohs(str_p[i])].ssn = 0;
+ } else {
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].ssn = 0;
+ }
+
+ flags = SCTP_STREAM_RESET_OUTGOING_SSN;
+ }
+
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].state = SCTP_STREAM_OPEN;
+
+ *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
+ nums, str_p, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_IN_REQUEST) {
+ struct sctp_strreset_inreq *inreq;
+ __u16 *str_p = NULL;
+
+ /* if the result is performed, it's impossible for inreq */
+ if (result == SCTP_STRRESET_PERFORMED)
+ return NULL;
+
+ inreq = (struct sctp_strreset_inreq *)req;
+ nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / 2;
+
+ str_p = inreq->list_of_streams;
+ *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
+ nums, str_p, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_TSN_REQUEST) {
+ struct sctp_strreset_resptsn *resptsn;
+ __u32 stsn, rtsn;
+
+ /* check for resptsn, as sctp_verify_reconf didn't do it */
+ if (ntohs(param.p->length) != sizeof(*resptsn))
+ return NULL;
+
+ resptsn = (struct sctp_strreset_resptsn *)resp;
+ stsn = ntohl(resptsn->senders_next_tsn);
+ rtsn = ntohl(resptsn->receivers_next_tsn);
+
+ if (result == SCTP_STRRESET_PERFORMED) {
+ __u32 mtsn = sctp_tsnmap_get_max_tsn_seen(
+ &asoc->peer.tsn_map);
+
+ sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn);
+ sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+
+ sctp_tsnmap_init(&asoc->peer.tsn_map,
+ SCTP_TSN_MAP_INITIAL,
+ stsn, GFP_ATOMIC);
+
+ sctp_outq_free(&asoc->outqueue);
+
+ asoc->next_tsn = rtsn;
+ asoc->ctsn_ack_point = asoc->next_tsn - 1;
+ asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
+
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].ssn = 0;
+ for (i = 0; i < stream->incnt; i++)
+ stream->in[i].ssn = 0;
+ }
+
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].state = SCTP_STREAM_OPEN;
+
+ *evp = sctp_ulpevent_make_assoc_reset_event(asoc, flags,
+ stsn, rtsn, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_ADD_OUT_STREAMS) {
+ struct sctp_strreset_addstrm *addstrm;
+ __u16 number;
+
+ addstrm = (struct sctp_strreset_addstrm *)req;
+ nums = ntohs(addstrm->number_of_streams);
+ number = stream->outcnt - nums;
+
+ if (result == SCTP_STRRESET_PERFORMED)
+ for (i = number; i < stream->outcnt; i++)
+ stream->out[i].state = SCTP_STREAM_OPEN;
+ else
+ stream->outcnt = number;
+
+ *evp = sctp_ulpevent_make_stream_change_event(asoc, flags,
+ 0, nums, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_ADD_IN_STREAMS) {
+ struct sctp_strreset_addstrm *addstrm;
+
+ /* if the result is performed, it's impossible for addstrm in
+ * request.
+ */
+ if (result == SCTP_STRRESET_PERFORMED)
+ return NULL;
+
+ addstrm = (struct sctp_strreset_addstrm *)req;
+ nums = ntohs(addstrm->number_of_streams);
+
+ *evp = sctp_ulpevent_make_stream_change_event(asoc, flags,
+ nums, 0, GFP_ATOMIC);
+ }
+
+ asoc->strreset_outstanding--;
+ asoc->strreset_outseq++;
+
+ /* remove everything for this reconf request */
+ if (!asoc->strreset_outstanding) {
+ t = asoc->strreset_chunk->transport;
+ if (del_timer(&t->reconf_timer))
+ sctp_transport_put(t);
+
+ sctp_chunk_put(asoc->strreset_chunk);
+ asoc->strreset_chunk = NULL;
+ }
+
+ return NULL;
+}
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index daf8554fd42a..0e732f68c2bf 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -275,6 +275,13 @@ static struct ctl_table sctp_net_table[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "reconf_enable",
+ .data = &init_net.sctp.reconf_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "auth_enable",
.data = &init_net.sctp.auth_enable,
.maxlen = sizeof(int),
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index c8881bc542a0..ec2b3e013c2f 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -883,6 +883,62 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event(
return event;
}
+struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event(
+ const struct sctp_association *asoc, __u16 flags, __u32 local_tsn,
+ __u32 remote_tsn, gfp_t gfp)
+{
+ struct sctp_assoc_reset_event *areset;
+ struct sctp_ulpevent *event;
+ struct sk_buff *skb;
+
+ event = sctp_ulpevent_new(sizeof(struct sctp_assoc_reset_event),
+ MSG_NOTIFICATION, gfp);
+ if (!event)
+ return NULL;
+
+ skb = sctp_event2skb(event);
+ areset = (struct sctp_assoc_reset_event *)
+ skb_put(skb, sizeof(struct sctp_assoc_reset_event));
+
+ areset->assocreset_type = SCTP_ASSOC_RESET_EVENT;
+ areset->assocreset_flags = flags;
+ areset->assocreset_length = sizeof(struct sctp_assoc_reset_event);
+ sctp_ulpevent_set_owner(event, asoc);
+ areset->assocreset_assoc_id = sctp_assoc2id(asoc);
+ areset->assocreset_local_tsn = local_tsn;
+ areset->assocreset_remote_tsn = remote_tsn;
+
+ return event;
+}
+
+struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event(
+ const struct sctp_association *asoc, __u16 flags,
+ __u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp)
+{
+ struct sctp_stream_change_event *schange;
+ struct sctp_ulpevent *event;
+ struct sk_buff *skb;
+
+ event = sctp_ulpevent_new(sizeof(struct sctp_stream_change_event),
+ MSG_NOTIFICATION, gfp);
+ if (!event)
+ return NULL;
+
+ skb = sctp_event2skb(event);
+ schange = (struct sctp_stream_change_event *)
+ skb_put(skb, sizeof(struct sctp_stream_change_event));
+
+ schange->strchange_type = SCTP_STREAM_CHANGE_EVENT;
+ schange->strchange_flags = flags;
+ schange->strchange_length = sizeof(struct sctp_stream_change_event);
+ sctp_ulpevent_set_owner(event, asoc);
+ schange->strchange_assoc_id = sctp_assoc2id(asoc);
+ schange->strchange_instrms = strchange_instrms;
+ schange->strchange_outstrms = strchange_outstrms;
+
+ return event;
+}
+
/* Return the notification type, assuming this is a notification
* event.
*/