summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bridge/netfilter/ebtables.c11
-rw-r--r--net/core/ethtool.c1
-rw-r--r--net/core/fib_rules.c495
-rw-r--r--net/core/filter.c1
-rw-r--r--net/core/neighbour.c8
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/decnet/dn_rules.c7
-rw-r--r--net/ife/ife.c38
-rw-r--r--net/ipv4/fib_rules.c7
-rw-r--r--net/ipv4/ip_output.c41
-rw-r--r--net/ipv4/ipconfig.c151
-rw-r--r--net/ipv4/ipmr.c3
-rw-r--r--net/ipv4/tcp_input.c9
-rw-r--r--net/ipv4/tcp_output.c26
-rw-r--r--net/ipv4/udp.c81
-rw-r--r--net/ipv4/udp_offload.c67
-rw-r--r--net/ipv6/addrconf.c23
-rw-r--r--net/ipv6/fib6_rules.c7
-rw-r--r--net/ipv6/ip6_offload.c6
-rw-r--r--net/ipv6/ip6_output.c45
-rw-r--r--net/ipv6/ip6mr.c3
-rw-r--r--net/ipv6/netfilter/Kconfig55
-rw-r--r--net/ipv6/route.c18
-rw-r--r--net/ipv6/seg6_iptunnel.c26
-rw-r--r--net/ipv6/sysctl_net_ipv6.c8
-rw-r--r--net/ipv6/udp.c31
-rw-r--r--net/ipv6/udp_offload.c19
-rw-r--r--net/l2tp/l2tp_debugfs.c5
-rw-r--r--net/l2tp/l2tp_ppp.c12
-rw-r--r--net/llc/af_llc.c21
-rw-r--r--net/llc/llc_c_ac.c9
-rw-r--r--net/llc/llc_conn.c22
-rw-r--r--net/netfilter/Kconfig1
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c155
-rw-r--r--net/netfilter/nf_conntrack_expect.c5
-rw-r--r--net/netfilter/nf_conntrack_extend.c2
-rw-r--r--net/netfilter/nf_conntrack_sip.c16
-rw-r--r--net/netfilter/nf_tables_api.c69
-rw-r--r--net/netfilter/xt_connmark.c49
-rw-r--r--net/packet/af_packet.c60
-rw-r--r--net/packet/internal.h10
-rw-r--r--net/rds/ib_cm.c3
-rw-r--r--net/sched/act_ife.c9
-rw-r--r--net/sctp/associola.c25
-rw-r--r--net/sctp/outqueue.c48
-rw-r--r--net/sctp/sm_make_chunk.c9
-rw-r--r--net/smc/af_smc.c4
-rw-r--r--net/strparser/strparser.c2
-rw-r--r--net/tls/tls_sw.c3
50 files changed, 1100 insertions, 636 deletions
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 032e0fe45940..28a4c3490359 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1825,13 +1825,14 @@ static int compat_table_info(const struct ebt_table_info *info,
{
unsigned int size = info->entries_size;
const void *entries = info->entries;
- int ret;
newinfo->entries_size = size;
-
- ret = xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries);
- if (ret)
- return ret;
+ if (info->nentries) {
+ int ret = xt_compat_init_offsets(NFPROTO_BRIDGE,
+ info->nentries);
+ if (ret)
+ return ret;
+ }
return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
entries, newinfo);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 03416e6dd5d7..4650fd6d678c 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -92,6 +92,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
[NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
[NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation",
+ [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 33958f84c173..126ffc5bc630 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -387,247 +387,304 @@ unsigned int fib_rules_seq_read(struct net *net, int family)
}
EXPORT_SYMBOL_GPL(fib_rules_seq_read);
-static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
- struct fib_rules_ops *ops)
-{
- int err = -EINVAL;
-
- if (frh->src_len)
- if (tb[FRA_SRC] == NULL ||
- frh->src_len > (ops->addr_size * 8) ||
- nla_len(tb[FRA_SRC]) != ops->addr_size)
- goto errout;
-
- if (frh->dst_len)
- if (tb[FRA_DST] == NULL ||
- frh->dst_len > (ops->addr_size * 8) ||
- nla_len(tb[FRA_DST]) != ops->addr_size)
- goto errout;
-
- err = 0;
-errout:
- return err;
-}
-
-static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
- struct nlattr **tb, struct fib_rule *rule)
+static struct fib_rule *rule_find(struct fib_rules_ops *ops,
+ struct fib_rule_hdr *frh,
+ struct nlattr **tb,
+ struct fib_rule *rule,
+ bool user_priority)
{
struct fib_rule *r;
list_for_each_entry(r, &ops->rules_list, list) {
- if (r->action != rule->action)
+ if (rule->action && r->action != rule->action)
continue;
- if (r->table != rule->table)
+ if (rule->table && r->table != rule->table)
continue;
- if (r->pref != rule->pref)
+ if (user_priority && r->pref != rule->pref)
continue;
- if (memcmp(r->iifname, rule->iifname, IFNAMSIZ))
+ if (rule->iifname[0] &&
+ memcmp(r->iifname, rule->iifname, IFNAMSIZ))
continue;
- if (memcmp(r->oifname, rule->oifname, IFNAMSIZ))
+ if (rule->oifname[0] &&
+ memcmp(r->oifname, rule->oifname, IFNAMSIZ))
continue;
- if (r->mark != rule->mark)
+ if (rule->mark && r->mark != rule->mark)
continue;
- if (r->mark_mask != rule->mark_mask)
+ if (rule->mark_mask && r->mark_mask != rule->mark_mask)
continue;
- if (r->tun_id != rule->tun_id)
+ if (rule->tun_id && r->tun_id != rule->tun_id)
continue;
if (r->fr_net != rule->fr_net)
continue;
- if (r->l3mdev != rule->l3mdev)
+ if (rule->l3mdev && r->l3mdev != rule->l3mdev)
continue;
- if (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
- !uid_eq(r->uid_range.end, rule->uid_range.end))
+ if (uid_range_set(&rule->uid_range) &&
+ (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
+ !uid_eq(r->uid_range.end, rule->uid_range.end)))
continue;
- if (r->ip_proto != rule->ip_proto)
+ if (rule->ip_proto && r->ip_proto != rule->ip_proto)
continue;
- if (!fib_rule_port_range_compare(&r->sport_range,
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_range_compare(&r->sport_range,
&rule->sport_range))
continue;
- if (!fib_rule_port_range_compare(&r->dport_range,
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_range_compare(&r->dport_range,
&rule->dport_range))
continue;
if (!ops->compare(r, frh, tb))
continue;
- return 1;
+ return r;
+ }
+
+ return NULL;
+}
+
+#ifdef CONFIG_NET_L3_MASTER_DEV
+static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule,
+ struct netlink_ext_ack *extack)
+{
+ nlrule->l3mdev = nla_get_u8(nla);
+ if (nlrule->l3mdev != 1) {
+ NL_SET_ERR_MSG(extack, "Invalid l3mdev attribute");
+ return -1;
}
+
return 0;
}
+#else
+static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule,
+ struct netlink_ext_ack *extack)
+{
+ NL_SET_ERR_MSG(extack, "l3mdev support is not enabled in kernel");
+ return -1;
+}
+#endif
-int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
+static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ struct fib_rules_ops *ops,
+ struct nlattr *tb[],
+ struct fib_rule **rule,
+ bool *user_priority)
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
- struct fib_rules_ops *ops = NULL;
- struct fib_rule *rule, *r, *last = NULL;
- struct nlattr *tb[FRA_MAX+1];
- int err = -EINVAL, unresolved = 0;
-
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
- goto errout;
+ struct fib_rule *nlrule = NULL;
+ int err = -EINVAL;
- ops = lookup_rules_ops(net, frh->family);
- if (ops == NULL) {
- err = -EAFNOSUPPORT;
- goto errout;
+ if (frh->src_len)
+ if (!tb[FRA_SRC] ||
+ frh->src_len > (ops->addr_size * 8) ||
+ nla_len(tb[FRA_SRC]) != ops->addr_size) {
+ NL_SET_ERR_MSG(extack, "Invalid source address");
+ goto errout;
}
- err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
- if (err < 0)
- goto errout;
-
- err = validate_rulemsg(frh, tb, ops);
- if (err < 0)
- goto errout;
+ if (frh->dst_len)
+ if (!tb[FRA_DST] ||
+ frh->dst_len > (ops->addr_size * 8) ||
+ nla_len(tb[FRA_DST]) != ops->addr_size) {
+ NL_SET_ERR_MSG(extack, "Invalid dst address");
+ goto errout;
+ }
- rule = kzalloc(ops->rule_size, GFP_KERNEL);
- if (rule == NULL) {
+ nlrule = kzalloc(ops->rule_size, GFP_KERNEL);
+ if (!nlrule) {
err = -ENOMEM;
goto errout;
}
- refcount_set(&rule->refcnt, 1);
- rule->fr_net = net;
+ refcount_set(&nlrule->refcnt, 1);
+ nlrule->fr_net = net;
- rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
- : fib_default_rule_pref(ops);
+ if (tb[FRA_PRIORITY]) {
+ nlrule->pref = nla_get_u32(tb[FRA_PRIORITY]);
+ *user_priority = true;
+ } else {
+ nlrule->pref = fib_default_rule_pref(ops);
+ }
- rule->proto = tb[FRA_PROTOCOL] ?
+ nlrule->proto = tb[FRA_PROTOCOL] ?
nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC;
if (tb[FRA_IIFNAME]) {
struct net_device *dev;
- rule->iifindex = -1;
- nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
- dev = __dev_get_by_name(net, rule->iifname);
+ nlrule->iifindex = -1;
+ nla_strlcpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
+ dev = __dev_get_by_name(net, nlrule->iifname);
if (dev)
- rule->iifindex = dev->ifindex;
+ nlrule->iifindex = dev->ifindex;
}
if (tb[FRA_OIFNAME]) {
struct net_device *dev;
- rule->oifindex = -1;
- nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
- dev = __dev_get_by_name(net, rule->oifname);
+ nlrule->oifindex = -1;
+ nla_strlcpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
+ dev = __dev_get_by_name(net, nlrule->oifname);
if (dev)
- rule->oifindex = dev->ifindex;
+ nlrule->oifindex = dev->ifindex;
}
if (tb[FRA_FWMARK]) {
- rule->mark = nla_get_u32(tb[FRA_FWMARK]);
- if (rule->mark)
+ nlrule->mark = nla_get_u32(tb[FRA_FWMARK]);
+ if (nlrule->mark)
/* compatibility: if the mark value is non-zero all bits
* are compared unless a mask is explicitly specified.
*/
- rule->mark_mask = 0xFFFFFFFF;
+ nlrule->mark_mask = 0xFFFFFFFF;
}
if (tb[FRA_FWMASK])
- rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);
+ nlrule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);
if (tb[FRA_TUN_ID])
- rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
+ nlrule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
err = -EINVAL;
- if (tb[FRA_L3MDEV]) {
-#ifdef CONFIG_NET_L3_MASTER_DEV
- rule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]);
- if (rule->l3mdev != 1)
-#endif
- goto errout_free;
- }
+ if (tb[FRA_L3MDEV] &&
+ fib_nl2rule_l3mdev(tb[FRA_L3MDEV], nlrule, extack) < 0)
+ goto errout_free;
- rule->action = frh->action;
- rule->flags = frh->flags;
- rule->table = frh_get_table(frh, tb);
+ nlrule->action = frh->action;
+ nlrule->flags = frh->flags;
+ nlrule->table = frh_get_table(frh, tb);
if (tb[FRA_SUPPRESS_PREFIXLEN])
- rule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]);
+ nlrule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]);
else
- rule->suppress_prefixlen = -1;
+ nlrule->suppress_prefixlen = -1;
if (tb[FRA_SUPPRESS_IFGROUP])
- rule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]);
+ nlrule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]);
else
- rule->suppress_ifgroup = -1;
+ nlrule->suppress_ifgroup = -1;
if (tb[FRA_GOTO]) {
- if (rule->action != FR_ACT_GOTO)
+ if (nlrule->action != FR_ACT_GOTO) {
+ NL_SET_ERR_MSG(extack, "Unexpected goto");
goto errout_free;
+ }
- rule->target = nla_get_u32(tb[FRA_GOTO]);
+ nlrule->target = nla_get_u32(tb[FRA_GOTO]);
/* Backward jumps are prohibited to avoid endless loops */
- if (rule->target <= rule->pref)
+ if (nlrule->target <= nlrule->pref) {
+ NL_SET_ERR_MSG(extack, "Backward goto not supported");
goto errout_free;
-
- list_for_each_entry(r, &ops->rules_list, list) {
- if (r->pref == rule->target) {
- RCU_INIT_POINTER(rule->ctarget, r);
- break;
- }
}
-
- if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
- unresolved = 1;
- } else if (rule->action == FR_ACT_GOTO)
+ } else if (nlrule->action == FR_ACT_GOTO) {
+ NL_SET_ERR_MSG(extack, "Missing goto target for action goto");
goto errout_free;
+ }
- if (rule->l3mdev && rule->table)
+ if (nlrule->l3mdev && nlrule->table) {
+ NL_SET_ERR_MSG(extack, "l3mdev and table are mutually exclusive");
goto errout_free;
+ }
if (tb[FRA_UID_RANGE]) {
if (current_user_ns() != net->user_ns) {
err = -EPERM;
+ NL_SET_ERR_MSG(extack, "No permission to set uid");
goto errout_free;
}
- rule->uid_range = nla_get_kuid_range(tb);
+ nlrule->uid_range = nla_get_kuid_range(tb);
- if (!uid_range_set(&rule->uid_range) ||
- !uid_lte(rule->uid_range.start, rule->uid_range.end))
+ if (!uid_range_set(&nlrule->uid_range) ||
+ !uid_lte(nlrule->uid_range.start, nlrule->uid_range.end)) {
+ NL_SET_ERR_MSG(extack, "Invalid uid range");
goto errout_free;
+ }
} else {
- rule->uid_range = fib_kuid_range_unset;
+ nlrule->uid_range = fib_kuid_range_unset;
}
if (tb[FRA_IP_PROTO])
- rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);
+ nlrule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);
if (tb[FRA_SPORT_RANGE]) {
err = nla_get_port_range(tb[FRA_SPORT_RANGE],
- &rule->sport_range);
- if (err)
+ &nlrule->sport_range);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Invalid sport range");
goto errout_free;
+ }
}
if (tb[FRA_DPORT_RANGE]) {
err = nla_get_port_range(tb[FRA_DPORT_RANGE],
- &rule->dport_range);
- if (err)
+ &nlrule->dport_range);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Invalid dport range");
goto errout_free;
+ }
}
+ *rule = nlrule;
+
+ return 0;
+
+errout_free:
+ kfree(nlrule);
+errout:
+ return err;
+}
+
+int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ struct fib_rules_ops *ops = NULL;
+ struct fib_rule *rule = NULL, *r, *last = NULL;
+ struct nlattr *tb[FRA_MAX + 1];
+ int err = -EINVAL, unresolved = 0;
+ bool user_priority = false;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
+ NL_SET_ERR_MSG(extack, "Invalid msg length");
+ goto errout;
+ }
+
+ ops = lookup_rules_ops(net, frh->family);
+ if (!ops) {
+ err = -EAFNOSUPPORT;
+ NL_SET_ERR_MSG(extack, "Rule family not supported");
+ goto errout;
+ }
+
+ err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Error parsing msg");
+ goto errout;
+ }
+
+ err = fib_nl2rule(skb, nlh, extack, ops, tb, &rule, &user_priority);
+ if (err)
+ goto errout;
+
if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
- rule_exists(ops, frh, tb, rule)) {
+ rule_find(ops, frh, tb, rule, user_priority)) {
err = -EEXIST;
goto errout_free;
}
- err = ops->configure(rule, skb, frh, tb);
+ err = ops->configure(rule, skb, frh, tb, extack);
if (err < 0)
goto errout_free;
@@ -637,6 +694,16 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout_free;
list_for_each_entry(r, &ops->rules_list, list) {
+ if (r->pref == rule->target) {
+ RCU_INIT_POINTER(rule->ctarget, r);
+ break;
+ }
+ }
+
+ if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
+ unresolved = 1;
+
+ list_for_each_entry(r, &ops->rules_list, list) {
if (r->pref > rule->pref)
break;
last = r;
@@ -690,171 +757,97 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
- struct fib_rule_port_range sprange = {0, 0};
- struct fib_rule_port_range dprange = {0, 0};
struct fib_rules_ops *ops = NULL;
- struct fib_rule *rule, *r;
+ struct fib_rule *rule = NULL, *r, *nlrule = NULL;
struct nlattr *tb[FRA_MAX+1];
- struct fib_kuid_range range;
int err = -EINVAL;
+ bool user_priority = false;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
+ NL_SET_ERR_MSG(extack, "Invalid msg length");
goto errout;
+ }
ops = lookup_rules_ops(net, frh->family);
if (ops == NULL) {
err = -EAFNOSUPPORT;
+ NL_SET_ERR_MSG(extack, "Rule family not supported");
goto errout;
}
err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
- if (err < 0)
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Error parsing msg");
goto errout;
+ }
- err = validate_rulemsg(frh, tb, ops);
- if (err < 0)
+ err = fib_nl2rule(skb, nlh, extack, ops, tb, &nlrule, &user_priority);
+ if (err)
goto errout;
- if (tb[FRA_UID_RANGE]) {
- range = nla_get_kuid_range(tb);
- if (!uid_range_set(&range)) {
- err = -EINVAL;
- goto errout;
- }
- } else {
- range = fib_kuid_range_unset;
+ rule = rule_find(ops, frh, tb, nlrule, user_priority);
+ if (!rule) {
+ err = -ENOENT;
+ goto errout;
}
- if (tb[FRA_SPORT_RANGE]) {
- err = nla_get_port_range(tb[FRA_SPORT_RANGE],
- &sprange);
- if (err)
- goto errout;
+ if (rule->flags & FIB_RULE_PERMANENT) {
+ err = -EPERM;
+ goto errout;
}
- if (tb[FRA_DPORT_RANGE]) {
- err = nla_get_port_range(tb[FRA_DPORT_RANGE],
- &dprange);
+ if (ops->delete) {
+ err = ops->delete(rule);
if (err)
goto errout;
}
- list_for_each_entry(rule, &ops->rules_list, list) {
- if (tb[FRA_PROTOCOL] &&
- (rule->proto != nla_get_u8(tb[FRA_PROTOCOL])))
- continue;
-
- if (frh->action && (frh->action != rule->action))
- continue;
-
- if (frh_get_table(frh, tb) &&
- (frh_get_table(frh, tb) != rule->table))
- continue;
-
- if (tb[FRA_PRIORITY] &&
- (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
- continue;
-
- if (tb[FRA_IIFNAME] &&
- nla_strcmp(tb[FRA_IIFNAME], rule->iifname))
- continue;
-
- if (tb[FRA_OIFNAME] &&
- nla_strcmp(tb[FRA_OIFNAME], rule->oifname))
- continue;
-
- if (tb[FRA_FWMARK] &&
- (rule->mark != nla_get_u32(tb[FRA_FWMARK])))
- continue;
-
- if (tb[FRA_FWMASK] &&
- (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
- continue;
-
- if (tb[FRA_TUN_ID] &&
- (rule->tun_id != nla_get_be64(tb[FRA_TUN_ID])))
- continue;
-
- if (tb[FRA_L3MDEV] &&
- (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV])))
- continue;
-
- if (uid_range_set(&range) &&
- (!uid_eq(rule->uid_range.start, range.start) ||
- !uid_eq(rule->uid_range.end, range.end)))
- continue;
-
- if (tb[FRA_IP_PROTO] &&
- (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO])))
- continue;
-
- if (fib_rule_port_range_set(&sprange) &&
- !fib_rule_port_range_compare(&rule->sport_range, &sprange))
- continue;
-
- if (fib_rule_port_range_set(&dprange) &&
- !fib_rule_port_range_compare(&rule->dport_range, &dprange))
- continue;
-
- if (!ops->compare(rule, frh, tb))
- continue;
-
- if (rule->flags & FIB_RULE_PERMANENT) {
- err = -EPERM;
- goto errout;
- }
-
- if (ops->delete) {
- err = ops->delete(rule);
- if (err)
- goto errout;
- }
+ if (rule->tun_id)
+ ip_tunnel_unneed_metadata();
- if (rule->tun_id)
- ip_tunnel_unneed_metadata();
+ list_del_rcu(&rule->list);
- list_del_rcu(&rule->list);
-
- if (rule->action == FR_ACT_GOTO) {
- ops->nr_goto_rules--;
- if (rtnl_dereference(rule->ctarget) == NULL)
- ops->unresolved_rules--;
- }
+ if (rule->action == FR_ACT_GOTO) {
+ ops->nr_goto_rules--;
+ if (rtnl_dereference(rule->ctarget) == NULL)
+ ops->unresolved_rules--;
+ }
- /*
- * Check if this rule is a target to any of them. If so,
- * adjust to the next one with the same preference or
- * disable them. As this operation is eventually very
- * expensive, it is only performed if goto rules, except
- * current if it is goto rule, have actually been added.
- */
- if (ops->nr_goto_rules > 0) {
- struct fib_rule *n;
-
- n = list_next_entry(rule, list);
- if (&n->list == &ops->rules_list || n->pref != rule->pref)
- n = NULL;
- list_for_each_entry(r, &ops->rules_list, list) {
- if (rtnl_dereference(r->ctarget) != rule)
- continue;
- rcu_assign_pointer(r->ctarget, n);
- if (!n)
- ops->unresolved_rules++;
- }
+ /*
+ * Check if this rule is a target to any of them. If so,
+ * adjust to the next one with the same preference or
+ * disable them. As this operation is eventually very
+ * expensive, it is only performed if goto rules, except
+ * current if it is goto rule, have actually been added.
+ */
+ if (ops->nr_goto_rules > 0) {
+ struct fib_rule *n;
+
+ n = list_next_entry(rule, list);
+ if (&n->list == &ops->rules_list || n->pref != rule->pref)
+ n = NULL;
+ list_for_each_entry(r, &ops->rules_list, list) {
+ if (rtnl_dereference(r->ctarget) != rule)
+ continue;
+ rcu_assign_pointer(r->ctarget, n);
+ if (!n)
+ ops->unresolved_rules++;
}
-
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops,
- NULL);
- notify_rule_change(RTM_DELRULE, rule, ops, nlh,
- NETLINK_CB(skb).portid);
- fib_rule_put(rule);
- flush_route_cache(ops);
- rules_ops_put(ops);
- return 0;
}
- err = -ENOENT;
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops,
+ NULL);
+ notify_rule_change(RTM_DELRULE, rule, ops, nlh,
+ NETLINK_CB(skb).portid);
+ fib_rule_put(rule);
+ flush_route_cache(ops);
+ rules_ops_put(ops);
+ kfree(nlrule);
+ return 0;
+
errout:
+ if (nlrule)
+ kfree(nlrule);
rules_ops_put(ops);
return err;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 8e45c6c7ab08..d3781daa26ab 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3281,6 +3281,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
skb_dst_set(skb, (struct dst_entry *) md);
info = &md->u.tun_info;
+ memset(info, 0, sizeof(*info));
info->mode = IP_TUNNEL_INFO_TX;
info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ce519861be59..5afae29367c1 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -820,7 +820,8 @@ static void neigh_periodic_work(struct work_struct *work)
write_lock(&n->lock);
state = n->nud_state;
- if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
+ if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
+ (n->flags & NTF_EXT_LEARNED)) {
write_unlock(&n->lock);
goto next_elt;
}
@@ -1136,6 +1137,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
if (neigh->dead)
goto out;
+ neigh_update_ext_learned(neigh, flags, &notify);
+
if (!(new & NUD_VALID)) {
neigh_del_timer(neigh);
if (old & NUD_CONNECTED)
@@ -1781,6 +1784,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
flags &= ~NEIGH_UPDATE_F_OVERRIDE;
}
+ if (ndm->ndm_flags & NTF_EXT_LEARNED)
+ flags |= NEIGH_UPDATE_F_EXT_LEARNED;
+
if (ndm->ndm_flags & NTF_USE) {
neigh_event_send(neigh, NULL);
err = 0;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ff49e352deea..c647cfe114e0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4940,6 +4940,8 @@ static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
thlen = tcp_hdrlen(skb);
} else if (unlikely(skb_is_gso_sctp(skb))) {
thlen = sizeof(struct sctphdr);
+ } else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
+ thlen = sizeof(struct udphdr);
}
/* UFO sets gso_size to the size of the fragmentation
* payload, i.e. the size of the L4 (UDP) header is already
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index c795c3f509c9..72236695db3d 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -121,13 +121,16 @@ static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
- struct nlattr **tb)
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
int err = -EINVAL;
struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
- if (frh->tos)
+ if (frh->tos) {
+ NL_SET_ERR_MSG(extack, "Invalid tos value");
goto errout;
+ }
if (rule->table == RT_TABLE_UNSPEC) {
if (rule->action == FR_ACT_TO_TBL) {
diff --git a/net/ife/ife.c b/net/ife/ife.c
index 7d1ec76e7f43..13bbf8cb6a39 100644
--- a/net/ife/ife.c
+++ b/net/ife/ife.c
@@ -69,6 +69,9 @@ void *ife_decode(struct sk_buff *skb, u16 *metalen)
int total_pull;
u16 ifehdrln;
+ if (!pskb_may_pull(skb, skb->dev->hard_header_len + IFE_METAHDRLEN))
+ return NULL;
+
ifehdr = (struct ifeheadr *) (skb->data + skb->dev->hard_header_len);
ifehdrln = ntohs(ifehdr->metalen);
total_pull = skb->dev->hard_header_len + ifehdrln;
@@ -92,12 +95,43 @@ struct meta_tlvhdr {
__be16 len;
};
+static bool __ife_tlv_meta_valid(const unsigned char *skbdata,
+ const unsigned char *ifehdr_end)
+{
+ const struct meta_tlvhdr *tlv;
+ u16 tlvlen;
+
+ if (unlikely(skbdata + sizeof(*tlv) > ifehdr_end))
+ return false;
+
+ tlv = (const struct meta_tlvhdr *)skbdata;
+ tlvlen = ntohs(tlv->len);
+
+ /* tlv length field is inc header, check on minimum */
+ if (tlvlen < NLA_HDRLEN)
+ return false;
+
+ /* overflow by NLA_ALIGN check */
+ if (NLA_ALIGN(tlvlen) < tlvlen)
+ return false;
+
+ if (unlikely(skbdata + NLA_ALIGN(tlvlen) > ifehdr_end))
+ return false;
+
+ return true;
+}
+
/* Caller takes care of presenting data in network order
*/
-void *ife_tlv_meta_decode(void *skbdata, u16 *attrtype, u16 *dlen, u16 *totlen)
+void *ife_tlv_meta_decode(void *skbdata, const void *ifehdr_end, u16 *attrtype,
+ u16 *dlen, u16 *totlen)
{
- struct meta_tlvhdr *tlv = (struct meta_tlvhdr *) skbdata;
+ struct meta_tlvhdr *tlv;
+
+ if (!__ife_tlv_meta_valid(skbdata, ifehdr_end))
+ return NULL;
+ tlv = (struct meta_tlvhdr *)skbdata;
*dlen = ntohs(tlv->len) - NLA_HDRLEN;
*attrtype = ntohs(tlv->type);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 737d11bc8838..f8eb78d042a4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -213,14 +213,17 @@ static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
- struct nlattr **tb)
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
int err = -EINVAL;
struct fib4_rule *rule4 = (struct fib4_rule *) rule;
- if (frh->tos & ~IPTOS_TOS_MASK)
+ if (frh->tos & ~IPTOS_TOS_MASK) {
+ NL_SET_ERR_MSG(extack, "Invalid tos");
goto errout;
+ }
/* split local/main if they are not already split */
err = fib_unmerge(net);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 83c73bab2c3d..f2338e40c37d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -878,11 +878,14 @@ static int __ip_append_data(struct sock *sk,
struct rtable *rt = (struct rtable *)cork->dst;
unsigned int wmem_alloc_delta = 0;
u32 tskey = 0;
+ bool paged;
skb = skb_peek_tail(queue);
exthdrlen = !skb ? rt->dst.header_len : 0;
- mtu = cork->fragsize;
+ mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
+ paged = !!cork->gso_size;
+
if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
tskey = sk->sk_tskey++;
@@ -906,7 +909,7 @@ static int __ip_append_data(struct sock *sk,
if (transhdrlen &&
length + fragheaderlen <= mtu &&
rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) &&
- !(flags & MSG_MORE) &&
+ (!(flags & MSG_MORE) || cork->gso_size) &&
!exthdrlen)
csummode = CHECKSUM_PARTIAL;
@@ -933,6 +936,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
+ unsigned int pagedlen = 0;
struct sk_buff *skb_prev;
alloc_new_skb:
skb_prev = skb;
@@ -953,8 +957,12 @@ alloc_new_skb:
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else
+ else if (!paged)
alloclen = fraglen;
+ else {
+ alloclen = min_t(int, fraglen, MAX_HEADER);
+ pagedlen = fraglen - alloclen;
+ }
alloclen += exthdrlen;
@@ -998,7 +1006,7 @@ alloc_new_skb:
/*
* Find where to start putting bytes.
*/
- data = skb_put(skb, fraglen + exthdrlen);
+ data = skb_put(skb, fraglen + exthdrlen - pagedlen);
skb_set_network_header(skb, exthdrlen);
skb->transport_header = (skb->network_header +
fragheaderlen);
@@ -1014,7 +1022,7 @@ alloc_new_skb:
pskb_trim_unique(skb_prev, maxfraglen);
}
- copy = datalen - transhdrlen - fraggap;
+ copy = datalen - transhdrlen - fraggap - pagedlen;
if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
@@ -1022,7 +1030,7 @@ alloc_new_skb:
}
offset += copy;
- length -= datalen - fraggap;
+ length -= copy + transhdrlen;
transhdrlen = 0;
exthdrlen = 0;
csummode = CHECKSUM_NONE;
@@ -1135,6 +1143,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
*rtp = NULL;
cork->fragsize = ip_sk_use_pmtu(sk) ?
dst_mtu(&rt->dst) : rt->dst.dev->mtu;
+
+ cork->gso_size = sk->sk_type == SOCK_DGRAM ? ipc->gso_size : 0;
cork->dst = &rt->dst;
cork->length = 0;
cork->ttl = ipc->ttl;
@@ -1214,7 +1224,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
return -EOPNOTSUPP;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
- mtu = cork->fragsize;
+ mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
@@ -1470,9 +1480,8 @@ struct sk_buff *ip_make_skb(struct sock *sk,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
struct ipcm_cookie *ipc, struct rtable **rtp,
- unsigned int flags)
+ struct inet_cork *cork, unsigned int flags)
{
- struct inet_cork cork;
struct sk_buff_head queue;
int err;
@@ -1481,22 +1490,22 @@ struct sk_buff *ip_make_skb(struct sock *sk,
__skb_queue_head_init(&queue);
- cork.flags = 0;
- cork.addr = 0;
- cork.opt = NULL;
- err = ip_setup_cork(sk, &cork, ipc, rtp);
+ cork->flags = 0;
+ cork->addr = 0;
+ cork->opt = NULL;
+ err = ip_setup_cork(sk, cork, ipc, rtp);
if (err)
return ERR_PTR(err);
- err = __ip_append_data(sk, fl4, &queue, &cork,
+ err = __ip_append_data(sk, fl4, &queue, cork,
&current->task_frag, getfrag,
from, length, transhdrlen, flags);
if (err) {
- __ip_flush_pending_frames(sk, &queue, &cork);
+ __ip_flush_pending_frames(sk, &queue, cork);
return ERR_PTR(err);
}
- return __ip_make_skb(sk, fl4, &queue, &cork);
+ return __ip_make_skb(sk, fl4, &queue, cork);
}
/*
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 43f620feb1c4..d839d74853fc 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -28,6 +28,9 @@
*
* Multiple Nameservers in /proc/net/pnp
* -- Josef Siemes <jsiemes@web.de>, Aug 2002
+ *
+ * NTP servers in /proc/net/ipconfig/ntp_servers
+ * -- Chris Novakovic <chris@chrisn.me.uk>, April 2018
*/
#include <linux/types.h>
@@ -93,6 +96,7 @@
#define CONF_TIMEOUT_MAX (HZ*30) /* Maximum allowed timeout */
#define CONF_NAMESERVERS_MAX 3 /* Maximum number of nameservers
- '3' from resolv.h */
+#define CONF_NTP_SERVERS_MAX 3 /* Maximum number of NTP servers */
#define NONE cpu_to_be32(INADDR_NONE)
#define ANY cpu_to_be32(INADDR_ANY)
@@ -152,12 +156,16 @@ static int ic_proto_used; /* Protocol used, if any */
#define ic_proto_used 0
#endif
static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */
+static __be32 ic_ntp_servers[CONF_NTP_SERVERS_MAX]; /* NTP server IP addresses */
static u8 ic_domain[64]; /* DNS (not NIS) domain name */
/*
* Private state.
*/
+/* proc_dir_entry for /proc/net/ipconfig */
+static struct proc_dir_entry *ipconfig_dir;
+
/* Name of user-selected boot device */
static char user_dev_name[IFNAMSIZ] __initdata = { 0, };
@@ -576,6 +584,15 @@ static inline void __init ic_nameservers_predef(void)
ic_nameservers[i] = NONE;
}
+/* Predefine NTP servers */
+static inline void __init ic_ntp_servers_predef(void)
+{
+ int i;
+
+ for (i = 0; i < CONF_NTP_SERVERS_MAX; i++)
+ ic_ntp_servers[i] = NONE;
+}
+
/*
* DHCP/BOOTP support.
*/
@@ -671,6 +688,7 @@ ic_dhcp_init_options(u8 *options, struct ic_device *d)
17, /* Boot path */
26, /* MTU */
40, /* NIS domain name */
+ 42, /* NTP servers */
};
*e++ = 55; /* Parameter request list */
@@ -721,9 +739,11 @@ static void __init ic_bootp_init_ext(u8 *e)
*e++ = 3; /* Default gateway request */
*e++ = 4;
e += 4;
- *e++ = 5; /* Name server request */
- *e++ = 8;
- e += 8;
+#if CONF_NAMESERVERS_MAX > 0
+ *e++ = 6; /* (DNS) name server request */
+ *e++ = 4 * CONF_NAMESERVERS_MAX;
+ e += 4 * CONF_NAMESERVERS_MAX;
+#endif
*e++ = 12; /* Host name request */
*e++ = 32;
e += 32;
@@ -748,7 +768,13 @@ static void __init ic_bootp_init_ext(u8 *e)
*/
static inline void __init ic_bootp_init(void)
{
+ /* Re-initialise all name servers and NTP servers to NONE, in case any
+ * were set via the "ip=" or "nfsaddrs=" kernel command line parameters:
+ * any IP addresses specified there will already have been decoded but
+ * are no longer needed
+ */
ic_nameservers_predef();
+ ic_ntp_servers_predef();
dev_add_pack(&bootp_packet_type);
}
@@ -912,6 +938,15 @@ static void __init ic_do_bootp_ext(u8 *ext)
ic_bootp_string(utsname()->domainname, ext+1, *ext,
__NEW_UTS_LEN);
break;
+ case 42: /* NTP servers */
+ servers = *ext / 4;
+ if (servers > CONF_NTP_SERVERS_MAX)
+ servers = CONF_NTP_SERVERS_MAX;
+ for (i = 0; i < servers; i++) {
+ if (ic_ntp_servers[i] == NONE)
+ memcpy(&ic_ntp_servers[i], ext+1+4*i, 4);
+ }
+ break;
}
}
@@ -1258,6 +1293,7 @@ static int __init ic_dynamic(void)
#ifdef CONFIG_PROC_FS
+/* Name servers: */
static int pnp_seq_show(struct seq_file *seq, void *v)
{
int i;
@@ -1294,6 +1330,62 @@ static const struct file_operations pnp_seq_fops = {
.llseek = seq_lseek,
.release = single_release,
};
+
+/* Create the /proc/net/ipconfig directory */
+static int __init ipconfig_proc_net_init(void)
+{
+ ipconfig_dir = proc_net_mkdir(&init_net, "ipconfig", init_net.proc_net);
+ if (!ipconfig_dir)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/* Create a new file under /proc/net/ipconfig */
+static int ipconfig_proc_net_create(const char *name,
+ const struct file_operations *fops)
+{
+ char *pname;
+ struct proc_dir_entry *p;
+
+ if (!ipconfig_dir)
+ return -ENOMEM;
+
+ pname = kasprintf(GFP_KERNEL, "%s%s", "ipconfig/", name);
+ if (!pname)
+ return -ENOMEM;
+
+ p = proc_create(pname, 0444, init_net.proc_net, fops);
+ kfree(pname);
+ if (!p)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/* Write NTP server IP addresses to /proc/net/ipconfig/ntp_servers */
+static int ntp_servers_seq_show(struct seq_file *seq, void *v)
+{
+ int i;
+
+ for (i = 0; i < CONF_NTP_SERVERS_MAX; i++) {
+ if (ic_ntp_servers[i] != NONE)
+ seq_printf(seq, "%pI4\n", &ic_ntp_servers[i]);
+ }
+ return 0;
+}
+
+static int ntp_servers_seq_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ntp_servers_seq_show, NULL);
+}
+
+static const struct file_operations ntp_servers_seq_fops = {
+ .open = ntp_servers_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
#endif /* CONFIG_PROC_FS */
/*
@@ -1368,8 +1460,20 @@ static int __init ip_auto_config(void)
int err;
unsigned int i;
+ /* Initialise all name servers and NTP servers to NONE (but only if the
+ * "ip=" or "nfsaddrs=" kernel command line parameters weren't decoded,
+ * otherwise we'll overwrite the IP addresses specified there)
+ */
+ if (ic_set_manually == 0) {
+ ic_nameservers_predef();
+ ic_ntp_servers_predef();
+ }
+
#ifdef CONFIG_PROC_FS
proc_create("pnp", 0444, init_net.proc_net, &pnp_seq_fops);
+
+ if (ipconfig_proc_net_init() == 0)
+ ipconfig_proc_net_create("ntp_servers", &ntp_servers_seq_fops);
#endif /* CONFIG_PROC_FS */
if (!ic_enable)
@@ -1481,16 +1585,32 @@ static int __init ip_auto_config(void)
&ic_servaddr, &root_server_addr, root_server_path);
if (ic_dev_mtu)
pr_cont(", mtu=%d", ic_dev_mtu);
- for (i = 0; i < CONF_NAMESERVERS_MAX; i++)
+ /* Name servers (if any): */
+ for (i = 0; i < CONF_NAMESERVERS_MAX; i++) {
if (ic_nameservers[i] != NONE) {
- pr_cont(" nameserver%u=%pI4",
- i, &ic_nameservers[i]);
- break;
+ if (i == 0)
+ pr_info(" nameserver%u=%pI4",
+ i, &ic_nameservers[i]);
+ else
+ pr_cont(", nameserver%u=%pI4",
+ i, &ic_nameservers[i]);
}
- for (i++; i < CONF_NAMESERVERS_MAX; i++)
- if (ic_nameservers[i] != NONE)
- pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]);
- pr_cont("\n");
+ if (i + 1 == CONF_NAMESERVERS_MAX)
+ pr_cont("\n");
+ }
+ /* NTP servers (if any): */
+ for (i = 0; i < CONF_NTP_SERVERS_MAX; i++) {
+ if (ic_ntp_servers[i] != NONE) {
+ if (i == 0)
+ pr_info(" ntpserver%u=%pI4",
+ i, &ic_ntp_servers[i]);
+ else
+ pr_cont(", ntpserver%u=%pI4",
+ i, &ic_ntp_servers[i]);
+ }
+ if (i + 1 == CONF_NTP_SERVERS_MAX)
+ pr_cont("\n");
+ }
#endif /* !SILENT */
/*
@@ -1588,7 +1708,9 @@ static int __init ip_auto_config_setup(char *addrs)
return 1;
}
+ /* Initialise all name servers and NTP servers to NONE */
ic_nameservers_predef();
+ ic_ntp_servers_predef();
/* Parse string for static IP assignment. */
ip = addrs;
@@ -1647,6 +1769,13 @@ static int __init ip_auto_config_setup(char *addrs)
ic_nameservers[1] = NONE;
}
break;
+ case 9:
+ if (CONF_NTP_SERVERS_MAX >= 1) {
+ ic_ntp_servers[0] = in_aton(ip);
+ if (ic_ntp_servers[0] == ANY)
+ ic_ntp_servers[0] = NONE;
+ }
+ break;
}
}
ip = cp;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 2fb4de3f7f66..38e092eafc97 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -201,7 +201,8 @@ static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
};
static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
- struct fib_rule_hdr *frh, struct nlattr **tb)
+ struct fib_rule_hdr *frh, struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
return 0;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0396fb919b5d..8acbe5fd2098 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -582,6 +582,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
u32 copied;
int time;
+ trace_tcp_rcv_space_adjust(sk);
+
tcp_mstamp_refresh(tp);
time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
@@ -3887,11 +3889,8 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
int length = (th->doff << 2) - sizeof(*th);
const u8 *ptr = (const u8 *)(th + 1);
- /* If the TCP option is too short, we can short cut */
- if (length < TCPOLEN_MD5SIG)
- return NULL;
-
- while (length > 0) {
+ /* If not enough data remaining, we can short cut */
+ while (length >= TCPOLEN_MD5SIG) {
int opcode = *ptr++;
int opsize;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 383cac0ff0ec..95feffb6d53f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -585,14 +585,15 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
unsigned int remaining = MAX_TCP_OPTION_SPACE;
struct tcp_fastopen_request *fastopen = tp->fastopen_req;
+ *md5 = NULL;
#ifdef CONFIG_TCP_MD5SIG
- *md5 = tp->af_specific->md5_lookup(sk, sk);
- if (*md5) {
- opts->options |= OPTION_MD5;
- remaining -= TCPOLEN_MD5SIG_ALIGNED;
+ if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
+ *md5 = tp->af_specific->md5_lookup(sk, sk);
+ if (*md5) {
+ opts->options |= OPTION_MD5;
+ remaining -= TCPOLEN_MD5SIG_ALIGNED;
+ }
}
-#else
- *md5 = NULL;
#endif
/* We always get an MSS option. The option bytes which will be seen in
@@ -720,14 +721,15 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
opts->options = 0;
+ *md5 = NULL;
#ifdef CONFIG_TCP_MD5SIG
- *md5 = tp->af_specific->md5_lookup(sk, sk);
- if (unlikely(*md5)) {
- opts->options |= OPTION_MD5;
- size += TCPOLEN_MD5SIG_ALIGNED;
+ if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
+ *md5 = tp->af_specific->md5_lookup(sk, sk);
+ if (*md5) {
+ opts->options |= OPTION_MD5;
+ size += TCPOLEN_MD5SIG_ALIGNED;
+ }
}
-#else
- *md5 = NULL;
#endif
if (likely(tp->rx_opt.tstamp_ok)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 24b5c59b1c53..794aeafeb782 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -757,7 +757,8 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
}
EXPORT_SYMBOL(udp_set_csum);
-static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
+static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
+ struct inet_cork *cork)
{
struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(sk);
@@ -777,6 +778,21 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
uh->len = htons(len);
uh->check = 0;
+ if (cork->gso_size) {
+ const int hlen = skb_network_header_len(skb) +
+ sizeof(struct udphdr);
+
+ if (hlen + cork->gso_size > cork->fragsize)
+ return -EINVAL;
+ if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
+ return -EINVAL;
+ if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite)
+ return -EIO;
+
+ skb_shinfo(skb)->gso_size = cork->gso_size;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+ }
+
if (is_udplite) /* UDP-Lite */
csum = udplite_csum(skb);
@@ -828,7 +844,7 @@ int udp_push_pending_frames(struct sock *sk)
if (!skb)
goto out;
- err = udp_send_skb(skb, fl4);
+ err = udp_send_skb(skb, fl4, &inet->cork.base);
out:
up->len = 0;
@@ -837,6 +853,43 @@ out:
}
EXPORT_SYMBOL(udp_push_pending_frames);
+static int __udp_cmsg_send(struct cmsghdr *cmsg, u16 *gso_size)
+{
+ switch (cmsg->cmsg_type) {
+ case UDP_SEGMENT:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u16)))
+ return -EINVAL;
+ *gso_size = *(__u16 *)CMSG_DATA(cmsg);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size)
+{
+ struct cmsghdr *cmsg;
+ bool need_ip = false;
+ int err;
+
+ for_each_cmsghdr(cmsg, msg) {
+ if (!CMSG_OK(msg, cmsg))
+ return -EINVAL;
+
+ if (cmsg->cmsg_level != SOL_UDP) {
+ need_ip = true;
+ continue;
+ }
+
+ err = __udp_cmsg_send(cmsg, gso_size);
+ if (err)
+ return err;
+ }
+
+ return need_ip;
+}
+EXPORT_SYMBOL_GPL(udp_cmsg_send);
+
int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct inet_sock *inet = inet_sk(sk);
@@ -922,10 +975,14 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc.sockc.tsflags = sk->sk_tsflags;
ipc.addr = inet->inet_saddr;
ipc.oif = sk->sk_bound_dev_if;
+ ipc.gso_size = up->gso_size;
if (msg->msg_controllen) {
- err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6);
- if (unlikely(err)) {
+ err = udp_cmsg_send(sk, msg, &ipc.gso_size);
+ if (err > 0)
+ err = ip_cmsg_send(sk, msg, &ipc,
+ sk->sk_family == AF_INET6);
+ if (unlikely(err < 0)) {
kfree(ipc.opt);
return err;
}
@@ -1030,12 +1087,14 @@ back_from_confirm:
/* Lockless fast path for the non-corking case. */
if (!corkreq) {
+ struct inet_cork cork;
+
skb = ip_make_skb(sk, fl4, getfrag, msg, ulen,
sizeof(struct udphdr), &ipc, &rt,
- msg->msg_flags);
+ &cork, msg->msg_flags);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
- err = udp_send_skb(skb, fl4);
+ err = udp_send_skb(skb, fl4, &cork);
goto out;
}
@@ -2365,6 +2424,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
up->no_check6_rx = valbool;
break;
+ case UDP_SEGMENT:
+ if (val < 0 || val > USHRT_MAX)
+ return -EINVAL;
+ up->gso_size = val;
+ break;
+
/*
* UDP-Lite's partial checksum coverage (RFC 3828).
*/
@@ -2455,6 +2520,10 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
val = up->no_check6_rx;
break;
+ case UDP_SEGMENT:
+ val = up->gso_size;
+ break;
+
/* The following two cannot be changed on UDP sockets, the return is
* always 0 (which corresponds to the full checksum coverage of UDP). */
case UDPLITE_SEND_CSCOV:
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index ea6e6e7df0ee..dc5158cba66e 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -187,6 +187,68 @@ out_unlock:
}
EXPORT_SYMBOL(skb_udp_tunnel_segment);
+struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
+ netdev_features_t features,
+ unsigned int mss, __sum16 check)
+{
+ struct sock *sk = gso_skb->sk;
+ unsigned int sum_truesize = 0;
+ struct sk_buff *segs, *seg;
+ unsigned int hdrlen;
+ struct udphdr *uh;
+
+ if (gso_skb->len <= sizeof(*uh) + mss)
+ return ERR_PTR(-EINVAL);
+
+ hdrlen = gso_skb->data - skb_mac_header(gso_skb);
+ skb_pull(gso_skb, sizeof(*uh));
+
+ /* clear destructor to avoid skb_segment assigning it to tail */
+ WARN_ON_ONCE(gso_skb->destructor != sock_wfree);
+ gso_skb->destructor = NULL;
+
+ segs = skb_segment(gso_skb, features);
+ if (unlikely(IS_ERR_OR_NULL(segs))) {
+ gso_skb->destructor = sock_wfree;
+ return segs;
+ }
+
+ for (seg = segs; seg; seg = seg->next) {
+ uh = udp_hdr(seg);
+ uh->len = htons(seg->len - hdrlen);
+ uh->check = check;
+
+ /* last packet can be partial gso_size */
+ if (!seg->next)
+ csum_replace2(&uh->check, htons(mss),
+ htons(seg->len - hdrlen - sizeof(*uh)));
+
+ seg->destructor = sock_wfree;
+ seg->sk = sk;
+ sum_truesize += seg->truesize;
+ }
+
+ refcount_add(sum_truesize - gso_skb->truesize, &sk->sk_wmem_alloc);
+
+ return segs;
+}
+EXPORT_SYMBOL_GPL(__udp_gso_segment);
+
+static struct sk_buff *__udp4_gso_segment(struct sk_buff *gso_skb,
+ netdev_features_t features)
+{
+ const struct iphdr *iph = ip_hdr(gso_skb);
+ unsigned int mss = skb_shinfo(gso_skb)->gso_size;
+
+ if (!can_checksum_protocol(features, htons(ETH_P_IP)))
+ return ERR_PTR(-EIO);
+
+ return __udp_gso_segment(gso_skb, features, mss,
+ udp_v4_check(sizeof(struct udphdr) + mss,
+ iph->saddr, iph->daddr, 0));
+}
+EXPORT_SYMBOL_GPL(__udp4_gso_segment);
+
static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -203,12 +265,15 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
goto out;
}
- if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+ if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_UDP | SKB_GSO_UDP_L4)))
goto out;
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+ return __udp4_gso_segment(skb, features);
+
mss = skb_shinfo(skb)->gso_size;
if (unlikely(skb->len <= mss))
goto out;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7b4d7bbf2c17..fbfd71a2d9c8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3622,8 +3622,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
struct net *net = dev_net(dev);
struct inet6_dev *idev;
struct inet6_ifaddr *ifa, *tmp;
- int _keep_addr;
- bool keep_addr;
+ bool keep_addr = false;
int state, i;
ASSERT_RTNL();
@@ -3649,15 +3648,18 @@ static int addrconf_ifdown(struct net_device *dev, int how)
}
- /* aggregate the system setting and interface setting */
- _keep_addr = net->ipv6.devconf_all->keep_addr_on_down;
- if (!_keep_addr)
- _keep_addr = idev->cnf.keep_addr_on_down;
-
/* combine the user config with event to determine if permanent
* addresses are to be removed from address hash table
*/
- keep_addr = !(how || _keep_addr <= 0 || idev->cnf.disable_ipv6);
+ if (!how && !idev->cnf.disable_ipv6) {
+ /* aggregate the system setting and interface setting */
+ int _keep_addr = net->ipv6.devconf_all->keep_addr_on_down;
+
+ if (!_keep_addr)
+ _keep_addr = idev->cnf.keep_addr_on_down;
+
+ keep_addr = (_keep_addr > 0);
+ }
/* Step 2: clear hash table */
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
@@ -3707,11 +3709,6 @@ restart:
write_lock_bh(&idev->lock);
}
- /* re-combine the user config with event to determine if permanent
- * addresses are to be removed from the interface list
- */
- keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
-
list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
struct fib6_info *rt = NULL;
bool keep;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index df113c7b5fc8..6547fc6491a6 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -245,15 +245,18 @@ static const struct nla_policy fib6_rule_policy[FRA_MAX+1] = {
static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
- struct nlattr **tb)
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
int err = -EINVAL;
struct net *net = sock_net(skb->sk);
struct fib6_rule *rule6 = (struct fib6_rule *) rule;
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
- if (rule->table == RT6_TABLE_UNSPEC)
+ if (rule->table == RT6_TABLE_UNSPEC) {
+ NL_SET_ERR_MSG(extack, "Invalid table");
goto errout;
+ }
if (fib6_new_table(net, rule->table) == NULL) {
err = -ENOBUFS;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 4a87f9428ca5..5b3f2f89ef41 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -88,9 +88,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
if (skb->encapsulation &&
skb_shinfo(skb)->gso_type & (SKB_GSO_IPXIP4 | SKB_GSO_IPXIP6))
- udpfrag = proto == IPPROTO_UDP && encap;
+ udpfrag = proto == IPPROTO_UDP && encap &&
+ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
else
- udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
+ udpfrag = proto == IPPROTO_UDP && !skb->encapsulation &&
+ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
ops = rcu_dereference(inet6_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment)) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 6a477d54f8c7..dfd8af41824e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1240,6 +1240,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
if (mtu < IPV6_MIN_MTU)
return -EINVAL;
cork->base.fragsize = mtu;
+ cork->base.gso_size = sk->sk_type == SOCK_DGRAM ? ipc6->gso_size : 0;
+
if (dst_allfrag(xfrm_dst_path(&rt->dst)))
cork->base.flags |= IPCORK_ALLFRAG;
cork->base.length = 0;
@@ -1274,6 +1276,7 @@ static int __ip6_append_data(struct sock *sk,
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
unsigned int wmem_alloc_delta = 0;
+ bool paged;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1281,7 +1284,8 @@ static int __ip6_append_data(struct sock *sk,
dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
}
- mtu = cork->fragsize;
+ paged = !!cork->gso_size;
+ mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
orig_mtu = mtu;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -1329,7 +1333,7 @@ emsgsize:
if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
headersize == sizeof(struct ipv6hdr) &&
length <= mtu - headersize &&
- !(flags & MSG_MORE) &&
+ (!(flags & MSG_MORE) || cork->gso_size) &&
rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
csummode = CHECKSUM_PARTIAL;
@@ -1372,6 +1376,7 @@ emsgsize:
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
+ unsigned int pagedlen = 0;
alloc_new_skb:
/* There's no room in the current skb */
if (skb)
@@ -1394,11 +1399,17 @@ alloc_new_skb:
if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
+ fraglen = datalen + fragheaderlen;
+
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else
- alloclen = datalen + fragheaderlen;
+ else if (!paged)
+ alloclen = fraglen;
+ else {
+ alloclen = min_t(int, fraglen, MAX_HEADER);
+ pagedlen = fraglen - alloclen;
+ }
alloclen += dst_exthdrlen;
@@ -1420,7 +1431,7 @@ alloc_new_skb:
*/
alloclen += sizeof(struct frag_hdr);
- copy = datalen - transhdrlen - fraggap;
+ copy = datalen - transhdrlen - fraggap - pagedlen;
if (copy < 0) {
err = -EINVAL;
goto error;
@@ -1459,7 +1470,7 @@ alloc_new_skb:
/*
* Find where to start putting bytes
*/
- data = skb_put(skb, fraglen);
+ data = skb_put(skb, fraglen - pagedlen);
skb_set_network_header(skb, exthdrlen);
data += fragheaderlen;
skb->transport_header = (skb->network_header +
@@ -1482,7 +1493,7 @@ alloc_new_skb:
}
offset += copy;
- length -= datalen - fraggap;
+ length -= copy + transhdrlen;
transhdrlen = 0;
exthdrlen = 0;
dst_exthdrlen = 0;
@@ -1755,9 +1766,9 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
void *from, int length, int transhdrlen,
struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags,
+ struct inet_cork_full *cork,
const struct sockcm_cookie *sockc)
{
- struct inet_cork_full cork;
struct inet6_cork v6_cork;
struct sk_buff_head queue;
int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
@@ -1768,27 +1779,27 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
__skb_queue_head_init(&queue);
- cork.base.flags = 0;
- cork.base.addr = 0;
- cork.base.opt = NULL;
- cork.base.dst = NULL;
+ cork->base.flags = 0;
+ cork->base.addr = 0;
+ cork->base.opt = NULL;
+ cork->base.dst = NULL;
v6_cork.opt = NULL;
- err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
+ err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
if (err) {
- ip6_cork_release(&cork, &v6_cork);
+ ip6_cork_release(cork, &v6_cork);
return ERR_PTR(err);
}
if (ipc6->dontfrag < 0)
ipc6->dontfrag = inet6_sk(sk)->dontfrag;
- err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
+ err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
&current->task_frag, getfrag, from,
length + exthdrlen, transhdrlen + exthdrlen,
flags, ipc6, sockc);
if (err) {
- __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
+ __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
return ERR_PTR(err);
}
- return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
+ return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 298fd8b6ed17..20a419ee8000 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -180,7 +180,8 @@ static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
};
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
- struct fib_rule_hdr *frh, struct nlattr **tb)
+ struct fib_rule_hdr *frh, struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
return 0;
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index ccbfa83e4bb0..ce77bcc2490c 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -48,6 +48,34 @@ config NFT_CHAIN_ROUTE_IPV6
fields such as the source, destination, flowlabel, hop-limit and
the packet mark.
+if NF_NAT_IPV6
+
+config NFT_CHAIN_NAT_IPV6
+ tristate "IPv6 nf_tables nat chain support"
+ help
+ This option enables the "nat" chain for IPv6 in nf_tables. This
+ chain type is used to perform Network Address Translation (NAT)
+ packet transformations such as the source, destination address and
+ source and destination ports.
+
+config NFT_MASQ_IPV6
+ tristate "IPv6 masquerade support for nf_tables"
+ depends on NFT_MASQ
+ select NF_NAT_MASQUERADE_IPV6
+ help
+ This is the expression that provides IPv4 masquerading support for
+ nf_tables.
+
+config NFT_REDIR_IPV6
+ tristate "IPv6 redirect support for nf_tables"
+ depends on NFT_REDIR
+ select NF_NAT_REDIRECT
+ help
+ This is the expression that provides IPv4 redirect support for
+ nf_tables.
+
+endif # NF_NAT_IPV6
+
config NFT_REJECT_IPV6
select NF_REJECT_IPV6
default NFT_REJECT
@@ -107,39 +135,12 @@ config NF_NAT_IPV6
if NF_NAT_IPV6
-config NFT_CHAIN_NAT_IPV6
- depends on NF_TABLES_IPV6
- tristate "IPv6 nf_tables nat chain support"
- help
- This option enables the "nat" chain for IPv6 in nf_tables. This
- chain type is used to perform Network Address Translation (NAT)
- packet transformations such as the source, destination address and
- source and destination ports.
-
config NF_NAT_MASQUERADE_IPV6
tristate "IPv6 masquerade support"
help
This is the kernel functionality to provide NAT in the masquerade
flavour (automatic source address selection) for IPv6.
-config NFT_MASQ_IPV6
- tristate "IPv6 masquerade support for nf_tables"
- depends on NF_TABLES_IPV6
- depends on NFT_MASQ
- select NF_NAT_MASQUERADE_IPV6
- help
- This is the expression that provides IPv4 masquerading support for
- nf_tables.
-
-config NFT_REDIR_IPV6
- tristate "IPv6 redirect support for nf_tables"
- depends on NF_TABLES_IPV6
- depends on NFT_REDIR
- select NF_NAT_REDIRECT
- help
- This is the expression that provides IPv4 redirect support for
- nf_tables.
-
endif # NF_NAT_IPV6
config IP6_NF_IPTABLES
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0407bbc5a028..7ee0a34fba46 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1541,11 +1541,12 @@ static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
static int rt6_remove_exception_rt(struct rt6_info *rt)
{
struct rt6_exception_bucket *bucket;
- struct fib6_info *from = rt->from;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
+ struct fib6_info *from;
int err;
+ from = rcu_dereference(rt->from);
if (!from ||
!(rt->rt6i_flags & RTF_CACHE))
return -EINVAL;
@@ -2201,10 +2202,12 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
if (rt) {
if (rt->rt6i_flags & RTF_CACHE) {
+ rcu_read_lock();
if (rt6_check_expired(rt)) {
rt6_remove_exception_rt(rt);
dst = NULL;
}
+ rcu_read_unlock();
} else {
dst_release(dst);
dst = NULL;
@@ -2221,6 +2224,7 @@ static void ip6_link_failure(struct sk_buff *skb)
rt = (struct rt6_info *) skb_dst(skb);
if (rt) {
+ rcu_read_lock();
if (rt->rt6i_flags & RTF_CACHE) {
if (dst_hold_safe(&rt->dst))
rt6_remove_exception_rt(rt);
@@ -2228,15 +2232,14 @@ static void ip6_link_failure(struct sk_buff *skb)
struct fib6_info *from;
struct fib6_node *fn;
- rcu_read_lock();
from = rcu_dereference(rt->from);
if (from) {
fn = rcu_dereference(from->fib6_node);
if (fn && (rt->rt6i_flags & RTF_DEFAULT))
fn->fn_sernum = -1;
}
- rcu_read_unlock();
}
+ rcu_read_unlock();
}
}
@@ -3338,8 +3341,10 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
rcu_read_lock();
from = rcu_dereference(rt->from);
- nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
+ fib6_info_hold(from);
rcu_read_unlock();
+
+ nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
if (!nrt)
goto out;
@@ -3353,7 +3358,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
* a cached route because rt6_insert_exception() will
* takes care of it
*/
- if (rt6_insert_exception(nrt, rt->from)) {
+ if (rt6_insert_exception(nrt, from)) {
dst_release_immediate(&nrt->dst);
goto out;
}
@@ -3365,6 +3370,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
out:
+ fib6_info_release(from);
neigh_release(neigh);
}
@@ -4047,6 +4053,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
+ [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
[RTA_OIF] = { .type = NLA_U32 },
[RTA_IIF] = { .type = NLA_U32 },
[RTA_PRIORITY] = { .type = NLA_U32 },
@@ -4058,6 +4065,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_EXPIRES] = { .type = NLA_U32 },
[RTA_UID] = { .type = NLA_U32 },
[RTA_MARK] = { .type = NLA_U32 },
+ [RTA_TABLE] = { .type = NLA_U32 },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index f343e6f0fc95..9898926ce30d 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -91,6 +91,24 @@ static void set_tun_src(struct net *net, struct net_device *dev,
rcu_read_unlock();
}
+/* Compute flowlabel for outer IPv6 header */
+static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
+ struct ipv6hdr *inner_hdr)
+{
+ int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel;
+ __be32 flowlabel = 0;
+ u32 hash;
+
+ if (do_flowlabel > 0) {
+ hash = skb_get_hash(skb);
+ rol32(hash, 16);
+ flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
+ } else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
+ flowlabel = ip6_flowlabel(inner_hdr);
+ }
+ return flowlabel;
+}
+
/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
{
@@ -99,6 +117,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
struct ipv6hdr *hdr, *inner_hdr;
struct ipv6_sr_hdr *isrh;
int hdrlen, tot_len, err;
+ __be32 flowlabel;
hdrlen = (osrh->hdrlen + 1) << 3;
tot_len = hdrlen + sizeof(*hdr);
@@ -119,12 +138,13 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
* decapsulation will overwrite inner hlim with outer hlim
*/
+ flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
if (skb->protocol == htons(ETH_P_IPV6)) {
ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
- ip6_flowlabel(inner_hdr));
+ flowlabel);
hdr->hop_limit = inner_hdr->hop_limit;
} else {
- ip6_flow_hdr(hdr, 0, 0);
+ ip6_flow_hdr(hdr, 0, flowlabel);
hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
}
@@ -136,7 +156,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
isrh->nexthdr = proto;
hdr->daddr = isrh->segments[isrh->first_segment];
- set_tun_src(net, ip6_dst_idev(dst)->dev, &hdr->daddr, &hdr->saddr);
+ set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(isrh)) {
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 6fbdef630152..e15cd37024fd 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -152,6 +152,13 @@ static struct ctl_table ipv6_table_template[] = {
.extra1 = &zero,
.extra2 = &one,
},
+ {
+ .procname = "seg6_flowlabel",
+ .data = &init_net.ipv6.sysctl.seg6_flowlabel,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{ }
};
@@ -217,6 +224,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy,
+ ipv6_table[15].data = &net->ipv6.sysctl.seg6_flowlabel;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4ec76a87aeb8..6acfdd3e442b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1023,7 +1023,8 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
* Sending
*/
-static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6)
+static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
+ struct inet_cork *cork)
{
struct sock *sk = skb->sk;
struct udphdr *uh;
@@ -1042,6 +1043,21 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6)
uh->len = htons(len);
uh->check = 0;
+ if (cork->gso_size) {
+ const int hlen = skb_network_header_len(skb) +
+ sizeof(struct udphdr);
+
+ if (hlen + cork->gso_size > cork->fragsize)
+ return -EINVAL;
+ if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
+ return -EINVAL;
+ if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite)
+ return -EIO;
+
+ skb_shinfo(skb)->gso_size = cork->gso_size;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+ }
+
if (is_udplite)
csum = udplite_csum(skb);
else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */
@@ -1093,7 +1109,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
if (!skb)
goto out;
- err = udp_v6_send_skb(skb, &fl6);
+ err = udp_v6_send_skb(skb, &fl6, &inet_sk(sk)->cork.base);
out:
up->len = 0;
@@ -1127,6 +1143,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc6.hlimit = -1;
ipc6.tclass = -1;
ipc6.dontfrag = -1;
+ ipc6.gso_size = up->gso_size;
sockc.tsflags = sk->sk_tsflags;
/* destination address check */
@@ -1259,7 +1276,10 @@ do_udp_sendmsg:
opt->tot_len = sizeof(*opt);
ipc6.opt = opt;
- err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc);
+ err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
+ if (err > 0)
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6,
+ &ipc6, &sockc);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
@@ -1324,15 +1344,16 @@ back_from_confirm:
/* Lockless fast path for the non-corking case */
if (!corkreq) {
+ struct inet_cork_full cork;
struct sk_buff *skb;
skb = ip6_make_skb(sk, getfrag, msg, ulen,
sizeof(struct udphdr), &ipc6,
&fl6, (struct rt6_info *)dst,
- msg->msg_flags, &sockc);
+ msg->msg_flags, &cork, &sockc);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
- err = udp_v6_send_skb(skb, &fl6);
+ err = udp_v6_send_skb(skb, &fl6, &cork.base);
goto out;
}
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 2a04dc9c781b..f7b85b1e6b3e 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -17,6 +17,20 @@
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
+static struct sk_buff *__udp6_gso_segment(struct sk_buff *gso_skb,
+ netdev_features_t features)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(gso_skb);
+ unsigned int mss = skb_shinfo(gso_skb)->gso_size;
+
+ if (!can_checksum_protocol(features, htons(ETH_P_IPV6)))
+ return ERR_PTR(-EIO);
+
+ return __udp_gso_segment(gso_skb, features, mss,
+ udp_v6_check(sizeof(struct udphdr) + mss,
+ &ip6h->saddr, &ip6h->daddr, 0));
+}
+
static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -42,12 +56,15 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
const struct ipv6hdr *ipv6h;
struct udphdr *uh;
- if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+ if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_UDP | SKB_GSO_UDP_L4)))
goto out;
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+ return __udp6_gso_segment(skb, features);
+
/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
* do checksum of UDP packets sent as multiple IP fragments.
*/
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index b8f9d45bfeb1..7f1e842ef05a 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -106,8 +106,11 @@ static void l2tp_dfs_seq_stop(struct seq_file *p, void *v)
return;
/* Drop reference taken by last invocation of l2tp_dfs_next_tunnel() */
- if (pd->tunnel)
+ if (pd->tunnel) {
l2tp_tunnel_dec_refcount(pd->tunnel);
+ pd->tunnel = NULL;
+ pd->session = NULL;
+ }
}
static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 7d0c963680e6..1fd9e145076a 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -619,6 +619,13 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
lock_sock(sk);
error = -EINVAL;
+
+ if (sockaddr_len != sizeof(struct sockaddr_pppol2tp) &&
+ sockaddr_len != sizeof(struct sockaddr_pppol2tpv3) &&
+ sockaddr_len != sizeof(struct sockaddr_pppol2tpin6) &&
+ sockaddr_len != sizeof(struct sockaddr_pppol2tpv3in6))
+ goto end;
+
if (sp->sa_protocol != PX_PROTO_OL2TP)
goto end;
@@ -1618,8 +1625,11 @@ static void pppol2tp_seq_stop(struct seq_file *p, void *v)
return;
/* Drop reference taken by last invocation of pppol2tp_next_tunnel() */
- if (pd->tunnel)
+ if (pd->tunnel) {
l2tp_tunnel_dec_refcount(pd->tunnel);
+ pd->tunnel = NULL;
+ pd->session = NULL;
+ }
}
static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 6d29b2b94e84..cb80ebb38311 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -189,7 +189,6 @@ static int llc_ui_release(struct socket *sock)
{
struct sock *sk = sock->sk;
struct llc_sock *llc;
- struct llc_sap *sap;
if (unlikely(sk == NULL))
goto out;
@@ -200,15 +199,19 @@ static int llc_ui_release(struct socket *sock)
llc->laddr.lsap, llc->daddr.lsap);
if (!llc_send_disc(sk))
llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo);
- sap = llc->sap;
- /* Hold this for release_sock(), so that llc_backlog_rcv() could still
- * use it.
- */
- llc_sap_hold(sap);
- if (!sock_flag(sk, SOCK_ZAPPED))
+ if (!sock_flag(sk, SOCK_ZAPPED)) {
+ struct llc_sap *sap = llc->sap;
+
+ /* Hold this for release_sock(), so that llc_backlog_rcv()
+ * could still use it.
+ */
+ llc_sap_hold(sap);
llc_sap_remove_socket(llc->sap, sk);
- release_sock(sk);
- llc_sap_put(sap);
+ release_sock(sk);
+ llc_sap_put(sap);
+ } else {
+ release_sock(sk);
+ }
if (llc->dev)
dev_put(llc->dev);
sock_put(sk);
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index 163121192aca..4d78375f9872 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -1099,14 +1099,7 @@ int llc_conn_ac_inc_tx_win_size(struct sock *sk, struct sk_buff *skb)
int llc_conn_ac_stop_all_timers(struct sock *sk, struct sk_buff *skb)
{
- struct llc_sock *llc = llc_sk(sk);
-
- del_timer(&llc->pf_cycle_timer.timer);
- del_timer(&llc->ack_timer.timer);
- del_timer(&llc->rej_sent_timer.timer);
- del_timer(&llc->busy_state_timer.timer);
- llc->ack_must_be_send = 0;
- llc->ack_pf = 0;
+ llc_sk_stop_all_timers(sk, false);
return 0;
}
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 110e32bcb399..c0ac522b48a1 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -961,6 +961,26 @@ out:
return sk;
}
+void llc_sk_stop_all_timers(struct sock *sk, bool sync)
+{
+ struct llc_sock *llc = llc_sk(sk);
+
+ if (sync) {
+ del_timer_sync(&llc->pf_cycle_timer.timer);
+ del_timer_sync(&llc->ack_timer.timer);
+ del_timer_sync(&llc->rej_sent_timer.timer);
+ del_timer_sync(&llc->busy_state_timer.timer);
+ } else {
+ del_timer(&llc->pf_cycle_timer.timer);
+ del_timer(&llc->ack_timer.timer);
+ del_timer(&llc->rej_sent_timer.timer);
+ del_timer(&llc->busy_state_timer.timer);
+ }
+
+ llc->ack_must_be_send = 0;
+ llc->ack_pf = 0;
+}
+
/**
* llc_sk_free - Frees a LLC socket
* @sk - socket to free
@@ -973,7 +993,7 @@ void llc_sk_free(struct sock *sk)
llc->state = LLC_CONN_OUT_OF_SVC;
/* Stop all (possibly) running timers */
- llc_conn_ac_stop_all_timers(sk, NULL);
+ llc_sk_stop_all_timers(sk, true);
#ifdef DEBUG_LLC_CONN_ALLOC
printk(KERN_INFO "%s: unackq=%d, txq=%d\n", __func__,
skb_queue_len(&llc->pdu_unack_q),
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 704b3832dbad..44d8a55e9721 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -594,6 +594,7 @@ config NFT_QUOTA
config NFT_REJECT
default m if NETFILTER_ADVANCED=n
tristate "Netfilter nf_tables reject support"
+ depends on !NF_TABLES_INET || (IPV6!=m || m)
help
This option adds the "reject" expression that you can use to
explicitly deny and notify via TCP reset/ICMP informational errors
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 5ebde4b15810..f36098887ad0 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2384,11 +2384,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
strlcpy(cfg.mcast_ifn, dm->mcast_ifn,
sizeof(cfg.mcast_ifn));
cfg.syncid = dm->syncid;
- rtnl_lock();
- mutex_lock(&ipvs->sync_mutex);
ret = start_sync_thread(ipvs, &cfg, dm->state);
- mutex_unlock(&ipvs->sync_mutex);
- rtnl_unlock();
} else {
mutex_lock(&ipvs->sync_mutex);
ret = stop_sync_thread(ipvs, dm->state);
@@ -3481,12 +3477,8 @@ static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
if (ipvs->mixed_address_family_dests > 0)
return -EINVAL;
- rtnl_lock();
- mutex_lock(&ipvs->sync_mutex);
ret = start_sync_thread(ipvs, &c,
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
- mutex_unlock(&ipvs->sync_mutex);
- rtnl_unlock();
return ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index fbaf3bd05b2e..001501e25625 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -49,6 +49,7 @@
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/kernel.h>
+#include <linux/sched/signal.h>
#include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */
@@ -1360,15 +1361,9 @@ static void set_mcast_pmtudisc(struct sock *sk, int val)
/*
* Specifiy default interface for outgoing multicasts
*/
-static int set_mcast_if(struct sock *sk, char *ifname)
+static int set_mcast_if(struct sock *sk, struct net_device *dev)
{
- struct net_device *dev;
struct inet_sock *inet = inet_sk(sk);
- struct net *net = sock_net(sk);
-
- dev = __dev_get_by_name(net, ifname);
- if (!dev)
- return -ENODEV;
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
return -EINVAL;
@@ -1396,19 +1391,14 @@ static int set_mcast_if(struct sock *sk, char *ifname)
* in the in_addr structure passed in as a parameter.
*/
static int
-join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
+join_mcast_group(struct sock *sk, struct in_addr *addr, struct net_device *dev)
{
- struct net *net = sock_net(sk);
struct ip_mreqn mreq;
- struct net_device *dev;
int ret;
memset(&mreq, 0, sizeof(mreq));
memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
- dev = __dev_get_by_name(net, ifname);
- if (!dev)
- return -ENODEV;
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
return -EINVAL;
@@ -1423,15 +1413,10 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
#ifdef CONFIG_IP_VS_IPV6
static int join_mcast_group6(struct sock *sk, struct in6_addr *addr,
- char *ifname)
+ struct net_device *dev)
{
- struct net *net = sock_net(sk);
- struct net_device *dev;
int ret;
- dev = __dev_get_by_name(net, ifname);
- if (!dev)
- return -ENODEV;
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
return -EINVAL;
@@ -1443,24 +1428,18 @@ static int join_mcast_group6(struct sock *sk, struct in6_addr *addr,
}
#endif
-static int bind_mcastif_addr(struct socket *sock, char *ifname)
+static int bind_mcastif_addr(struct socket *sock, struct net_device *dev)
{
- struct net *net = sock_net(sock->sk);
- struct net_device *dev;
__be32 addr;
struct sockaddr_in sin;
- dev = __dev_get_by_name(net, ifname);
- if (!dev)
- return -ENODEV;
-
addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
if (!addr)
pr_err("You probably need to specify IP address on "
"multicast interface.\n");
IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
- ifname, &addr);
+ dev->name, &addr);
/* Now bind the socket with the address of multicast interface */
sin.sin_family = AF_INET;
@@ -1493,7 +1472,8 @@ static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
/*
* Set up sending multicast socket over UDP
*/
-static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
+static int make_send_sock(struct netns_ipvs *ipvs, int id,
+ struct net_device *dev, struct socket **sock_ret)
{
/* multicast addr */
union ipvs_sockaddr mcast_addr;
@@ -1505,9 +1485,10 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
- return ERR_PTR(result);
+ goto error;
}
- result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn);
+ *sock_ret = sock;
+ result = set_mcast_if(sock->sk, dev);
if (result < 0) {
pr_err("Error setting outbound mcast interface\n");
goto error;
@@ -1522,7 +1503,7 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
set_sock_size(sock->sk, 1, result);
if (AF_INET == ipvs->mcfg.mcast_af)
- result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn);
+ result = bind_mcastif_addr(sock, dev);
else
result = 0;
if (result < 0) {
@@ -1538,19 +1519,18 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
goto error;
}
- return sock;
+ return 0;
error:
- sock_release(sock);
- return ERR_PTR(result);
+ return result;
}
/*
* Set up receiving multicast socket over UDP
*/
-static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
- int ifindex)
+static int make_receive_sock(struct netns_ipvs *ipvs, int id,
+ struct net_device *dev, struct socket **sock_ret)
{
/* multicast addr */
union ipvs_sockaddr mcast_addr;
@@ -1562,8 +1542,9 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
- return ERR_PTR(result);
+ goto error;
}
+ *sock_ret = sock;
/* it is equivalent to the REUSEADDR option in user-space */
sock->sk->sk_reuse = SK_CAN_REUSE;
result = sysctl_sync_sock_size(ipvs);
@@ -1571,7 +1552,7 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
set_sock_size(sock->sk, 0, result);
get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
- sock->sk->sk_bound_dev_if = ifindex;
+ sock->sk->sk_bound_dev_if = dev->ifindex;
result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
if (result < 0) {
pr_err("Error binding to the multicast addr\n");
@@ -1582,21 +1563,20 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
#ifdef CONFIG_IP_VS_IPV6
if (ipvs->bcfg.mcast_af == AF_INET6)
result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr,
- ipvs->bcfg.mcast_ifn);
+ dev);
else
#endif
result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr,
- ipvs->bcfg.mcast_ifn);
+ dev);
if (result < 0) {
pr_err("Error joining to the multicast group\n");
goto error;
}
- return sock;
+ return 0;
error:
- sock_release(sock);
- return ERR_PTR(result);
+ return result;
}
@@ -1778,13 +1758,12 @@ static int sync_thread_backup(void *data)
int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
int state)
{
- struct ip_vs_sync_thread_data *tinfo;
+ struct ip_vs_sync_thread_data *tinfo = NULL;
struct task_struct **array = NULL, *task;
- struct socket *sock;
struct net_device *dev;
char *name;
int (*threadfn)(void *data);
- int id, count, hlen;
+ int id = 0, count, hlen;
int result = -ENOMEM;
u16 mtu, min_mtu;
@@ -1792,6 +1771,18 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n",
sizeof(struct ip_vs_sync_conn_v0));
+ /* Do not hold one mutex and then to block on another */
+ for (;;) {
+ rtnl_lock();
+ if (mutex_trylock(&ipvs->sync_mutex))
+ break;
+ rtnl_unlock();
+ mutex_lock(&ipvs->sync_mutex);
+ if (rtnl_trylock())
+ break;
+ mutex_unlock(&ipvs->sync_mutex);
+ }
+
if (!ipvs->sync_state) {
count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);
ipvs->threads_mask = count - 1;
@@ -1810,7 +1801,8 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
dev = __dev_get_by_name(ipvs->net, c->mcast_ifn);
if (!dev) {
pr_err("Unknown mcast interface: %s\n", c->mcast_ifn);
- return -ENODEV;
+ result = -ENODEV;
+ goto out_early;
}
hlen = (AF_INET6 == c->mcast_af) ?
sizeof(struct ipv6hdr) + sizeof(struct udphdr) :
@@ -1827,26 +1819,30 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
c->sync_maxlen = mtu - hlen;
if (state == IP_VS_STATE_MASTER) {
+ result = -EEXIST;
if (ipvs->ms)
- return -EEXIST;
+ goto out_early;
ipvs->mcfg = *c;
name = "ipvs-m:%d:%d";
threadfn = sync_thread_master;
} else if (state == IP_VS_STATE_BACKUP) {
+ result = -EEXIST;
if (ipvs->backup_threads)
- return -EEXIST;
+ goto out_early;
ipvs->bcfg = *c;
name = "ipvs-b:%d:%d";
threadfn = sync_thread_backup;
} else {
- return -EINVAL;
+ result = -EINVAL;
+ goto out_early;
}
if (state == IP_VS_STATE_MASTER) {
struct ipvs_master_sync_state *ms;
+ result = -ENOMEM;
ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL);
if (!ipvs->ms)
goto out;
@@ -1862,39 +1858,38 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
} else {
array = kcalloc(count, sizeof(struct task_struct *),
GFP_KERNEL);
+ result = -ENOMEM;
if (!array)
goto out;
}
- tinfo = NULL;
for (id = 0; id < count; id++) {
- if (state == IP_VS_STATE_MASTER)
- sock = make_send_sock(ipvs, id);
- else
- sock = make_receive_sock(ipvs, id, dev->ifindex);
- if (IS_ERR(sock)) {
- result = PTR_ERR(sock);
- goto outtinfo;
- }
+ result = -ENOMEM;
tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
if (!tinfo)
- goto outsocket;
+ goto out;
tinfo->ipvs = ipvs;
- tinfo->sock = sock;
+ tinfo->sock = NULL;
if (state == IP_VS_STATE_BACKUP) {
tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
GFP_KERNEL);
if (!tinfo->buf)
- goto outtinfo;
+ goto out;
} else {
tinfo->buf = NULL;
}
tinfo->id = id;
+ if (state == IP_VS_STATE_MASTER)
+ result = make_send_sock(ipvs, id, dev, &tinfo->sock);
+ else
+ result = make_receive_sock(ipvs, id, dev, &tinfo->sock);
+ if (result < 0)
+ goto out;
task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);
if (IS_ERR(task)) {
result = PTR_ERR(task);
- goto outtinfo;
+ goto out;
}
tinfo = NULL;
if (state == IP_VS_STATE_MASTER)
@@ -1911,20 +1906,20 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
ipvs->sync_state |= state;
spin_unlock_bh(&ipvs->sync_buff_lock);
+ mutex_unlock(&ipvs->sync_mutex);
+ rtnl_unlock();
+
/* increase the module use count */
ip_vs_use_count_inc();
return 0;
-outsocket:
- sock_release(sock);
-
-outtinfo:
- if (tinfo) {
- sock_release(tinfo->sock);
- kfree(tinfo->buf);
- kfree(tinfo);
- }
+out:
+ /* We do not need RTNL lock anymore, release it here so that
+ * sock_release below and in the kthreads can use rtnl_lock
+ * to leave the mcast group.
+ */
+ rtnl_unlock();
count = id;
while (count-- > 0) {
if (state == IP_VS_STATE_MASTER)
@@ -1932,13 +1927,23 @@ outtinfo:
else
kthread_stop(array[count]);
}
- kfree(array);
-
-out:
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
kfree(ipvs->ms);
ipvs->ms = NULL;
}
+ mutex_unlock(&ipvs->sync_mutex);
+ if (tinfo) {
+ if (tinfo->sock)
+ sock_release(tinfo->sock);
+ kfree(tinfo->buf);
+ kfree(tinfo);
+ }
+ kfree(array);
+ return result;
+
+out_early:
+ mutex_unlock(&ipvs->sync_mutex);
+ rtnl_unlock();
return result;
}
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 8ef21d9f9a00..4b2b3d53acfc 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -252,7 +252,7 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
static inline int expect_matches(const struct nf_conntrack_expect *a,
const struct nf_conntrack_expect *b)
{
- return a->master == b->master && a->class == b->class &&
+ return a->master == b->master &&
nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
@@ -421,6 +421,9 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
h = nf_ct_expect_dst_hash(net, &expect->tuple);
hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
if (expect_matches(i, expect)) {
+ if (i->class != expect->class)
+ return -EALREADY;
+
if (nf_ct_remove_expect(i))
break;
} else if (expect_clash(i, expect)) {
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 9fe0ddc333fb..277bbfe26478 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -9,6 +9,7 @@
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
+#include <linux/kmemleak.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
@@ -71,6 +72,7 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
rcu_read_unlock();
alloc = max(newlen, NF_CT_EXT_PREALLOC);
+ kmemleak_not_leak(old);
new = __krealloc(old, alloc, gfp);
if (!new)
return NULL;
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 4dbb5bad4363..908e51e2dc2b 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -938,11 +938,19 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
datalen, rtp_exp, rtcp_exp,
mediaoff, medialen, daddr);
else {
- if (nf_ct_expect_related(rtp_exp) == 0) {
- if (nf_ct_expect_related(rtcp_exp) != 0)
- nf_ct_unexpect_related(rtp_exp);
- else
+ /* -EALREADY handling works around end-points that send
+ * SDP messages with identical port but different media type,
+ * we pretend expectation was set up.
+ */
+ int errp = nf_ct_expect_related(rtp_exp);
+
+ if (errp == 0 || errp == -EALREADY) {
+ int errcp = nf_ct_expect_related(rtcp_exp);
+
+ if (errcp == 0 || errcp == -EALREADY)
ret = NF_ACCEPT;
+ else if (errp == 0)
+ nf_ct_unexpect_related(rtp_exp);
}
}
nf_ct_expect_put(rtcp_exp);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 9134cc429ad4..04d4e3772584 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2361,41 +2361,46 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
}
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
- if (nft_is_active_next(net, old_rule)) {
- trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
- old_rule);
- if (trans == NULL) {
- err = -ENOMEM;
- goto err2;
- }
- nft_deactivate_next(net, old_rule);
- chain->use--;
- list_add_tail_rcu(&rule->list, &old_rule->list);
- } else {
+ if (!nft_is_active_next(net, old_rule)) {
err = -ENOENT;
goto err2;
}
- } else if (nlh->nlmsg_flags & NLM_F_APPEND)
- if (old_rule)
- list_add_rcu(&rule->list, &old_rule->list);
- else
- list_add_tail_rcu(&rule->list, &chain->rules);
- else {
- if (old_rule)
- list_add_tail_rcu(&rule->list, &old_rule->list);
- else
- list_add_rcu(&rule->list, &chain->rules);
- }
+ trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
+ old_rule);
+ if (trans == NULL) {
+ err = -ENOMEM;
+ goto err2;
+ }
+ nft_deactivate_next(net, old_rule);
+ chain->use--;
- if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
- err = -ENOMEM;
- goto err3;
+ if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
+ err = -ENOMEM;
+ goto err2;
+ }
+
+ list_add_tail_rcu(&rule->list, &old_rule->list);
+ } else {
+ if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
+ err = -ENOMEM;
+ goto err2;
+ }
+
+ if (nlh->nlmsg_flags & NLM_F_APPEND) {
+ if (old_rule)
+ list_add_rcu(&rule->list, &old_rule->list);
+ else
+ list_add_tail_rcu(&rule->list, &chain->rules);
+ } else {
+ if (old_rule)
+ list_add_tail_rcu(&rule->list, &old_rule->list);
+ else
+ list_add_rcu(&rule->list, &chain->rules);
+ }
}
chain->use++;
return 0;
-err3:
- list_del_rcu(&rule->list);
err2:
nf_tables_rule_destroy(&ctx, rule);
err1:
@@ -3207,18 +3212,20 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
err = ops->init(set, &desc, nla);
if (err < 0)
- goto err2;
+ goto err3;
err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
if (err < 0)
- goto err3;
+ goto err4;
list_add_tail_rcu(&set->list, &table->sets);
table->use++;
return 0;
-err3:
+err4:
ops->destroy(set);
+err3:
+ kfree(set->name);
err2:
kvfree(set);
err1:
@@ -5738,7 +5745,7 @@ static void nft_chain_commit_update(struct nft_trans *trans)
struct nft_base_chain *basechain;
if (nft_trans_chain_name(trans))
- strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
+ swap(trans->ctx.chain->name, nft_trans_chain_name(trans));
if (!nft_is_base_chain(trans->ctx.chain))
return;
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 773da82190dc..94df000abb92 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -36,11 +36,10 @@ MODULE_ALIAS("ipt_connmark");
MODULE_ALIAS("ip6t_connmark");
static unsigned int
-connmark_tg_shift(struct sk_buff *skb,
- const struct xt_connmark_tginfo1 *info,
- u8 shift_bits, u8 shift_dir)
+connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info)
{
enum ip_conntrack_info ctinfo;
+ u_int32_t new_targetmark;
struct nf_conn *ct;
u_int32_t newmark;
@@ -51,34 +50,39 @@ connmark_tg_shift(struct sk_buff *skb,
switch (info->mode) {
case XT_CONNMARK_SET:
newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
- if (shift_dir == D_SHIFT_RIGHT)
- newmark >>= shift_bits;
+ if (info->shift_dir == D_SHIFT_RIGHT)
+ newmark >>= info->shift_bits;
else
- newmark <<= shift_bits;
+ newmark <<= info->shift_bits;
+
if (ct->mark != newmark) {
ct->mark = newmark;
nf_conntrack_event_cache(IPCT_MARK, ct);
}
break;
case XT_CONNMARK_SAVE:
- newmark = (ct->mark & ~info->ctmask) ^
- (skb->mark & info->nfmask);
- if (shift_dir == D_SHIFT_RIGHT)
- newmark >>= shift_bits;
+ new_targetmark = (skb->mark & info->nfmask);
+ if (info->shift_dir == D_SHIFT_RIGHT)
+ new_targetmark >>= info->shift_bits;
else
- newmark <<= shift_bits;
+ new_targetmark <<= info->shift_bits;
+
+ newmark = (ct->mark & ~info->ctmask) ^
+ new_targetmark;
if (ct->mark != newmark) {
ct->mark = newmark;
nf_conntrack_event_cache(IPCT_MARK, ct);
}
break;
case XT_CONNMARK_RESTORE:
- newmark = (skb->mark & ~info->nfmask) ^
- (ct->mark & info->ctmask);
- if (shift_dir == D_SHIFT_RIGHT)
- newmark >>= shift_bits;
+ new_targetmark = (ct->mark & info->ctmask);
+ if (info->shift_dir == D_SHIFT_RIGHT)
+ new_targetmark >>= info->shift_bits;
else
- newmark <<= shift_bits;
+ new_targetmark <<= info->shift_bits;
+
+ newmark = (skb->mark & ~info->nfmask) ^
+ new_targetmark;
skb->mark = newmark;
break;
}
@@ -89,8 +93,14 @@ static unsigned int
connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_connmark_tginfo1 *info = par->targinfo;
-
- return connmark_tg_shift(skb, info, 0, 0);
+ const struct xt_connmark_tginfo2 info2 = {
+ .ctmark = info->ctmark,
+ .ctmask = info->ctmask,
+ .nfmask = info->nfmask,
+ .mode = info->mode,
+ };
+
+ return connmark_tg_shift(skb, &info2);
}
static unsigned int
@@ -98,8 +108,7 @@ connmark_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_connmark_tginfo2 *info = par->targinfo;
- return connmark_tg_shift(skb, (const struct xt_connmark_tginfo1 *)info,
- info->shift_bits, info->shift_dir);
+ return connmark_tg_shift(skb, info);
}
static int connmark_tg_check(const struct xt_tgchk_param *par)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c31b0687396a..01f3515cada0 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -329,11 +329,11 @@ static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
skb_set_queue_mapping(skb, queue_index);
}
-/* register_prot_hook must be invoked with the po->bind_lock held,
+/* __register_prot_hook must be invoked through register_prot_hook
* or from a context in which asynchronous accesses to the packet
* socket is not possible (packet_create()).
*/
-static void register_prot_hook(struct sock *sk)
+static void __register_prot_hook(struct sock *sk)
{
struct packet_sock *po = pkt_sk(sk);
@@ -348,8 +348,13 @@ static void register_prot_hook(struct sock *sk)
}
}
-/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
- * held. If the sync parameter is true, we will temporarily drop
+static void register_prot_hook(struct sock *sk)
+{
+ lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
+ __register_prot_hook(sk);
+}
+
+/* If the sync parameter is true, we will temporarily drop
* the po->bind_lock and do a synchronize_net to make sure no
* asynchronous packet processing paths still refer to the elements
* of po->prot_hook. If the sync parameter is false, it is the
@@ -359,6 +364,8 @@ static void __unregister_prot_hook(struct sock *sk, bool sync)
{
struct packet_sock *po = pkt_sk(sk);
+ lockdep_assert_held_once(&po->bind_lock);
+
po->running = 0;
if (po->fanout)
@@ -3252,7 +3259,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
if (proto) {
po->prot_hook.type = proto;
- register_prot_hook(sk);
+ __register_prot_hook(sk);
}
mutex_lock(&net->packet.sklist_lock);
@@ -3732,12 +3739,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
- return -EBUSY;
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
- po->tp_loss = !!val;
- return 0;
+
+ lock_sock(sk);
+ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+ ret = -EBUSY;
+ } else {
+ po->tp_loss = !!val;
+ ret = 0;
+ }
+ release_sock(sk);
+ return ret;
}
case PACKET_AUXDATA:
{
@@ -3748,7 +3761,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
+ lock_sock(sk);
po->auxdata = !!val;
+ release_sock(sk);
return 0;
}
case PACKET_ORIGDEV:
@@ -3760,7 +3775,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
+ lock_sock(sk);
po->origdev = !!val;
+ release_sock(sk);
return 0;
}
case PACKET_VNET_HDR:
@@ -3769,15 +3786,20 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (sock->type != SOCK_RAW)
return -EINVAL;
- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
- return -EBUSY;
if (optlen < sizeof(val))
return -EINVAL;
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
- po->has_vnet_hdr = !!val;
- return 0;
+ lock_sock(sk);
+ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+ ret = -EBUSY;
+ } else {
+ po->has_vnet_hdr = !!val;
+ ret = 0;
+ }
+ release_sock(sk);
+ return ret;
}
case PACKET_TIMESTAMP:
{
@@ -3815,11 +3837,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
- return -EBUSY;
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
- po->tp_tx_has_off = !!val;
+
+ lock_sock(sk);
+ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+ ret = -EBUSY;
+ } else {
+ po->tp_tx_has_off = !!val;
+ ret = 0;
+ }
+ release_sock(sk);
return 0;
}
case PACKET_QDISC_BYPASS:
diff --git a/net/packet/internal.h b/net/packet/internal.h
index a1d2b2319ae9..3bb7c5fb3bff 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -112,10 +112,12 @@ struct packet_sock {
int copy_thresh;
spinlock_t bind_lock;
struct mutex pg_vec_lock;
- unsigned int running:1, /* prot_hook is attached*/
- auxdata:1,
+ unsigned int running; /* bind_lock must be held */
+ unsigned int auxdata:1, /* writer must hold sock lock */
origdev:1,
- has_vnet_hdr:1;
+ has_vnet_hdr:1,
+ tp_loss:1,
+ tp_tx_has_off:1;
int pressure;
int ifindex; /* bound device */
__be16 num;
@@ -125,8 +127,6 @@ struct packet_sock {
enum tpacket_versions tp_version;
unsigned int tp_hdrlen;
unsigned int tp_reserve;
- unsigned int tp_loss:1;
- unsigned int tp_tx_has_off:1;
unsigned int tp_tstamp;
struct net_device __rcu *cached_dev;
int (*xmit)(struct sk_buff *skb);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index eea1d8611b20..13b38ad0fa4a 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -547,7 +547,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd,
ic->i_send_cq, ic->i_recv_cq);
- return ret;
+ goto out;
sends_out:
vfree(ic->i_sends);
@@ -572,6 +572,7 @@ send_cq_out:
ic->i_send_cq = NULL;
rds_ibdev_out:
rds_ib_remove_conn(rds_ibdev, conn);
+out:
rds_ib_dev_put(rds_ibdev);
return ret;
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index a5994cf0512b..8527cfdc446d 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -652,7 +652,7 @@ static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
}
}
- return 0;
+ return -ENOENT;
}
static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
@@ -682,7 +682,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
u16 mtype;
u16 dlen;
- curr_data = ife_tlv_meta_decode(tlv_data, &mtype, &dlen, NULL);
+ curr_data = ife_tlv_meta_decode(tlv_data, ifehdr_end, &mtype,
+ &dlen, NULL);
+ if (!curr_data) {
+ qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
+ return TC_ACT_SHOT;
+ }
if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) {
/* abuse overlimits to count when we receive metadata
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 837806dd5799..a8f3b088fcb2 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -988,31 +988,6 @@ out:
return match;
}
-/* Is this the association we are looking for? */
-struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
- struct net *net,
- const union sctp_addr *laddr,
- const union sctp_addr *paddr)
-{
- struct sctp_transport *transport;
-
- if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) &&
- (htons(asoc->peer.port) == paddr->v4.sin_port) &&
- net_eq(sock_net(asoc->base.sk), net)) {
- transport = sctp_assoc_lookup_paddr(asoc, paddr);
- if (!transport)
- goto out;
-
- if (sctp_bind_addr_match(&asoc->base.bind_addr, laddr,
- sctp_sk(asoc->base.sk)))
- goto out;
- }
- transport = NULL;
-
-out:
- return transport;
-}
-
/* Do delayed input processing. This is scheduled by sctp_rcv(). */
static void sctp_assoc_bh_rcv(struct work_struct *work)
{
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index f211b3db6a35..dee7cbd54831 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1457,7 +1457,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
* the outstanding bytes for this chunk, so only
* count bytes associated with a transport.
*/
- if (transport) {
+ if (transport && !tchunk->tsn_gap_acked) {
/* If this chunk is being used for RTT
* measurement, calculate the RTT and update
* the RTO using this value.
@@ -1469,14 +1469,34 @@ static void sctp_check_transmitted(struct sctp_outq *q,
* first instance of the packet or a later
* instance).
*/
- if (!tchunk->tsn_gap_acked &&
- !sctp_chunk_retransmitted(tchunk) &&
+ if (!sctp_chunk_retransmitted(tchunk) &&
tchunk->rtt_in_progress) {
tchunk->rtt_in_progress = 0;
rtt = jiffies - tchunk->sent_at;
sctp_transport_update_rto(transport,
rtt);
}
+
+ if (TSN_lte(tsn, sack_ctsn)) {
+ /*
+ * SFR-CACC algorithm:
+ * 2) If the SACK contains gap acks
+ * and the flag CHANGEOVER_ACTIVE is
+ * set the receiver of the SACK MUST
+ * take the following action:
+ *
+ * B) For each TSN t being acked that
+ * has not been acked in any SACK so
+ * far, set cacc_saw_newack to 1 for
+ * the destination that the TSN was
+ * sent to.
+ */
+ if (sack->num_gap_ack_blocks &&
+ q->asoc->peer.primary_path->cacc.
+ changeover_active)
+ transport->cacc.cacc_saw_newack
+ = 1;
+ }
}
/* If the chunk hasn't been marked as ACKED,
@@ -1508,28 +1528,6 @@ static void sctp_check_transmitted(struct sctp_outq *q,
restart_timer = 1;
forward_progress = true;
- if (!tchunk->tsn_gap_acked) {
- /*
- * SFR-CACC algorithm:
- * 2) If the SACK contains gap acks
- * and the flag CHANGEOVER_ACTIVE is
- * set the receiver of the SACK MUST
- * take the following action:
- *
- * B) For each TSN t being acked that
- * has not been acked in any SACK so
- * far, set cacc_saw_newack to 1 for
- * the destination that the TSN was
- * sent to.
- */
- if (transport &&
- sack->num_gap_ack_blocks &&
- q->asoc->peer.primary_path->cacc.
- changeover_active)
- transport->cacc.cacc_saw_newack
- = 1;
- }
-
list_add_tail(&tchunk->transmitted_list,
&q->sacked);
} else {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 5a4fb1dc8400..db93eabd6ef5 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -779,10 +779,9 @@ struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc,
* association. This reports on which TSN's we've seen to date,
* including duplicates and gaps.
*/
-struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
+struct sctp_chunk *sctp_make_sack(struct sctp_association *asoc)
{
struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
- struct sctp_association *aptr = (struct sctp_association *)asoc;
struct sctp_gap_ack_block gabs[SCTP_MAX_GABS];
__u16 num_gabs, num_dup_tsns;
struct sctp_transport *trans;
@@ -857,7 +856,7 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
/* Add the duplicate TSN information. */
if (num_dup_tsns) {
- aptr->stats.idupchunks += num_dup_tsns;
+ asoc->stats.idupchunks += num_dup_tsns;
sctp_addto_chunk(retval, sizeof(__u32) * num_dup_tsns,
sctp_tsnmap_get_dups(map));
}
@@ -869,11 +868,11 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
* association so no transport will match after a wrap event like this,
* Until the next sack
*/
- if (++aptr->peer.sack_generation == 0) {
+ if (++asoc->peer.sack_generation == 0) {
list_for_each_entry(trans, &asoc->peer.transport_addr_list,
transports)
trans->sack_generation = 0;
- aptr->peer.sack_generation = 1;
+ asoc->peer.sack_generation = 1;
}
nodata:
return retval;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index f5d4b69dbabc..4470501374bf 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -978,10 +978,6 @@ static void smc_tcp_listen_work(struct work_struct *work)
}
out:
- if (lsmc->clcsock) {
- sock_release(lsmc->clcsock);
- lsmc->clcsock = NULL;
- }
release_sock(lsk);
sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 805b139756db..092bebc70048 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -67,7 +67,7 @@ static void strp_abort_strp(struct strparser *strp, int err)
static void strp_start_timer(struct strparser *strp, long timeo)
{
- if (timeo)
+ if (timeo && timeo != LONG_MAX)
mod_delayed_work(strp_wq, &strp->msg_timer_work, timeo);
}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 71e79597f940..6ed1c02cfc94 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -693,8 +693,7 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
if (!sgout) {
nsg = skb_cow_data(skb, 0, &unused) + 1;
sgin = kmalloc_array(nsg, sizeof(*sgin), sk->sk_allocation);
- if (!sgout)
- sgout = sgin;
+ sgout = sgin;
}
sg_init_table(sgin, nsg);