summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/mod.c1
-rw-r--r--net/batman-adv/bat_iv_ogm.c25
-rw-r--r--net/batman-adv/bat_v_elp.c10
-rw-r--r--net/batman-adv/bat_v_ogm.c27
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c6
-rw-r--r--net/batman-adv/distributed-arp-table.c2
-rw-r--r--net/batman-adv/fragmentation.c6
-rw-r--r--net/batman-adv/hard-interface.c16
-rw-r--r--net/batman-adv/log.h6
-rw-r--r--net/batman-adv/main.c2
-rw-r--r--net/batman-adv/main.h8
-rw-r--r--net/batman-adv/multicast.c21
-rw-r--r--net/batman-adv/netlink.c14
-rw-r--r--net/batman-adv/network-coding.c14
-rw-r--r--net/batman-adv/originator.c8
-rw-r--r--net/batman-adv/routing.c4
-rw-r--r--net/batman-adv/send.c4
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/tp_meter.c12
-rw-r--r--net/batman-adv/translation-table.c10
-rw-r--r--net/batman-adv/tvlv.c4
-rw-r--r--net/batman-adv/types.h18
-rw-r--r--net/bridge/br_fdb.c127
-rw-r--r--net/bridge/br_mrp.c10
-rw-r--r--net/bridge/br_mrp_netlink.c64
-rw-r--r--net/bridge/br_netlink.c25
-rw-r--r--net/bridge/br_private.h13
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c1
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c1
-rw-r--r--net/ceph/ceph_common.c14
-rw-r--r--net/ceph/osd_client.c9
-rw-r--r--net/core/dev.c27
-rw-r--r--net/core/devlink.c141
-rw-r--r--net/core/drop_monitor.c1
-rw-r--r--net/core/flow_offload.c47
-rw-r--r--net/core/neighbour.c1
-rw-r--r--net/core/sock.c4
-rw-r--r--net/core/tso.c44
-rw-r--r--net/core/xdp.c1
-rw-r--r--net/dcb/dcbnl.c2
-rw-r--r--net/decnet/dn_route.c2
-rw-r--r--net/devres.c4
-rw-r--r--net/dsa/slave.c3
-rw-r--r--net/dsa/tag_edsa.c37
-rw-r--r--net/ethtool/cabletest.c17
-rw-r--r--net/ethtool/common.c2
-rw-r--r--net/ethtool/ioctl.c6
-rw-r--r--net/ethtool/linkstate.c63
-rw-r--r--net/hsr/hsr_device.c23
-rw-r--r--net/hsr/hsr_device.h2
-rw-r--r--net/hsr/hsr_main.c9
-rw-r--r--net/hsr/hsr_netlink.c17
-rw-r--r--net/ipv4/Kconfig34
-rw-r--r--net/ipv4/af_inet.c6
-rw-r--r--net/ipv4/esp4_offload.c1
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/fou.c1
-rw-r--r--net/ipv4/ip_output.c6
-rw-r--r--net/ipv4/ip_tunnel.c14
-rw-r--r--net/ipv4/netfilter/ip_tables.c15
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c1
-rw-r--r--net/ipv4/netfilter/iptable_filter.c10
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c10
-rw-r--r--net/ipv4/netfilter/iptable_nat.c10
-rw-r--r--net/ipv4/netfilter/iptable_raw.c10
-rw-r--r--net/ipv4/netfilter/iptable_security.c11
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c1
-rw-r--r--net/ipv4/netfilter/nft_dup_ipv4.c1
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c1
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c1
-rw-r--r--net/ipv4/tcp_cubic.c5
-rw-r--r--net/ipv4/tcp_input.c80
-rw-r--r--net/ipv4/tcp_output.c12
-rw-r--r--net/ipv6/Kconfig34
-rw-r--r--net/ipv6/esp6_offload.c1
-rw-r--r--net/ipv6/exthdrs.c2
-rw-r--r--net/ipv6/fib6_rules.c9
-rw-r--r--net/ipv6/fou6.c1
-rw-r--r--net/ipv6/icmp.c5
-rw-r--r--net/ipv6/ila/ila_main.c1
-rw-r--r--net/ipv6/ip6_fib.c3
-rw-r--r--net/ipv6/ip6_gre.c9
-rw-r--r--net/ipv6/ip6_icmp.c10
-rw-r--r--net/ipv6/ip6_offload.c8
-rw-r--r--net/ipv6/netfilter/ip6_tables.c15
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c1
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c10
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c10
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c10
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c10
-rw-r--r--net/ipv6/netfilter/ip6table_security.c10
-rw-r--r--net/ipv6/netfilter/nf_flow_table_ipv6.c1
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c1
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c1
-rw-r--r--net/ipv6/netfilter/nft_reject_ipv6.c1
-rw-r--r--net/ipv6/ping.c1
-rw-r--r--net/ipv6/route.c8
-rw-r--r--net/ipv6/rpl_iptunnel.c3
-rw-r--r--net/ipv6/tcp_ipv6.c7
-rw-r--r--net/l2tp/l2tp_eth.c2
-rw-r--r--net/l3mdev/l3mdev.c93
-rw-r--r--net/mptcp/Kconfig20
-rw-r--r--net/mptcp/Makefile4
-rw-r--r--net/mptcp/crypto.c63
-rw-r--r--net/mptcp/crypto_test.c72
-rw-r--r--net/mptcp/options.c11
-rw-r--r--net/mptcp/pm.c2
-rw-r--r--net/mptcp/pm_netlink.c2
-rw-r--r--net/mptcp/protocol.c435
-rw-r--r--net/mptcp/protocol.h80
-rw-r--r--net/mptcp/subflow.c188
-rw-r--r--net/mptcp/token.c280
-rw-r--r--net/mptcp/token_test.c140
-rw-r--r--net/netfilter/ipset/ip_set_core.c2
-rw-r--r--net/netfilter/nf_dup_netdev.c1
-rw-r--r--net/netfilter/nf_flow_table_core.c1
-rw-r--r--net/netfilter/nf_flow_table_inet.c1
-rw-r--r--net/netfilter/nf_flow_table_offload.c1
-rw-r--r--net/netfilter/nf_synproxy_core.c1
-rw-r--r--net/netfilter/nf_tables_offload.c1
-rw-r--r--net/netfilter/nfnetlink.c1
-rw-r--r--net/netfilter/nft_compat.c1
-rw-r--r--net/netfilter/nft_connlimit.c1
-rw-r--r--net/netfilter/nft_counter.c1
-rw-r--r--net/netfilter/nft_ct.c1
-rw-r--r--net/netfilter/nft_dup_netdev.c1
-rw-r--r--net/netfilter/nft_fib_inet.c1
-rw-r--r--net/netfilter/nft_fib_netdev.c1
-rw-r--r--net/netfilter/nft_flow_offload.c1
-rw-r--r--net/netfilter/nft_hash.c1
-rw-r--r--net/netfilter/nft_limit.c1
-rw-r--r--net/netfilter/nft_log.c1
-rw-r--r--net/netfilter/nft_masq.c1
-rw-r--r--net/netfilter/nft_nat.c1
-rw-r--r--net/netfilter/nft_numgen.c1
-rw-r--r--net/netfilter/nft_objref.c1
-rw-r--r--net/netfilter/nft_osf.c1
-rw-r--r--net/netfilter/nft_queue.c1
-rw-r--r--net/netfilter/nft_quota.c1
-rw-r--r--net/netfilter/nft_redir.c1
-rw-r--r--net/netfilter/nft_reject.c1
-rw-r--r--net/netfilter/nft_reject_inet.c1
-rw-r--r--net/netfilter/nft_synproxy.c1
-rw-r--r--net/netfilter/nft_tunnel.c1
-rw-r--r--net/netfilter/xt_nat.c1
-rw-r--r--net/openvswitch/actions.c9
-rw-r--r--net/packet/af_packet.c2
-rw-r--r--net/rds/transport.c26
-rw-r--r--net/rxrpc/call_accept.c7
-rw-r--r--net/rxrpc/call_event.c2
-rw-r--r--net/rxrpc/input.c7
-rw-r--r--net/sched/Kconfig4
-rw-r--r--net/sched/act_api.c12
-rw-r--r--net/sched/act_ct.c6
-rw-r--r--net/sched/act_gact.c7
-rw-r--r--net/sched/act_gate.c132
-rw-r--r--net/sched/act_mirred.c6
-rw-r--r--net/sched/act_pedit.c6
-rw-r--r--net/sched/act_police.c4
-rw-r--r--net/sched/act_skbedit.c5
-rw-r--r--net/sched/act_vlan.c6
-rw-r--r--net/sched/cls_api.c148
-rw-r--r--net/sched/cls_flower.c1
-rw-r--r--net/sched/cls_matchall.c3
-rw-r--r--net/sched/cls_tcindex.c2
-rw-r--r--net/sched/cls_u32.c4
-rw-r--r--net/sched/sch_api.c3
-rw-r--r--net/sched/sch_atm.c4
-rw-r--r--net/sched/sch_blackhole.c2
-rw-r--r--net/sched/sch_cake.c68
-rw-r--r--net/sched/sch_cbq.c4
-rw-r--r--net/sched/sch_cbs.c18
-rw-r--r--net/sched/sch_choke.c2
-rw-r--r--net/sched/sch_codel.c2
-rw-r--r--net/sched/sch_drr.c4
-rw-r--r--net/sched/sch_dsmark.c4
-rw-r--r--net/sched/sch_etf.c2
-rw-r--r--net/sched/sch_ets.c4
-rw-r--r--net/sched/sch_fifo.c6
-rw-r--r--net/sched/sch_fq.c3
-rw-r--r--net/sched/sch_fq_codel.c3
-rw-r--r--net/sched/sch_fq_pie.c2
-rw-r--r--net/sched/sch_generic.c4
-rw-r--r--net/sched/sch_gred.c2
-rw-r--r--net/sched/sch_hfsc.c6
-rw-r--r--net/sched/sch_hhf.c3
-rw-r--r--net/sched/sch_htb.c4
-rw-r--r--net/sched/sch_multiq.c4
-rw-r--r--net/sched/sch_netem.c8
-rw-r--r--net/sched/sch_pie.c2
-rw-r--r--net/sched/sch_plug.c2
-rw-r--r--net/sched/sch_prio.c6
-rw-r--r--net/sched/sch_qfq.c4
-rw-r--r--net/sched/sch_red.c102
-rw-r--r--net/sched/sch_sfb.c4
-rw-r--r--net/sched/sch_sfq.c2
-rw-r--r--net/sched/sch_skbprio.c2
-rw-r--r--net/sched/sch_taprio.c9
-rw-r--r--net/sched/sch_tbf.c10
-rw-r--r--net/sched/sch_teql.c4
-rw-r--r--net/sctp/associola.c5
-rw-r--r--net/sctp/bind_addr.c1
-rw-r--r--net/sctp/protocol.c3
-rw-r--r--net/tipc/bcast.c6
-rw-r--r--net/tipc/bcast.h4
-rw-r--r--net/tipc/link.c10
-rw-r--r--net/tipc/msg.h46
-rw-r--r--net/tipc/name_distr.c116
-rw-r--r--net/tipc/name_distr.h9
-rw-r--r--net/tipc/name_table.c9
-rw-r--r--net/tipc/name_table.h2
-rw-r--r--net/tipc/node.c29
-rw-r--r--net/tipc/node.h8
-rw-r--r--net/tls/tls_device.c60
-rw-r--r--net/xfrm/Kconfig24
-rw-r--r--net/xfrm/xfrm_device.c37
-rw-r--r--net/xfrm/xfrm_output.c4
217 files changed, 2721 insertions, 1126 deletions
diff --git a/net/9p/mod.c b/net/9p/mod.c
index c1b62428da7b..5126566850bd 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -189,3 +189,4 @@ MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>");
MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Plan 9 Resource Sharing Support (9P2000)");
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index e87f19c82e8d..a4faf5f904d9 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -134,7 +134,7 @@ static u8 batadv_ring_buffer_avg(const u8 lq_recv[])
*
* Return: the originator object corresponding to the passed mac address or NULL
* on failure.
- * If the object does not exists it is created an initialised.
+ * If the object does not exist, it is created and initialised.
*/
static struct batadv_orig_node *
batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr)
@@ -871,7 +871,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
}
/**
- * batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over iterface
+ * batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over interface
* @orig_node: originator which reproadcasted the OGMs directly
* @if_outgoing: interface which transmitted the original OGM and received the
* direct rebroadcast
@@ -1075,10 +1075,10 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
struct batadv_neigh_ifinfo *neigh_ifinfo;
u8 total_count;
u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own;
+ unsigned int tq_iface_hop_penalty = BATADV_TQ_MAX_VALUE;
unsigned int neigh_rq_inv_cube, neigh_rq_max_cube;
unsigned int tq_asym_penalty, inv_asym_penalty;
unsigned int combined_tq;
- unsigned int tq_iface_penalty;
bool ret = false;
/* find corresponding one hop neighbor */
@@ -1157,31 +1157,32 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
inv_asym_penalty = BATADV_TQ_MAX_VALUE * neigh_rq_inv_cube;
inv_asym_penalty /= neigh_rq_max_cube;
tq_asym_penalty = BATADV_TQ_MAX_VALUE - inv_asym_penalty;
+ tq_iface_hop_penalty -= atomic_read(&if_incoming->hop_penalty);
/* penalize if the OGM is forwarded on the same interface. WiFi
* interfaces and other half duplex devices suffer from throughput
* drops as they can't send and receive at the same time.
*/
- tq_iface_penalty = BATADV_TQ_MAX_VALUE;
if (if_outgoing && if_incoming == if_outgoing &&
batadv_is_wifi_hardif(if_outgoing))
- tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE,
- bat_priv);
+ tq_iface_hop_penalty = batadv_hop_penalty(tq_iface_hop_penalty,
+ bat_priv);
combined_tq = batadv_ogm_packet->tq *
tq_own *
tq_asym_penalty *
- tq_iface_penalty;
+ tq_iface_hop_penalty;
combined_tq /= BATADV_TQ_MAX_VALUE *
BATADV_TQ_MAX_VALUE *
BATADV_TQ_MAX_VALUE;
batadv_ogm_packet->tq = combined_tq;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "bidirectional: orig = %pM neigh = %pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n",
+ "bidirectional: orig = %pM neigh = %pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_hop_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n",
orig_node->orig, orig_neigh_node->orig, total_count,
- neigh_rq_count, tq_own, tq_asym_penalty, tq_iface_penalty,
- batadv_ogm_packet->tq, if_incoming->net_dev->name,
+ neigh_rq_count, tq_own, tq_asym_penalty,
+ tq_iface_hop_penalty, batadv_ogm_packet->tq,
+ if_incoming->net_dev->name,
if_outgoing ? if_outgoing->net_dev->name : "DEFAULT");
/* if link has the minimum required transmission quality
@@ -1554,7 +1555,7 @@ static void batadv_iv_ogm_process_reply(struct batadv_ogm_packet *ogm_packet,
* batadv_iv_ogm_process() - process an incoming batman iv OGM
* @skb: the skb containing the OGM
* @ogm_offset: offset to the OGM which should be processed (for aggregates)
- * @if_incoming: the interface where this packet was receved
+ * @if_incoming: the interface where this packet was received
*/
static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
struct batadv_hard_iface *if_incoming)
@@ -2288,7 +2289,7 @@ batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq,
* @msg: Netlink message to dump into
* @cb: Control block containing additional options
* @bat_priv: The bat priv with all the soft interface information
- * @single_hardif: Limit dump to this hard interfaace
+ * @single_hardif: Limit dump to this hard interface
*/
static void
batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 0bdefa35da98..d35aca0e969a 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -60,7 +60,7 @@ static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface)
* @neigh: the neighbour for which the throughput has to be obtained
*
* Return: The throughput towards the given neighbour in multiples of 100kpbs
- * (a value of '1' equals to 0.1Mbps, '10' equals 1Mbps, etc).
+ * (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc).
*/
static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
{
@@ -183,8 +183,8 @@ void batadv_v_elp_throughput_metric_update(struct work_struct *work)
*
* Sends a predefined number of unicast wifi packets to a given neighbour in
* order to trigger the throughput estimation on this link by the RC algorithm.
- * Packets are sent only if there there is not enough payload unicast traffic
- * towards this neighbour..
+ * Packets are sent only if there is not enough payload unicast traffic towards
+ * this neighbour..
*
* Return: True on success and false in case of error during skb preparation.
*/
@@ -244,7 +244,7 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh)
* batadv_v_elp_periodic_work() - ELP periodic task per interface
* @work: work queue item
*
- * Emits broadcast ELP message in regular intervals.
+ * Emits broadcast ELP messages in regular intervals.
*/
static void batadv_v_elp_periodic_work(struct work_struct *work)
{
@@ -499,7 +499,7 @@ orig_free:
* @skb: the received packet
* @if_incoming: the interface this packet was received through
*
- * Return: NET_RX_SUCCESS and consumes the skb if the packet was peoperly
+ * Return: NET_RX_SUCCESS and consumes the skb if the packet was properly
* processed or NET_RX_DROP in case of failure.
*/
int batadv_v_elp_packet_recv(struct sk_buff *skb,
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 18028b9f95f0..0f8495b9eeb1 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -47,9 +47,9 @@
* @bat_priv: the bat priv with all the soft interface information
* @addr: the address of the originator
*
- * Return: the orig_node corresponding to the specified address. If such object
- * does not exist it is allocated here. In case of allocation failure returns
- * NULL.
+ * Return: the orig_node corresponding to the specified address. If such an
+ * object does not exist, it is allocated here. In case of allocation failure
+ * returns NULL.
*/
struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv,
const u8 *addr)
@@ -172,7 +172,7 @@ static bool batadv_v_ogm_queue_left(struct sk_buff *skb,
* batadv_v_ogm_aggr_list_free - free all elements in an aggregation queue
* @hard_iface: the interface holding the aggregation queue
*
- * Empties the OGMv2 aggregation queue and frees all the skbs it contained.
+ * Empties the OGMv2 aggregation queue and frees all the skbs it contains.
*
* Caller needs to hold the hard_iface->bat_v.aggr_list.lock.
*/
@@ -378,7 +378,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
* batadv_v_ogm_aggr_work() - OGM queue periodic task per interface
* @work: work queue item
*
- * Emits aggregated OGM message in regular intervals.
+ * Emits aggregated OGM messages in regular intervals.
*/
void batadv_v_ogm_aggr_work(struct work_struct *work)
{
@@ -399,7 +399,7 @@ void batadv_v_ogm_aggr_work(struct work_struct *work)
* batadv_v_ogm_iface_enable() - prepare an interface for B.A.T.M.A.N. V
* @hard_iface: the interface to prepare
*
- * Takes care of scheduling own OGM sending routine for this interface.
+ * Takes care of scheduling its own OGM sending routine for this interface.
*
* Return: 0 on success or a negative error code otherwise
*/
@@ -455,15 +455,17 @@ unlock:
* @throughput: the current throughput
*
* Apply a penalty on the current throughput metric value based on the
- * characteristic of the interface where the OGM has been received. The return
- * value is computed as follows:
+ * characteristic of the interface where the OGM has been received.
+ *
+ * Initially the per hardif hop penalty is applied to the throughput. After
+ * that the return value is then computed as follows:
* - throughput * 50% if the incoming and outgoing interface are the
* same WiFi interface and the throughput is above
* 1MBit/s
* - throughput if the outgoing interface is the default
* interface (i.e. this OGM is processed for the
* internal table and not forwarded)
- * - throughput * hop penalty otherwise
+ * - throughput * node hop penalty otherwise
*
* Return: the penalised throughput metric.
*/
@@ -472,9 +474,14 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
struct batadv_hard_iface *if_outgoing,
u32 throughput)
{
+ int if_hop_penalty = atomic_read(&if_incoming->hop_penalty);
int hop_penalty = atomic_read(&bat_priv->hop_penalty);
int hop_penalty_max = BATADV_TQ_MAX_VALUE;
+ /* Apply per hardif hop penalty */
+ throughput = throughput * (hop_penalty_max - if_hop_penalty) /
+ hop_penalty_max;
+
/* Don't apply hop penalty in default originator table. */
if (if_outgoing == BATADV_IF_DEFAULT)
return throughput;
@@ -847,7 +854,7 @@ batadv_v_ogm_aggr_packet(int buff_pos, int packet_len,
* batadv_v_ogm_process() - process an incoming batman v OGM
* @skb: the skb containing the OGM
* @ogm_offset: offset to the OGM which should be processed (for aggregates)
- * @if_incoming: the interface where this packet was receved
+ * @if_incoming: the interface where this packet was received
*/
static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
struct batadv_hard_iface *if_incoming)
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 41cc87f06b14..91a04ca373dc 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -992,7 +992,7 @@ static bool batadv_handle_claim(struct batadv_priv *bat_priv,
* @hw_dst: the Hardware destination in the ARP Header
* @ethhdr: pointer to the Ethernet header of the claim frame
*
- * checks if it is a claim packet and if its on the same group.
+ * checks if it is a claim packet and if it's on the same group.
* This function also applies the group ID of the sender
* if it is in the same mesh.
*
@@ -1757,7 +1757,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
* @vid: the VLAN ID of the frame
*
* Checks if this packet is a loop detect frame which has been sent by us,
- * throw an uevent and log the event if that is the case.
+ * throws an uevent and logs the event if that is the case.
*
* Return: true if it is a loop detect frame which is to be dropped, false
* otherwise.
@@ -1815,7 +1815,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb,
* * we have to race for a claim
* * if the frame is allowed on the LAN
*
- * in these cases, the skb is further handled by this function
+ * In these cases, the skb is further handled by this function
*
* Return: true if handled, otherwise it returns false and the caller shall
* further process the skb.
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index b85da4b7a77b..0e6e53e9b5f3 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -666,7 +666,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst,
* @vid: VLAN identifier
* @packet_subtype: unicast4addr packet subtype to use
*
- * This function copies the skb with pskb_copy() and is sent as unicast packet
+ * This function copies the skb with pskb_copy() and is sent as a unicast packet
* to each of the selected candidates.
*
* Return: true if the packet is sent to at least one candidate, false
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 7cad97644d05..9fdbe3068153 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -102,8 +102,8 @@ static int batadv_frag_size_limit(void)
*
* Caller must hold chain->lock.
*
- * Return: true if chain is empty and caller can just insert the new fragment
- * without searching for the right position.
+ * Return: true if chain is empty and the caller can just insert the new
+ * fragment without searching for the right position.
*/
static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
u16 seqno)
@@ -306,7 +306,7 @@ free:
* set *skb to merged packet; 2) Packet is buffered: Return true and set *skb
* to NULL; 3) Error: Return false and free skb.
*
- * Return: true when packet is merged or buffered, false when skb is not not
+ * Return: true when the packet is merged or buffered, false when skb is not not
* used.
*/
bool batadv_frag_skb_buffer(struct sk_buff **skb,
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 3a256af92784..fa06b51c0144 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -138,10 +138,10 @@ static bool batadv_mutual_parents(const struct net_device *dev1,
* @net_dev: the device to check
*
* If the user creates any virtual device on top of a batman-adv interface, it
- * is important to prevent this new interface to be used to create a new mesh
- * network (this behaviour would lead to a batman-over-batman configuration).
- * This function recursively checks all the fathers of the device passed as
- * argument looking for a batman-adv soft interface.
+ * is important to prevent this new interface from being used to create a new
+ * mesh network (this behaviour would lead to a batman-over-batman
+ * configuration). This function recursively checks all the fathers of the
+ * device passed as argument looking for a batman-adv soft interface.
*
* Return: true if the device is descendant of a batman-adv mesh interface (or
* if it is a batman-adv interface itself), false otherwise
@@ -680,8 +680,8 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
* @slave: the interface enslaved in another master
* @master: the master from which slave has to be removed
*
- * Invoke ndo_del_slave on master passing slave as argument. In this way slave
- * is free'd and master can correctly change its internal state.
+ * Invoke ndo_del_slave on master passing slave as argument. In this way the
+ * slave is free'd and the master can correctly change its internal state.
*
* Return: 0 on success, a negative value representing the error otherwise
*/
@@ -818,7 +818,7 @@ err:
* @soft_iface: soft interface to check
*
* This function is only using RCU for locking - the result can therefore be
- * off when another functions is modifying the list at the same time. The
+ * off when another function is modifying the list at the same time. The
* caller can use the rtnl_lock to make sure that the count is accurate.
*
* Return: number of connected/enslaved hard interfaces
@@ -939,6 +939,8 @@ batadv_hardif_add_interface(struct net_device *net_dev)
if (batadv_is_wifi_hardif(hard_iface))
hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
+ atomic_set(&hard_iface->hop_penalty, 0);
+
batadv_v_hardif_init(hard_iface);
batadv_check_known_mac_addr(hard_iface->net_dev);
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index f9884dc56cf3..979864c0fa6b 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -69,7 +69,7 @@ int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
__printf(2, 3);
/**
- * _batadv_dbg() - Store debug output with(out) ratelimiting
+ * _batadv_dbg() - Store debug output with(out) rate limiting
* @type: type of debug message
* @bat_priv: the bat priv with all the soft interface information
* @ratelimited: whether output should be rate limited
@@ -95,7 +95,7 @@ static inline void _batadv_dbg(int type __always_unused,
#endif
/**
- * batadv_dbg() - Store debug output without ratelimiting
+ * batadv_dbg() - Store debug output without rate limiting
* @type: type of debug message
* @bat_priv: the bat priv with all the soft interface information
* @arg: format string and variable arguments
@@ -104,7 +104,7 @@ static inline void _batadv_dbg(int type __always_unused,
_batadv_dbg(type, bat_priv, 0, ## arg)
/**
- * batadv_dbg_ratelimited() - Store debug output with ratelimiting
+ * batadv_dbg_ratelimited() - Store debug output with rate limiting
* @type: type of debug message
* @bat_priv: the bat priv with all the soft interface information
* @arg: format string and variable arguments
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index d8a255c85e77..519c08c2cfba 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -666,7 +666,7 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len)
* @vid: the VLAN identifier for which the AP isolation attributed as to be
* looked up
*
- * Return: true if AP isolation is on for the VLAN idenfied by vid, false
+ * Return: true if AP isolation is on for the VLAN identified by vid, false
* otherwise
*/
bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid)
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 61d8dbe8c954..0393bb9ed3d0 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2020.2"
+#define BATADV_SOURCE_VERSION "2020.3"
#endif
/* B.A.T.M.A.N. parameters */
@@ -308,7 +308,7 @@ static inline bool batadv_has_timed_out(unsigned long timestamp,
* @y: value to compare @x against
*
* It handles overflows/underflows and can correctly check for a predecessor
- * unless the variable sequence number has grown by more then
+ * unless the variable sequence number has grown by more than
* 2**(bitwidth(x)-1)-1.
*
* This means that for a u8 with the maximum value 255, it would think:
@@ -330,11 +330,11 @@ static inline bool batadv_has_timed_out(unsigned long timestamp,
/**
* batadv_seq_after() - Checks if a sequence number x is a successor of y
- * @x: potential sucessor of @y
+ * @x: potential successor of @y
* @y: value to compare @x against
*
* It handles overflows/underflows and can correctly check for a successor
- * unless the variable sequence number has grown by more then
+ * unless the variable sequence number has grown by more than
* 2**(bitwidth(x)-1)-1.
*
* This means that for a u8 with the maximum value 255, it would think:
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 9ebdc1e864b9..bdc4a1fba1c6 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -510,7 +510,7 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev,
* the given mcast_list. In general, multicast listeners provided by
* your multicast receiving applications run directly on this node.
*
- * If there is a bridge interface on top of dev, collects from that one
+ * If there is a bridge interface on top of dev, collect from that one
* instead. Just like with IP addresses and routes, multicast listeners
* will(/should) register to the bridge interface instead of an
* enslaved bat0.
@@ -832,8 +832,8 @@ batadv_mcast_bridge_log(struct batadv_priv *bat_priv,
* @bat_priv: the bat priv with all the soft interface information
* @flags: TVLV flags indicating the new multicast state
*
- * Whenever the multicast TVLV flags this nodes announces change this notifies
- * userspace via the 'mcast' log level.
+ * Whenever the multicast TVLV flags this node announces change, this function
+ * should be used to notify userspace about the change.
*/
static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags)
{
@@ -1244,7 +1244,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv)
* @ethhdr: an ethernet header to determine the protocol family from
*
* Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 or
- * BATADV_MCAST_WANT_ALL_IPV6 flag, depending on the provided ethhdr, set and
+ * BATADV_MCAST_WANT_ALL_IPV6 flag, depending on the provided ethhdr, sets and
* increases its refcount.
*/
static struct batadv_orig_node *
@@ -1693,7 +1693,7 @@ batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_forw_send() - send packet to any detected multicast recpient
+ * batadv_mcast_forw_send() - send packet to any detected multicast recipient
* @bat_priv: the bat priv with all the soft interface information
* @skb: the multicast packet to transmit
* @vid: the vlan identifier
@@ -1742,7 +1742,8 @@ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
* @mcast_flags: flags indicating the new multicast state
*
* If the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag of this originator,
- * orig, has toggled then this method updates counter and list accordingly.
+ * orig, has toggled then this method updates the counter and the list
+ * accordingly.
*
* Caller needs to hold orig->mcast_handler_lock.
*/
@@ -1787,7 +1788,7 @@ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv,
* @mcast_flags: flags indicating the new multicast state
*
* If the BATADV_MCAST_WANT_ALL_IPV4 flag of this originator, orig, has
- * toggled then this method updates counter and list accordingly.
+ * toggled then this method updates the counter and the list accordingly.
*
* Caller needs to hold orig->mcast_handler_lock.
*/
@@ -1832,7 +1833,7 @@ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv,
* @mcast_flags: flags indicating the new multicast state
*
* If the BATADV_MCAST_WANT_ALL_IPV6 flag of this originator, orig, has
- * toggled then this method updates counter and list accordingly.
+ * toggled then this method updates the counter and the list accordingly.
*
* Caller needs to hold orig->mcast_handler_lock.
*/
@@ -1877,7 +1878,7 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv,
* @mcast_flags: flags indicating the new multicast state
*
* If the BATADV_MCAST_WANT_NO_RTR4 flag of this originator, orig, has
- * toggled then this method updates counter and list accordingly.
+ * toggled then this method updates the counter and the list accordingly.
*
* Caller needs to hold orig->mcast_handler_lock.
*/
@@ -1922,7 +1923,7 @@ static void batadv_mcast_want_rtr4_update(struct batadv_priv *bat_priv,
* @mcast_flags: flags indicating the new multicast state
*
* If the BATADV_MCAST_WANT_NO_RTR6 flag of this originator, orig, has
- * toggled then this method updates counter and list accordingly.
+ * toggled then this method updates the counter and the list accordingly.
*
* Caller needs to hold orig->mcast_handler_lock.
*/
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 02ed073f95a9..dc193618a761 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -640,7 +640,7 @@ batadv_netlink_tp_meter_put(struct sk_buff *msg, u32 cookie)
* @bat_priv: the bat priv with all the soft interface information
* @dst: destination of tp_meter session
* @result: reason for tp meter session stop
- * @test_time: total time ot the tp_meter session
+ * @test_time: total time of the tp_meter session
* @total_bytes: bytes acked to the receiver
* @cookie: cookie of tp_meter session
*
@@ -826,6 +826,10 @@ static int batadv_netlink_hardif_fill(struct sk_buff *msg,
goto nla_put_failure;
}
+ if (nla_put_u8(msg, BATADV_ATTR_HOP_PENALTY,
+ atomic_read(&hard_iface->hop_penalty)))
+ goto nla_put_failure;
+
#ifdef CONFIG_BATMAN_ADV_BATMAN_V
if (nla_put_u32(msg, BATADV_ATTR_ELP_INTERVAL,
atomic_read(&hard_iface->bat_v.elp_interval)))
@@ -920,9 +924,15 @@ static int batadv_netlink_set_hardif(struct sk_buff *skb,
{
struct batadv_hard_iface *hard_iface = info->user_ptr[1];
struct batadv_priv *bat_priv = info->user_ptr[0];
+ struct nlattr *attr;
+
+ if (info->attrs[BATADV_ATTR_HOP_PENALTY]) {
+ attr = info->attrs[BATADV_ATTR_HOP_PENALTY];
+
+ atomic_set(&hard_iface->hop_penalty, nla_get_u8(attr));
+ }
#ifdef CONFIG_BATMAN_ADV_BATMAN_V
- struct nlattr *attr;
if (info->attrs[BATADV_ATTR_ELP_INTERVAL]) {
attr = info->attrs[BATADV_ATTR_ELP_INTERVAL];
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index b0469d15da0e..48d707850f3e 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -134,7 +134,7 @@ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_mesh_init() - initialise coding hash table and start house keeping
+ * batadv_nc_mesh_init() - initialise coding hash table and start housekeeping
* @bat_priv: the bat priv with all the soft interface information
*
* Return: 0 on success or negative error number in case of failure
@@ -700,7 +700,7 @@ batadv_nc_process_nc_paths(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_worker() - periodic task for house keeping related to network
+ * batadv_nc_worker() - periodic task for housekeeping related to network
* coding
* @work: kernel work struct
*/
@@ -1316,7 +1316,7 @@ batadv_nc_path_search(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_skb_src_search() - Loops through the list of neighoring nodes of
+ * batadv_nc_skb_src_search() - Loops through the list of neighboring nodes of
* the skb's sender (may be equal to the originator).
* @bat_priv: the bat priv with all the soft interface information
* @skb: data skb to forward
@@ -1402,10 +1402,10 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
* @neigh_node: next hop to forward packet to
* @ethhdr: pointer to the ethernet header inside the skb
*
- * Loops through list of neighboring nodes the next hop has a good connection to
- * (receives OGMs with a sufficient quality). We need to find a neighbor of our
- * next hop that potentially sent a packet which our next hop also received
- * (overheard) and has stored for later decoding.
+ * Loops through the list of neighboring nodes the next hop has a good
+ * connection to (receives OGMs with a sufficient quality). We need to find a
+ * neighbor of our next hop that potentially sent a packet which our next hop
+ * also received (overheard) and has stored for later decoding.
*
* Return: true if the skb was consumed (encoded packet sent) or false otherwise
*/
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 5b0c2fffc214..805d8969bdfb 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -325,7 +325,7 @@ void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
* @if_outgoing: the interface where the payload packet has been received or
* the OGM should be sent to
*
- * Return: the neighbor which should be router for this orig_node/iface.
+ * Return: the neighbor which should be the router for this orig_node/iface.
*
* The object is returned with refcounter increased by 1.
*/
@@ -515,7 +515,7 @@ out:
* Looks for and possibly returns a neighbour belonging to this originator list
* which is connected through the provided hard interface.
*
- * Return: neighbor when found. Othwerwise NULL
+ * Return: neighbor when found. Otherwise NULL
*/
static struct batadv_neigh_node *
batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
@@ -620,7 +620,7 @@ batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface,
*
* Looks for and possibly returns a neighbour belonging to this hard interface.
*
- * Return: neighbor when found. Othwerwise NULL
+ * Return: neighbor when found. Otherwise NULL
*/
struct batadv_hardif_neigh_node *
batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
@@ -999,7 +999,7 @@ void batadv_originator_free(struct batadv_priv *bat_priv)
* @bat_priv: the bat priv with all the soft interface information
* @addr: the mac address of the originator
*
- * Creates a new originator object and initialise all the generic fields.
+ * Creates a new originator object and initialises all the generic fields.
* The new object is not added to the originator list.
*
* Return: the newly created object or NULL on failure.
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index d343382e9664..27cdf5e4349a 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -449,7 +449,7 @@ free_skb:
* @skb: packet to check
* @hdr_size: size of header to pull
*
- * Check for short header and bad addresses in given packet.
+ * Checks for short header and bad addresses in the given packet.
*
* Return: negative value when check fails and 0 otherwise. The negative value
* depends on the reason: -ENODATA for bad header, -EBADR for broadcast
@@ -1113,7 +1113,7 @@ free_skb:
* @recv_if: interface that the skb is received on
*
* This function does one of the three following things: 1) Forward fragment, if
- * the assembled packet will exceed our MTU; 2) Buffer fragment, if we till
+ * the assembled packet will exceed our MTU; 2) Buffer fragment, if we still
* lack further fragments; 3) Merge fragments, if we have all needed parts.
*
* Return: NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise.
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 7f8ade04e08e..d267b94800d6 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -605,8 +605,8 @@ bool batadv_forw_packet_steal(struct batadv_forw_packet *forw_packet,
* given hard_iface. If hard_iface is NULL forwarding packets on all hard
* interfaces will be claimed.
*
- * The packets are being moved from the forw_list to the cleanup_list and
- * by that allows already running threads to notice the claiming.
+ * The packets are being moved from the forw_list to the cleanup_list. This
+ * makes it possible for already running threads to notice the claim.
*/
static void
batadv_forw_packet_list_steal(struct hlist_head *forw_list,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index f1f1c86f3419..23833a0ba5e6 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -406,7 +406,7 @@ end:
* @hdr_size: size of already parsed batman-adv header
* @orig_node: originator from which the batman-adv packet was sent
*
- * Sends a ethernet frame to the receive path of the local @soft_iface.
+ * Sends an ethernet frame to the receive path of the local @soft_iface.
* skb->data has still point to the batman-adv header with the size @hdr_size.
* The caller has to have parsed this header already and made sure that at least
* @hdr_size bytes are still available for pull in @skb.
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index bd2ac570c42c..db7e3774825b 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -66,7 +66,7 @@
/**
* BATADV_TP_MAX_RTO - Maximum sender timeout. If the sender RTO gets beyond
- * such amound of milliseconds, the receiver is considered unreachable and the
+ * such amount of milliseconds, the receiver is considered unreachable and the
* connection is killed
*/
#define BATADV_TP_MAX_RTO 30000
@@ -108,10 +108,10 @@ static u32 batadv_tp_session_cookie(const u8 session[2], u8 icmp_uid)
* batadv_tp_cwnd() - compute the new cwnd size
* @base: base cwnd size value
* @increment: the value to add to base to get the new size
- * @min: minumim cwnd value (usually MSS)
+ * @min: minimum cwnd value (usually MSS)
*
- * Return the new cwnd size and ensures it does not exceed the Advertised
- * Receiver Window size. It is wrap around safe.
+ * Return the new cwnd size and ensure it does not exceed the Advertised
+ * Receiver Window size. It is wrapped around safely.
* For details refer to Section 3.1 of RFC5681
*
* Return: new congestion window size in bytes
@@ -254,7 +254,7 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason,
* @dst: the other endpoint MAC address to look for
*
* Look for a tp_vars object matching dst as end_point and return it after
- * having incremented the refcounter. Return NULL is not found
+ * having increment the refcounter. Return NULL is not found
*
* Return: matching tp_vars or NULL when no tp_vars with @dst was found
*/
@@ -291,7 +291,7 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv,
* @session: session identifier
*
* Look for a tp_vars object matching dst as end_point, session as tp meter
- * session and return it after having incremented the refcounter. Return NULL
+ * session and return it after having increment the refcounter. Return NULL
* is not found
*
* Return: matching tp_vars or NULL when no tp_vars was found
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index a9635c882fe0..98a0aaaf0d50 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -301,7 +301,7 @@ void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
* @vid: VLAN identifier
*
* Return: the number of originators advertising the given address/data
- * (excluding ourself).
+ * (excluding our self).
*/
int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
const u8 *addr, unsigned short vid)
@@ -842,7 +842,7 @@ out:
* table. In case of success the value is updated with the real amount of
* reserved bytes
* Allocate the needed amount of memory for the entire TT TVLV and write its
- * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data
+ * header made up of one tvlv_tt_data object and a series of tvlv_tt_vlan_data
* objects, one per active VLAN served by the originator node.
*
* Return: the size of the allocated buffer or 0 in case of failure.
@@ -1674,7 +1674,7 @@ out:
* the function argument.
* If a TT local entry exists for this non-mesh client remove it.
*
- * The caller must hold orig_node refcount.
+ * The caller must hold the orig_node refcount.
*
* Return: true if the new entry has been added, false otherwise
*/
@@ -1839,7 +1839,7 @@ out:
* @bat_priv: the bat priv with all the soft interface information
* @tt_global_entry: global translation table entry to be analyzed
*
- * This functon assumes the caller holds rcu_read_lock().
+ * This function assumes the caller holds rcu_read_lock().
* Return: best originator list entry or NULL on errors.
*/
static struct batadv_tt_orig_list_entry *
@@ -1887,7 +1887,7 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
* @tt_global_entry: global translation table entry to be printed
* @seq: debugfs table seq_file struct
*
- * This functon assumes the caller holds rcu_read_lock().
+ * This function assumes the caller holds rcu_read_lock().
*/
static void
batadv_tt_global_print_entry(struct batadv_priv *bat_priv,
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 0963a43ad996..6a23a566cde1 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -353,8 +353,8 @@ end:
* @tvlv_value: tvlv content
* @tvlv_value_len: tvlv content length
*
- * Return: success if handler was not found or the return value of the handler
- * callback.
+ * Return: success if the handler was not found or the return value of the
+ * handler callback.
*/
static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
struct batadv_tvlv_handler *tvlv_handler,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index d152b8e81f61..ed519efa3c36 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -208,6 +208,12 @@ struct batadv_hard_iface {
/** @rcu: struct used for freeing in an RCU-safe manner */
struct rcu_head rcu;
+ /**
+ * @hop_penalty: penalty which will be applied to the tq-field
+ * of an OGM received via this interface
+ */
+ atomic_t hop_penalty;
+
/** @bat_iv: per hard-interface B.A.T.M.A.N. IV data */
struct batadv_hard_iface_bat_iv bat_iv;
@@ -455,8 +461,8 @@ struct batadv_orig_node {
spinlock_t tt_buff_lock;
/**
- * @tt_lock: prevents from updating the table while reading it. Table
- * update is made up by two operations (data structure update and
+ * @tt_lock: avoids concurrent read from and write to the table. Table
+ * update is made up of two operations (data structure update and
* metadata -CRC/TTVN-recalculation) and they have to be executed
* atomically in order to avoid another thread to read the
* table/metadata between those.
@@ -748,7 +754,7 @@ struct batadv_neigh_ifinfo {
* struct batadv_bcast_duplist_entry - structure for LAN broadcast suppression
*/
struct batadv_bcast_duplist_entry {
- /** @orig: mac address of orig node orginating the broadcast */
+ /** @orig: mac address of orig node originating the broadcast */
u8 orig[ETH_ALEN];
/** @crc: crc32 checksum of broadcast payload */
@@ -1010,7 +1016,7 @@ struct batadv_priv_tt {
/**
* @commit_lock: prevents from executing a local TT commit while reading
- * the local table. The local TT commit is made up by two operations
+ * the local table. The local TT commit is made up of two operations
* (data structure update and metadata -CRC/TTVN- recalculation) and
* they have to be executed atomically in order to avoid another thread
* to read the table/metadata between those.
@@ -1024,7 +1030,7 @@ struct batadv_priv_tt {
#ifdef CONFIG_BATMAN_ADV_BLA
/**
- * struct batadv_priv_bla - per mesh interface bridge loope avoidance data
+ * struct batadv_priv_bla - per mesh interface bridge loop avoidance data
*/
struct batadv_priv_bla {
/** @num_requests: number of bla requests in flight */
@@ -1718,7 +1724,7 @@ struct batadv_priv {
spinlock_t softif_vlan_list_lock;
#ifdef CONFIG_BATMAN_ADV_BLA
- /** @bla: bridge loope avoidance data */
+ /** @bla: bridge loop avoidance data */
struct batadv_priv_bla bla;
#endif
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 4877a0db16c6..9db504baa094 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -349,12 +349,21 @@ void br_fdb_cleanup(struct work_struct *work)
*/
rcu_read_lock();
hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
- unsigned long this_timer;
+ unsigned long this_timer = f->updated + delay;
if (test_bit(BR_FDB_STATIC, &f->flags) ||
- test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags))
+ test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags)) {
+ if (test_bit(BR_FDB_NOTIFY, &f->flags)) {
+ if (time_after(this_timer, now))
+ work_delay = min(work_delay,
+ this_timer - now);
+ else if (!test_and_set_bit(BR_FDB_NOTIFY_INACTIVE,
+ &f->flags))
+ fdb_notify(br, f, RTM_NEWNEIGH, false);
+ }
continue;
- this_timer = f->updated + delay;
+ }
+
if (time_after(this_timer, now)) {
work_delay = min(work_delay, this_timer - now);
} else {
@@ -556,11 +565,17 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
return ret;
}
+/* returns true if the fdb was modified */
+static bool __fdb_mark_active(struct net_bridge_fdb_entry *fdb)
+{
+ return !!(test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags) &&
+ test_and_clear_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags));
+}
+
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid, unsigned long flags)
{
struct net_bridge_fdb_entry *fdb;
- bool fdb_modified = false;
/* some users want to always flood. */
if (hold_time(br) == 0)
@@ -575,6 +590,12 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
source->dev->name, addr, vid);
} else {
unsigned long now = jiffies;
+ bool fdb_modified = false;
+
+ if (now != fdb->updated) {
+ fdb->updated = now;
+ fdb_modified = __fdb_mark_active(fdb);
+ }
/* fastpath: update of existing entry */
if (unlikely(source != fdb->dst &&
@@ -587,8 +608,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
clear_bit(BR_FDB_ADDED_BY_EXT_LEARN,
&fdb->flags);
}
- if (now != fdb->updated)
- fdb->updated = now;
+
if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags)))
set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
if (unlikely(fdb_modified)) {
@@ -667,6 +687,23 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
&fdb->key.vlan_id))
goto nla_put_failure;
+ if (test_bit(BR_FDB_NOTIFY, &fdb->flags)) {
+ struct nlattr *nest = nla_nest_start(skb, NDA_FDB_EXT_ATTRS);
+ u8 notify_bits = FDB_NOTIFY_BIT;
+
+ if (!nest)
+ goto nla_put_failure;
+ if (test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags))
+ notify_bits |= FDB_NOTIFY_INACTIVE_BIT;
+
+ if (nla_put_u8(skb, NFEA_ACTIVITY_NOTIFY, notify_bits)) {
+ nla_nest_cancel(skb, nest);
+ goto nla_put_failure;
+ }
+
+ nla_nest_end(skb, nest);
+ }
+
nlmsg_end(skb, nlh);
return 0;
@@ -681,7 +718,9 @@ static inline size_t fdb_nlmsg_size(void)
+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+ nla_total_size(sizeof(u32)) /* NDA_MASTER */
+ nla_total_size(sizeof(u16)) /* NDA_VLAN */
- + nla_total_size(sizeof(struct nda_cacheinfo));
+ + nla_total_size(sizeof(struct nda_cacheinfo))
+ + nla_total_size(0) /* NDA_FDB_EXT_ATTRS */
+ + nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
}
static void fdb_notify(struct net_bridge *br,
@@ -791,14 +830,41 @@ errout:
return err;
}
+/* returns true if the fdb is modified */
+static bool fdb_handle_notify(struct net_bridge_fdb_entry *fdb, u8 notify)
+{
+ bool modified = false;
+
+ /* allow to mark an entry as inactive, usually done on creation */
+ if ((notify & FDB_NOTIFY_INACTIVE_BIT) &&
+ !test_and_set_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags))
+ modified = true;
+
+ if ((notify & FDB_NOTIFY_BIT) &&
+ !test_and_set_bit(BR_FDB_NOTIFY, &fdb->flags)) {
+ /* enabled activity tracking */
+ modified = true;
+ } else if (!(notify & FDB_NOTIFY_BIT) &&
+ test_and_clear_bit(BR_FDB_NOTIFY, &fdb->flags)) {
+ /* disabled activity tracking, clear notify state */
+ clear_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags);
+ modified = true;
+ }
+
+ return modified;
+}
+
/* Update (create or replace) forwarding database entry */
static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
- const u8 *addr, u16 state, u16 flags, u16 vid,
- u8 ndm_flags)
+ const u8 *addr, struct ndmsg *ndm, u16 flags, u16 vid,
+ struct nlattr *nfea_tb[])
{
- bool is_sticky = !!(ndm_flags & NTF_STICKY);
+ bool is_sticky = !!(ndm->ndm_flags & NTF_STICKY);
+ bool refresh = !nfea_tb[NFEA_DONT_REFRESH];
struct net_bridge_fdb_entry *fdb;
+ u16 state = ndm->ndm_state;
bool modified = false;
+ u8 notify = 0;
/* If the port cannot learn allow only local and static entries */
if (source && !(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
@@ -815,6 +881,13 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
if (is_sticky && (state & NUD_PERMANENT))
return -EINVAL;
+ if (nfea_tb[NFEA_ACTIVITY_NOTIFY]) {
+ notify = nla_get_u8(nfea_tb[NFEA_ACTIVITY_NOTIFY]);
+ if ((notify & ~BR_FDB_NOTIFY_SETTABLE_BITS) ||
+ (notify & BR_FDB_NOTIFY_SETTABLE_BITS) == FDB_NOTIFY_INACTIVE_BIT)
+ return -EINVAL;
+ }
+
fdb = br_fdb_find(br, addr, vid);
if (fdb == NULL) {
if (!(flags & NLM_F_CREATE))
@@ -858,11 +931,15 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
modified = true;
}
+ if (fdb_handle_notify(fdb, notify))
+ modified = true;
+
set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
fdb->used = jiffies;
if (modified) {
- fdb->updated = jiffies;
+ if (refresh)
+ fdb->updated = jiffies;
fdb_notify(br, fdb, RTM_NEWNEIGH, true);
}
@@ -871,7 +948,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
struct net_bridge_port *p, const unsigned char *addr,
- u16 nlh_flags, u16 vid)
+ u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[])
{
int err = 0;
@@ -893,20 +970,25 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
err = br_fdb_external_learn_add(br, p, addr, vid, true);
} else {
spin_lock_bh(&br->hash_lock);
- err = fdb_add_entry(br, p, addr, ndm->ndm_state,
- nlh_flags, vid, ndm->ndm_flags);
+ err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb);
spin_unlock_bh(&br->hash_lock);
}
return err;
}
+static const struct nla_policy br_nda_fdb_pol[NFEA_MAX + 1] = {
+ [NFEA_ACTIVITY_NOTIFY] = { .type = NLA_U8 },
+ [NFEA_DONT_REFRESH] = { .type = NLA_FLAG },
+};
+
/* Add new permanent fdb entry with RTM_NEWNEIGH */
int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid, u16 nlh_flags,
struct netlink_ext_ack *extack)
{
+ struct nlattr *nfea_tb[NFEA_MAX + 1], *attr;
struct net_bridge_vlan_group *vg;
struct net_bridge_port *p = NULL;
struct net_bridge_vlan *v;
@@ -939,6 +1021,16 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
vg = nbp_vlan_group(p);
}
+ if (tb[NDA_FDB_EXT_ATTRS]) {
+ attr = tb[NDA_FDB_EXT_ATTRS];
+ err = nla_parse_nested(nfea_tb, NFEA_MAX, attr,
+ br_nda_fdb_pol, extack);
+ if (err)
+ return err;
+ } else {
+ memset(nfea_tb, 0, sizeof(struct nlattr *) * (NFEA_MAX + 1));
+ }
+
if (vid) {
v = br_vlan_find(vg, vid);
if (!v || !br_vlan_should_use(v)) {
@@ -947,9 +1039,9 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
}
/* VID was specified, so use it. */
- err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid);
+ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb);
} else {
- err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0);
+ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb);
if (err || !vg || !vg->num_vlans)
goto out;
@@ -960,7 +1052,8 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
list_for_each_entry(v, &vg->vlan_list, vlist) {
if (!br_vlan_should_use(v))
continue;
- err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid);
+ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid,
+ nfea_tb);
if (err)
goto out;
}
diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
index 24986ec7d38c..779e1eb75443 100644
--- a/net/bridge/br_mrp.c
+++ b/net/bridge/br_mrp.c
@@ -411,10 +411,16 @@ int br_mrp_set_port_role(struct net_bridge_port *p,
if (!mrp)
return -EINVAL;
- if (role == BR_MRP_PORT_ROLE_PRIMARY)
+ switch (role) {
+ case BR_MRP_PORT_ROLE_PRIMARY:
rcu_assign_pointer(mrp->p_port, p);
- else
+ break;
+ case BR_MRP_PORT_ROLE_SECONDARY:
rcu_assign_pointer(mrp->s_port, p);
+ break;
+ default:
+ return -EINVAL;
+ }
br_mrp_port_switchdev_set_role(p, role);
diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c
index 34b3a8776991..c4f5c356811f 100644
--- a/net/bridge/br_mrp_netlink.c
+++ b/net/bridge/br_mrp_netlink.c
@@ -304,6 +304,70 @@ int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
return 0;
}
+int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br)
+{
+ struct nlattr *tb, *mrp_tb;
+ struct br_mrp *mrp;
+
+ mrp_tb = nla_nest_start_noflag(skb, IFLA_BRIDGE_MRP);
+ if (!mrp_tb)
+ return -EMSGSIZE;
+
+ list_for_each_entry_rcu(mrp, &br->mrp_list, list) {
+ struct net_bridge_port *p;
+
+ tb = nla_nest_start_noflag(skb, IFLA_BRIDGE_MRP_INFO);
+ if (!tb)
+ goto nla_info_failure;
+
+ if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_RING_ID,
+ mrp->ring_id))
+ goto nla_put_failure;
+
+ p = rcu_dereference(mrp->p_port);
+ if (p && nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_P_IFINDEX,
+ p->dev->ifindex))
+ goto nla_put_failure;
+
+ p = rcu_dereference(mrp->s_port);
+ if (p && nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_S_IFINDEX,
+ p->dev->ifindex))
+ goto nla_put_failure;
+
+ if (nla_put_u16(skb, IFLA_BRIDGE_MRP_INFO_PRIO,
+ mrp->prio))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_RING_STATE,
+ mrp->ring_state))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_RING_ROLE,
+ mrp->ring_role))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_TEST_INTERVAL,
+ mrp->test_interval))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_TEST_MAX_MISS,
+ mrp->test_max_miss))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_TEST_MONITOR,
+ mrp->test_monitor))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, tb);
+ }
+ nla_nest_end(skb, mrp_tb);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, tb);
+
+nla_info_failure:
+ nla_nest_cancel(skb, mrp_tb);
+
+ return -EMSGSIZE;
+}
+
int br_mrp_port_open(struct net_device *dev, u8 loc)
{
struct net_bridge_port *p;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 240e260e3461..c532fa65c983 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -453,6 +453,28 @@ static int br_fill_ifinfo(struct sk_buff *skb,
rcu_read_unlock();
if (err)
goto nla_put_failure;
+
+ nla_nest_end(skb, af);
+ }
+
+ if (filter_mask & RTEXT_FILTER_MRP) {
+ struct nlattr *af;
+ int err;
+
+ if (!br_mrp_enabled(br) || port)
+ goto done;
+
+ af = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
+ if (!af)
+ goto nla_put_failure;
+
+ rcu_read_lock();
+ err = br_mrp_fill_info(skb, br);
+ rcu_read_unlock();
+
+ if (err)
+ goto nla_put_failure;
+
nla_nest_end(skb, af);
}
@@ -516,7 +538,8 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct net_bridge_port *port = br_port_get_rtnl(dev);
if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN) &&
- !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED))
+ !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) &&
+ !(filter_mask & RTEXT_FILTER_MRP))
return 0;
return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, nlflags,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 7501be4eeba0..65d2c163a24a 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -48,6 +48,8 @@ enum {
/* Path to usermode spanning tree program */
#define BR_STP_PROG "/sbin/bridge-stp"
+#define BR_FDB_NOTIFY_SETTABLE_BITS (FDB_NOTIFY_BIT | FDB_NOTIFY_INACTIVE_BIT)
+
typedef struct bridge_id bridge_id;
typedef struct mac_addr mac_addr;
typedef __u16 port_id;
@@ -184,6 +186,8 @@ enum {
BR_FDB_ADDED_BY_USER,
BR_FDB_ADDED_BY_EXT_LEARN,
BR_FDB_OFFLOADED,
+ BR_FDB_NOTIFY,
+ BR_FDB_NOTIFY_INACTIVE
};
struct net_bridge_fdb_key {
@@ -217,8 +221,8 @@ struct net_bridge_port_group {
struct rcu_head rcu;
struct timer_list timer;
struct br_ip addr;
+ unsigned char eth_addr[ETH_ALEN] __aligned(2);
unsigned char flags;
- unsigned char eth_addr[ETH_ALEN];
};
struct net_bridge_mdb_entry {
@@ -1313,6 +1317,7 @@ int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb);
bool br_mrp_enabled(struct net_bridge *br);
void br_mrp_port_del(struct net_bridge *br, struct net_bridge_port *p);
+int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br);
#else
static inline int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
struct nlattr *attr, int cmd,
@@ -1335,6 +1340,12 @@ static inline void br_mrp_port_del(struct net_bridge *br,
struct net_bridge_port *p)
{
}
+
+static inline int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br)
+{
+ return 0;
+}
+
#endif
/* br_netlink.c */
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index 7c9e92b2f806..8e8ffac037cd 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -155,3 +155,4 @@ module_exit(nft_meta_bridge_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("wenxu <wenxu@ucloud.cn>");
MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta");
+MODULE_DESCRIPTION("Support for bridge dedicated meta key");
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index f48cf4cfb80f..deae2c9a0f69 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -455,3 +455,4 @@ module_exit(nft_reject_bridge_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "reject");
+MODULE_DESCRIPTION("Reject packets from bridge via nftables");
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index afe0e8184c23..4e7edd707a14 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -332,6 +332,7 @@ struct ceph_options *ceph_alloc_options(void)
opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
+ opt->read_from_replica = CEPH_READ_FROM_REPLICA_DEFAULT;
return opt;
}
EXPORT_SYMBOL(ceph_alloc_options);
@@ -490,16 +491,13 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
case Opt_read_from_replica:
switch (result.uint_32) {
case Opt_read_from_replica_no:
- opt->osd_req_flags &= ~(CEPH_OSD_FLAG_BALANCE_READS |
- CEPH_OSD_FLAG_LOCALIZE_READS);
+ opt->read_from_replica = 0;
break;
case Opt_read_from_replica_balance:
- opt->osd_req_flags |= CEPH_OSD_FLAG_BALANCE_READS;
- opt->osd_req_flags &= ~CEPH_OSD_FLAG_LOCALIZE_READS;
+ opt->read_from_replica = CEPH_OSD_FLAG_BALANCE_READS;
break;
case Opt_read_from_replica_localize:
- opt->osd_req_flags |= CEPH_OSD_FLAG_LOCALIZE_READS;
- opt->osd_req_flags &= ~CEPH_OSD_FLAG_BALANCE_READS;
+ opt->read_from_replica = CEPH_OSD_FLAG_LOCALIZE_READS;
break;
default:
BUG();
@@ -613,9 +611,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
}
seq_putc(m, ',');
}
- if (opt->osd_req_flags & CEPH_OSD_FLAG_BALANCE_READS) {
+ if (opt->read_from_replica == CEPH_OSD_FLAG_BALANCE_READS) {
seq_puts(m, "read_from_replica=balance,");
- } else if (opt->osd_req_flags & CEPH_OSD_FLAG_LOCALIZE_READS) {
+ } else if (opt->read_from_replica == CEPH_OSD_FLAG_LOCALIZE_READS) {
seq_puts(m, "read_from_replica=localize,");
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 4fea3c33af2a..2db8b44e70c2 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -445,8 +445,10 @@ static void target_copy(struct ceph_osd_request_target *dest,
dest->size = src->size;
dest->min_size = src->min_size;
dest->sort_bitwise = src->sort_bitwise;
+ dest->recovery_deletes = src->recovery_deletes;
dest->flags = src->flags;
+ dest->used_replica = src->used_replica;
dest->paused = src->paused;
dest->epoch = src->epoch;
@@ -1117,10 +1119,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
truncate_size, truncate_seq);
}
- req->r_flags = flags;
req->r_base_oloc.pool = layout->pool_id;
req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns);
ceph_oid_printf(&req->r_base_oid, "%llx.%08llx", vino.ino, objnum);
+ req->r_flags = flags | osdc->client->options->read_from_replica;
req->r_snapid = vino.snap;
if (flags & CEPH_OSD_FLAG_WRITE)
@@ -2431,14 +2433,11 @@ promote:
static void account_request(struct ceph_osd_request *req)
{
- struct ceph_osd_client *osdc = req->r_osdc;
-
WARN_ON(req->r_flags & (CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK));
WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE)));
req->r_flags |= CEPH_OSD_FLAG_ONDISK;
- req->r_flags |= osdc->client->options->osd_req_flags;
- atomic_inc(&osdc->num_requests);
+ atomic_inc(&req->r_osdc->num_requests);
req->r_start_stamp = jiffies;
req->r_start_latency = ktime_get();
diff --git a/net/core/dev.c b/net/core/dev.c
index 6bc2388141f6..c02bae927812 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -143,6 +143,7 @@
#include <linux/net_namespace.h>
#include <linux/indirect_call_wrapper.h>
#include <net/devlink.h>
+#include <linux/pm_runtime.h>
#include "net-sysfs.h"
@@ -1492,8 +1493,13 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
ASSERT_RTNL();
- if (!netif_device_present(dev))
- return -ENODEV;
+ if (!netif_device_present(dev)) {
+ /* may be detached because parent is runtime-suspended */
+ if (dev->dev.parent)
+ pm_runtime_resume(dev->dev.parent);
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ }
/* Block netpoll from trying to do any rx path servicing.
* If we don't do this there is a chance ndo_poll_controller
@@ -3743,7 +3749,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) {
- rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ rc = q->enqueue(skb, q, NULL, &to_free) & NET_XMIT_MASK;
qdisc_run(q);
if (unlikely(to_free))
@@ -3786,7 +3792,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
- rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ rc = q->enqueue(skb, q, root_lock, &to_free) & NET_XMIT_MASK;
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
@@ -4192,10 +4198,12 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
local_bh_disable();
+ dev_xmit_recursion_inc();
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_drv_stopped(txq))
ret = netdev_start_xmit(skb, dev, txq, false);
HARD_TX_UNLOCK(dev, txq);
+ dev_xmit_recursion_dec();
local_bh_enable();
@@ -6683,7 +6691,9 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
trace_napi_poll(n, work, weight);
}
- WARN_ON_ONCE(work > weight);
+ if (unlikely(work > weight))
+ pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
+ n->poll, work, weight);
if (likely(work < weight))
goto out_unlock;
@@ -9547,6 +9557,13 @@ int register_netdevice(struct net_device *dev)
rcu_barrier();
dev->reg_state = NETREG_UNREGISTERED;
+ /* We should put the kobject that hold in
+ * netdev_unregister_kobject(), otherwise
+ * the net device cannot be freed when
+ * driver calls free_netdev(), because the
+ * kobject is being hold.
+ */
+ kobject_put(&dev->dev.kobj);
}
/*
* Prevent userspace races by waiting until the network
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 2cafbc808b09..6ae36808c152 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -85,6 +85,10 @@ EXPORT_SYMBOL(devlink_dpipe_header_ipv6);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
+static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
+ [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
+};
+
static LIST_HEAD(devlink_list);
/* devlink_mutex
@@ -563,10 +567,54 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
return 0;
}
+static int
+devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink *devlink = port->devlink;
+ const struct devlink_ops *ops;
+ struct nlattr *function_attr;
+ bool empty_nest = true;
+ int err = 0;
+
+ function_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PORT_FUNCTION);
+ if (!function_attr)
+ return -EMSGSIZE;
+
+ ops = devlink->ops;
+ if (ops->port_function_hw_addr_get) {
+ int hw_addr_len;
+ u8 hw_addr[MAX_ADDR_LEN];
+
+ err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack);
+ if (err == -EOPNOTSUPP) {
+ /* Port function attributes are optional for a port. If port doesn't
+ * support function attribute, returning -EOPNOTSUPP is not an error.
+ */
+ err = 0;
+ goto out;
+ } else if (err) {
+ goto out;
+ }
+ err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr);
+ if (err)
+ goto out;
+ empty_nest = false;
+ }
+
+out:
+ if (err || empty_nest)
+ nla_nest_cancel(msg, function_attr);
+ else
+ nla_nest_end(msg, function_attr);
+ return err;
+}
+
static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
struct devlink_port *devlink_port,
enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
+ u32 seq, int flags,
+ struct netlink_ext_ack *extack)
{
void *hdr;
@@ -607,6 +655,8 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
spin_unlock_bh(&devlink_port->type_lock);
if (devlink_nl_port_attrs_put(msg, devlink_port))
goto nla_put_failure;
+ if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
+ goto nla_put_failure;
genlmsg_end(msg, hdr);
return 0;
@@ -634,7 +684,8 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
if (!msg)
return;
- err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0);
+ err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0,
+ NULL);
if (err) {
nlmsg_free(msg);
return;
@@ -708,7 +759,8 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
err = devlink_nl_port_fill(msg, devlink, devlink_port,
DEVLINK_CMD_PORT_NEW,
- info->snd_portid, info->snd_seq, 0);
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
if (err) {
nlmsg_free(msg);
return err;
@@ -740,7 +792,8 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
DEVLINK_CMD_NEW,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ NLM_F_MULTI,
+ cb->extack);
if (err) {
mutex_unlock(&devlink->lock);
goto out;
@@ -778,6 +831,67 @@ static int devlink_port_type_set(struct devlink *devlink,
return -EOPNOTSUPP;
}
+static int
+devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *port,
+ const struct nlattr *attr, struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops;
+ const u8 *hw_addr;
+ int hw_addr_len;
+ int err;
+
+ hw_addr = nla_data(attr);
+ hw_addr_len = nla_len(attr);
+ if (hw_addr_len > MAX_ADDR_LEN) {
+ NL_SET_ERR_MSG_MOD(extack, "Port function hardware address too long");
+ return -EINVAL;
+ }
+ if (port->type == DEVLINK_PORT_TYPE_ETH) {
+ if (hw_addr_len != ETH_ALEN) {
+ NL_SET_ERR_MSG_MOD(extack, "Address must be 6 bytes for Ethernet device");
+ return -EINVAL;
+ }
+ if (!is_unicast_ether_addr(hw_addr)) {
+ NL_SET_ERR_MSG_MOD(extack, "Non-unicast hardware address unsupported");
+ return -EINVAL;
+ }
+ }
+
+ ops = devlink->ops;
+ if (!ops->port_function_hw_addr_set) {
+ NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes");
+ return -EOPNOTSUPP;
+ }
+
+ err = ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack);
+ if (err)
+ return err;
+
+ devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
+ return 0;
+}
+
+static int
+devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
+ const struct nlattr *attr, struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, DEVLINK_PORT_FUNCTION_ATTR_MAX, attr,
+ devlink_function_nl_policy, extack);
+ if (err < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Fail to parse port function attributes");
+ return err;
+ }
+
+ attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
+ if (attr)
+ err = devlink_port_function_hw_addr_set(devlink, port, attr, extack);
+
+ return err;
+}
+
static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
@@ -793,6 +907,16 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
if (err)
return err;
}
+
+ if (info->attrs[DEVLINK_ATTR_PORT_FUNCTION]) {
+ struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION];
+ struct netlink_ext_ack *extack = info->extack;
+
+ err = devlink_port_function_set(devlink, devlink_port, attr, extack);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -4378,6 +4502,14 @@ int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
}
EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
+int devlink_info_board_serial_number_put(struct devlink_info_req *req,
+ const char *bsn)
+{
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER,
+ bsn);
+}
+EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put);
+
static int devlink_info_version_put(struct devlink_info_req *req, int attr,
const char *version_name,
const char *version_value)
@@ -6709,6 +6841,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32 },
[DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 },
[DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED },
};
static const struct genl_ops devlink_nl_ops[] = {
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 2ee7bc4c9e03..b09bebeadf0b 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -1721,3 +1721,4 @@ module_exit(exit_net_drop_monitor);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Neil Horman <nhorman@tuxdriver.com>");
MODULE_ALIAS_GENL_FAMILY("NET_DM");
+MODULE_DESCRIPTION("Monitoring code for network dropped packet alerts");
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 0cfc35e6be28..b739cfab796e 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -372,14 +372,15 @@ int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv)
}
EXPORT_SYMBOL(flow_indr_dev_register);
-static void __flow_block_indr_cleanup(flow_setup_cb_t *setup_cb, void *cb_priv,
+static void __flow_block_indr_cleanup(void (*release)(void *cb_priv),
+ void *cb_priv,
struct list_head *cleanup_list)
{
struct flow_block_cb *this, *next;
list_for_each_entry_safe(this, next, &flow_block_indr_list, indr.list) {
- if (this->cb == setup_cb &&
- this->cb_priv == cb_priv) {
+ if (this->release == release &&
+ this->indr.cb_priv == cb_priv) {
list_move(&this->indr.list, cleanup_list);
return;
}
@@ -397,7 +398,7 @@ static void flow_block_indr_notify(struct list_head *cleanup_list)
}
void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv,
- flow_setup_cb_t *setup_cb)
+ void (*release)(void *cb_priv))
{
struct flow_indr_dev *this, *next, *indr_dev = NULL;
LIST_HEAD(cleanup_list);
@@ -418,7 +419,7 @@ void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv,
return;
}
- __flow_block_indr_cleanup(setup_cb, cb_priv, &cleanup_list);
+ __flow_block_indr_cleanup(release, cb_priv, &cleanup_list);
mutex_unlock(&flow_indr_block_lock);
flow_block_indr_notify(&cleanup_list);
@@ -429,32 +430,37 @@ EXPORT_SYMBOL(flow_indr_dev_unregister);
static void flow_block_indr_init(struct flow_block_cb *flow_block,
struct flow_block_offload *bo,
struct net_device *dev, void *data,
+ void *cb_priv,
void (*cleanup)(struct flow_block_cb *block_cb))
{
flow_block->indr.binder_type = bo->binder_type;
flow_block->indr.data = data;
+ flow_block->indr.cb_priv = cb_priv;
flow_block->indr.dev = dev;
flow_block->indr.cleanup = cleanup;
}
-static void __flow_block_indr_binding(struct flow_block_offload *bo,
- struct net_device *dev, void *data,
- void (*cleanup)(struct flow_block_cb *block_cb))
+struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb,
+ void *cb_ident, void *cb_priv,
+ void (*release)(void *cb_priv),
+ struct flow_block_offload *bo,
+ struct net_device *dev, void *data,
+ void *indr_cb_priv,
+ void (*cleanup)(struct flow_block_cb *block_cb))
{
struct flow_block_cb *block_cb;
- list_for_each_entry(block_cb, &bo->cb_list, list) {
- switch (bo->command) {
- case FLOW_BLOCK_BIND:
- flow_block_indr_init(block_cb, bo, dev, data, cleanup);
- list_add(&block_cb->indr.list, &flow_block_indr_list);
- break;
- case FLOW_BLOCK_UNBIND:
- list_del(&block_cb->indr.list);
- break;
- }
- }
+ block_cb = flow_block_cb_alloc(cb, cb_ident, cb_priv, release);
+ if (IS_ERR(block_cb))
+ goto out;
+
+ flow_block_indr_init(block_cb, bo, dev, data, indr_cb_priv, cleanup);
+ list_add(&block_cb->indr.list, &flow_block_indr_list);
+
+out:
+ return block_cb;
}
+EXPORT_SYMBOL(flow_indr_block_cb_alloc);
int flow_indr_dev_setup_offload(struct net_device *dev,
enum tc_setup_type type, void *data,
@@ -465,9 +471,8 @@ int flow_indr_dev_setup_offload(struct net_device *dev,
mutex_lock(&flow_indr_block_lock);
list_for_each_entry(this, &flow_block_indr_dev_list, list)
- this->cb(dev, this->cb_priv, type, bo);
+ this->cb(dev, this->cb_priv, type, bo, data, cleanup);
- __flow_block_indr_binding(bo, dev, data, cleanup);
mutex_unlock(&flow_indr_block_lock);
return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ef6b5a8f629c..8e39e28b0a8d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1783,6 +1783,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = {
[NDA_MASTER] = { .type = NLA_U32 },
[NDA_PROTOCOL] = { .type = NLA_U8 },
[NDA_NH_ID] = { .type = NLA_U32 },
+ [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED },
};
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/net/core/sock.c b/net/core/sock.c
index 5ba4753bc04d..f5b5fdd61c88 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -709,7 +709,7 @@ bool sk_mc_loop(struct sock *sk)
return inet6_sk(sk)->mc_loop;
#endif
}
- WARN_ON(1);
+ WARN_ON_ONCE(1);
return true;
}
EXPORT_SYMBOL(sk_mc_loop);
@@ -1758,6 +1758,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
cgroup_sk_alloc(&sk->sk_cgrp_data);
sock_update_classid(&sk->sk_cgrp_data);
sock_update_netprioidx(&sk->sk_cgrp_data);
+ sk_tx_queue_clear(sk);
}
return sk;
@@ -1981,6 +1982,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
*/
sk_refcnt_debug_inc(newsk);
sk_set_socket(newsk, NULL);
+ sk_tx_queue_clear(newsk);
RCU_INIT_POINTER(newsk->sk_wq, NULL);
if (newsk->sk_prot->sockets_allocated)
diff --git a/net/core/tso.c b/net/core/tso.c
index d4d5c077ad72..4148f6d48953 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -6,18 +6,17 @@
#include <asm/unaligned.h>
/* Calculate expected number of TX descriptors */
-int tso_count_descs(struct sk_buff *skb)
+int tso_count_descs(const struct sk_buff *skb)
{
/* The Marvell Way */
return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags;
}
EXPORT_SYMBOL(tso_count_descs);
-void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
+void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso,
int size, bool is_last)
{
- struct tcphdr *tcph;
- int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int hdr_len = skb_transport_offset(skb) + tso->tlen;
int mac_hdr_len = skb_network_offset(skb);
memcpy(hdr, skb->data, hdr_len);
@@ -30,23 +29,31 @@ void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
} else {
struct ipv6hdr *iph = (void *)(hdr + mac_hdr_len);
- iph->payload_len = htons(size + tcp_hdrlen(skb));
+ iph->payload_len = htons(size + tso->tlen);
}
- tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb));
- put_unaligned_be32(tso->tcp_seq, &tcph->seq);
+ hdr += skb_transport_offset(skb);
+ if (tso->tlen != sizeof(struct udphdr)) {
+ struct tcphdr *tcph = (struct tcphdr *)hdr;
- if (!is_last) {
- /* Clear all special flags for not last packet */
- tcph->psh = 0;
- tcph->fin = 0;
- tcph->rst = 0;
+ put_unaligned_be32(tso->tcp_seq, &tcph->seq);
+
+ if (!is_last) {
+ /* Clear all special flags for not last packet */
+ tcph->psh = 0;
+ tcph->fin = 0;
+ tcph->rst = 0;
+ }
+ } else {
+ struct udphdr *uh = (struct udphdr *)hdr;
+
+ uh->len = htons(sizeof(*uh) + size);
}
}
EXPORT_SYMBOL(tso_build_hdr);
-void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size)
+void tso_build_data(const struct sk_buff *skb, struct tso_t *tso, int size)
{
- tso->tcp_seq += size;
+ tso->tcp_seq += size; /* not worth avoiding this operation for UDP */
tso->size -= size;
tso->data += size;
@@ -62,12 +69,14 @@ void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size)
}
EXPORT_SYMBOL(tso_build_data);
-void tso_start(struct sk_buff *skb, struct tso_t *tso)
+int tso_start(struct sk_buff *skb, struct tso_t *tso)
{
- int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int tlen = skb_is_gso_tcp(skb) ? tcp_hdrlen(skb) : sizeof(struct udphdr);
+ int hdr_len = skb_transport_offset(skb) + tlen;
+ tso->tlen = tlen;
tso->ip_id = ntohs(ip_hdr(skb)->id);
- tso->tcp_seq = ntohl(tcp_hdr(skb)->seq);
+ tso->tcp_seq = (tlen != sizeof(struct udphdr)) ? ntohl(tcp_hdr(skb)->seq) : 0;
tso->next_frag_idx = 0;
tso->ipv6 = vlan_get_protocol(skb) == htons(ETH_P_IPV6);
@@ -83,5 +92,6 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso)
tso->data = skb_frag_address(frag);
tso->next_frag_idx++;
}
+ return hdr_len;
}
EXPORT_SYMBOL(tso_start);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 90f44f382115..3c45f99e26d5 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -462,6 +462,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
xdpf->len = totsize - metasize;
xdpf->headroom = 0;
xdpf->metasize = metasize;
+ xdpf->frame_sz = PAGE_SIZE;
xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
xsk_buff_free(xdp);
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index d2a4553bcf39..84dde5a2066e 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1736,7 +1736,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net_device *netdev;
struct dcbmsg *dcb = nlmsg_data(nlh);
struct nlattr *tb[DCB_ATTR_MAX + 1];
- u32 portid = skb ? NETLINK_CB(skb).portid : 0;
+ u32 portid = NETLINK_CB(skb).portid;
int ret = -EINVAL;
struct sk_buff *reply_skb;
struct nlmsghdr *reply_nlh = NULL;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 06b9983325cc..9eb7e4b62d9b 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -670,7 +670,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
if (decnet_debug_level & 1)
printk(KERN_DEBUG
"dn_route_rcv: got 0x%02x from %s [%d %d %d]\n",
- (int)flags, (dev) ? dev->name : "???", len, skb->len,
+ (int)flags, dev->name, len, skb->len,
padlen);
if (flags & DN_RT_PKT_CNTL) {
diff --git a/net/devres.c b/net/devres.c
index 57a6a88d11f6..1f9be2133787 100644
--- a/net/devres.c
+++ b/net/devres.c
@@ -39,7 +39,7 @@ struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv,
}
EXPORT_SYMBOL(devm_alloc_etherdev_mqs);
-static void devm_netdev_release(struct device *dev, void *this)
+static void devm_unregister_netdev(struct device *dev, void *this)
{
struct net_device_devres *res = this;
@@ -77,7 +77,7 @@ int devm_register_netdev(struct device *dev, struct net_device *ndev)
netdev_devres_match, ndev)))
return -EINVAL;
- dr = devres_alloc(devm_netdev_release, sizeof(*dr), GFP_KERNEL);
+ dr = devres_alloc(devm_unregister_netdev, sizeof(*dr), GFP_KERNEL);
if (!dr)
return -ENOMEM;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 4c7f086a047b..e147e10b411c 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1795,7 +1795,8 @@ int dsa_slave_create(struct dsa_port *port)
ret = dsa_slave_phy_setup(slave_dev);
if (ret) {
- netdev_err(master, "error %d setting up slave phy\n", ret);
+ netdev_err(master, "error %d setting up slave PHY for %s\n",
+ ret, slave_dev->name);
goto out_gcells;
}
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index e8eaa804ccb9..d6200ff98200 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -13,6 +13,16 @@
#define DSA_HLEN 4
#define EDSA_HLEN 8
+#define FRAME_TYPE_TO_CPU 0x00
+#define FRAME_TYPE_FORWARD 0x03
+
+#define TO_CPU_CODE_MGMT_TRAP 0x00
+#define TO_CPU_CODE_FRAME2REG 0x01
+#define TO_CPU_CODE_IGMP_MLD_TRAP 0x02
+#define TO_CPU_CODE_POLICY_TRAP 0x03
+#define TO_CPU_CODE_ARP_MIRROR 0x04
+#define TO_CPU_CODE_POLICY_MIRROR 0x05
+
static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
@@ -77,6 +87,8 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
u8 *edsa_header;
+ int frame_type;
+ int code;
int source_device;
int source_port;
@@ -91,8 +103,29 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
/*
* Check that frame type is either TO_CPU or FORWARD.
*/
- if ((edsa_header[0] & 0xc0) != 0x00 && (edsa_header[0] & 0xc0) != 0xc0)
+ frame_type = edsa_header[0] >> 6;
+
+ switch (frame_type) {
+ case FRAME_TYPE_TO_CPU:
+ code = (edsa_header[1] & 0x6) | ((edsa_header[2] >> 4) & 1);
+
+ /*
+ * Mark the frame to never egress on any port of the same switch
+ * unless it's a trapped IGMP/MLD packet, in which case the
+ * bridge might want to forward it.
+ */
+ if (code != TO_CPU_CODE_IGMP_MLD_TRAP)
+ skb->offload_fwd_mark = 1;
+
+ break;
+
+ case FRAME_TYPE_FORWARD:
+ skb->offload_fwd_mark = 1;
+ break;
+
+ default:
return NULL;
+ }
/*
* Determine source device and port.
@@ -156,8 +189,6 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
2 * ETH_ALEN);
}
- skb->offload_fwd_mark = 1;
-
return skb;
}
diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index 7b7a0456c15c..7194956aa09e 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -234,6 +234,14 @@ static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest,
struct nlattr *tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX + 1];
int ret;
+ cfg->first = 100;
+ cfg->step = 100;
+ cfg->last = MAX_CABLE_LENGTH_CM;
+ cfg->pair = PHY_PAIR_ALL;
+
+ if (!nest)
+ return 0;
+
ret = nla_parse_nested(tb, ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX, nest,
cable_test_tdr_act_cfg_policy, info->extack);
if (ret < 0)
@@ -242,17 +250,12 @@ static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest,
if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST])
cfg->first = nla_get_u32(
tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST]);
- else
- cfg->first = 100;
+
if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST])
cfg->last = nla_get_u32(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST]);
- else
- cfg->last = MAX_CABLE_LENGTH_CM;
if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP])
cfg->step = nla_get_u32(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP]);
- else
- cfg->step = 100;
if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]) {
cfg->pair = nla_get_u8(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]);
@@ -263,8 +266,6 @@ static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest,
"invalid pair parameter");
return -EINVAL;
}
- } else {
- cfg->pair = PHY_PAIR_ALL;
}
if (cfg->first > MAX_CABLE_LENGTH_CM) {
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 423e640e3876..aaecfc916a4d 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -40,9 +40,11 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
[NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
+ [NETIF_F_GSO_TUNNEL_REMCSUM_BIT] = "tx-tunnel-remcsum-segmentation",
[NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
[NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation",
[NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation",
+ [NETIF_F_GSO_FRAGLIST_BIT] = "tx-gso-list",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index b5df90c981c2..83f22196d64c 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1918,7 +1918,7 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
if (copy_to_user(useraddr, &stats, sizeof(stats)))
goto out;
useraddr += sizeof(stats);
- if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64)))
+ if (n_stats && copy_to_user(useraddr, data, array_size(n_stats, sizeof(u64))))
goto out;
ret = 0;
@@ -1973,7 +1973,7 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
if (copy_to_user(useraddr, &stats, sizeof(stats)))
goto out;
useraddr += sizeof(stats);
- if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64)))
+ if (n_stats && copy_to_user(useraddr, data, array_size(n_stats, sizeof(u64))))
goto out;
ret = 0;
@@ -2978,7 +2978,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
sizeof(match->mask.ipv6.dst));
}
if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) ||
- memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) {
+ memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) {
match->dissector.used_keys |=
BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS);
match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] =
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
index 7f47ba89054e..4834091ec24c 100644
--- a/net/ethtool/linkstate.c
+++ b/net/ethtool/linkstate.c
@@ -9,10 +9,12 @@ struct linkstate_req_info {
};
struct linkstate_reply_data {
- struct ethnl_reply_data base;
- int link;
- int sqi;
- int sqi_max;
+ struct ethnl_reply_data base;
+ int link;
+ int sqi;
+ int sqi_max;
+ bool link_ext_state_provided;
+ struct ethtool_link_ext_state_info ethtool_link_ext_state_info;
};
#define LINKSTATE_REPDATA(__reply_base) \
@@ -25,6 +27,8 @@ linkstate_get_policy[ETHTOOL_A_LINKSTATE_MAX + 1] = {
[ETHTOOL_A_LINKSTATE_LINK] = { .type = NLA_REJECT },
[ETHTOOL_A_LINKSTATE_SQI] = { .type = NLA_REJECT },
[ETHTOOL_A_LINKSTATE_SQI_MAX] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKSTATE_EXT_STATE] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKSTATE_EXT_SUBSTATE] = { .type = NLA_REJECT },
};
static int linkstate_get_sqi(struct net_device *dev)
@@ -61,6 +65,23 @@ static int linkstate_get_sqi_max(struct net_device *dev)
mutex_unlock(&phydev->lock);
return ret;
+};
+
+static int linkstate_get_link_ext_state(struct net_device *dev,
+ struct linkstate_reply_data *data)
+{
+ int err;
+
+ if (!dev->ethtool_ops->get_link_ext_state)
+ return -EOPNOTSUPP;
+
+ err = dev->ethtool_ops->get_link_ext_state(dev, &data->ethtool_link_ext_state_info);
+ if (err)
+ return err;
+
+ data->link_ext_state_provided = true;
+
+ return 0;
}
static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
@@ -78,19 +99,24 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
ret = linkstate_get_sqi(dev);
if (ret < 0 && ret != -EOPNOTSUPP)
- return ret;
-
+ goto out;
data->sqi = ret;
ret = linkstate_get_sqi_max(dev);
if (ret < 0 && ret != -EOPNOTSUPP)
- return ret;
-
+ goto out;
data->sqi_max = ret;
- ethnl_ops_complete(dev);
+ if (dev->flags & IFF_UP) {
+ ret = linkstate_get_link_ext_state(dev, data);
+ if (ret < 0 && ret != -EOPNOTSUPP && ret != -ENODATA)
+ goto out;
+ }
- return 0;
+ ret = 0;
+out:
+ ethnl_ops_complete(dev);
+ return ret;
}
static int linkstate_reply_size(const struct ethnl_req_info *req_base,
@@ -108,6 +134,12 @@ static int linkstate_reply_size(const struct ethnl_req_info *req_base,
if (data->sqi_max != -EOPNOTSUPP)
len += nla_total_size(sizeof(u32));
+ if (data->link_ext_state_provided)
+ len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_STATE */
+
+ if (data->ethtool_link_ext_state_info.__link_ext_substate)
+ len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_SUBSTATE */
+
return len;
}
@@ -129,6 +161,17 @@ static int linkstate_fill_reply(struct sk_buff *skb,
nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max))
return -EMSGSIZE;
+ if (data->link_ext_state_provided) {
+ if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_STATE,
+ data->ethtool_link_ext_state_info.link_ext_state))
+ return -EMSGSIZE;
+
+ if (data->ethtool_link_ext_state_info.__link_ext_substate &&
+ nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_SUBSTATE,
+ data->ethtool_link_ext_state_info.__link_ext_substate))
+ return -EMSGSIZE;
+ }
+
return 0;
}
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index cd99f548e440..1032b83d7047 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -210,7 +210,7 @@ static netdev_features_t hsr_fix_features(struct net_device *dev,
return hsr_features_recompute(hsr, features);
}
-static int hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct hsr_priv *hsr = netdev_priv(dev);
struct hsr_port *master;
@@ -339,7 +339,7 @@ static void hsr_announce(struct timer_list *t)
rcu_read_unlock();
}
-static void hsr_del_ports(struct hsr_priv *hsr)
+void hsr_del_ports(struct hsr_priv *hsr)
{
struct hsr_port *port;
@@ -356,31 +356,12 @@ static void hsr_del_ports(struct hsr_priv *hsr)
hsr_del_port(port);
}
-/* This has to be called after all the readers are gone.
- * Otherwise we would have to check the return value of
- * hsr_port_get_hsr().
- */
-static void hsr_dev_destroy(struct net_device *hsr_dev)
-{
- struct hsr_priv *hsr = netdev_priv(hsr_dev);
-
- hsr_debugfs_term(hsr);
- hsr_del_ports(hsr);
-
- del_timer_sync(&hsr->prune_timer);
- del_timer_sync(&hsr->announce_timer);
-
- hsr_del_self_node(hsr);
- hsr_del_nodes(&hsr->node_db);
-}
-
static const struct net_device_ops hsr_device_ops = {
.ndo_change_mtu = hsr_dev_change_mtu,
.ndo_open = hsr_dev_open,
.ndo_stop = hsr_dev_close,
.ndo_start_xmit = hsr_dev_xmit,
.ndo_fix_features = hsr_fix_features,
- .ndo_uninit = hsr_dev_destroy,
};
static struct device_type hsr_type = {
diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h
index a099d7de7e79..b8f9262ed101 100644
--- a/net/hsr/hsr_device.h
+++ b/net/hsr/hsr_device.h
@@ -11,6 +11,7 @@
#include <linux/netdevice.h>
#include "hsr_main.h"
+void hsr_del_ports(struct hsr_priv *hsr);
void hsr_dev_setup(struct net_device *dev);
int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
unsigned char multicast_spec, u8 protocol_version,
@@ -18,5 +19,4 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
void hsr_check_carrier_and_operstate(struct hsr_priv *hsr);
bool is_hsr_master(struct net_device *dev);
int hsr_get_max_mtu(struct hsr_priv *hsr);
-
#endif /* __HSR_DEVICE_H */
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index e2564de67603..144da15f0a81 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -6,6 +6,7 @@
*/
#include <linux/netdevice.h>
+#include <net/rtnetlink.h>
#include <linux/rculist.h>
#include <linux/timer.h>
#include <linux/etherdevice.h>
@@ -100,8 +101,10 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
master = hsr_port_get_hsr(port->hsr, HSR_PT_MASTER);
hsr_del_port(port);
if (hsr_slave_empty(master->hsr)) {
- unregister_netdevice_queue(master->dev,
- &list_kill);
+ const struct rtnl_link_ops *ops;
+
+ ops = master->dev->rtnl_link_ops;
+ ops->dellink(master->dev, &list_kill);
unregister_netdevice_many(&list_kill);
}
}
@@ -144,9 +147,9 @@ static int __init hsr_init(void)
static void __exit hsr_exit(void)
{
- unregister_netdevice_notifier(&hsr_nb);
hsr_netlink_exit();
hsr_debugfs_remove_root();
+ unregister_netdevice_notifier(&hsr_nb);
}
module_init(hsr_init);
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 1decb25f6764..6e14b7d22639 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -83,6 +83,22 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
return hsr_dev_finalize(dev, link, multicast_spec, hsr_version, extack);
}
+static void hsr_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct hsr_priv *hsr = netdev_priv(dev);
+
+ del_timer_sync(&hsr->prune_timer);
+ del_timer_sync(&hsr->announce_timer);
+
+ hsr_debugfs_term(hsr);
+ hsr_del_ports(hsr);
+
+ hsr_del_self_node(hsr);
+ hsr_del_nodes(&hsr->node_db);
+
+ unregister_netdevice_queue(dev, head);
+}
+
static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct hsr_priv *hsr = netdev_priv(dev);
@@ -118,6 +134,7 @@ static struct rtnl_link_ops hsr_link_ops __read_mostly = {
.priv_size = sizeof(struct hsr_priv),
.setup = hsr_dev_setup,
.newlink = hsr_newlink,
+ .dellink = hsr_dellink,
.fill_info = hsr_fill_info,
};
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 6ecbb0ced177..e64e59b536d3 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -340,29 +340,31 @@ config NET_FOU_IP_TUNNELS
config INET_AH
tristate "IP: AH transformation"
- select XFRM_ALGO
- select CRYPTO
- select CRYPTO_HMAC
- select CRYPTO_MD5
- select CRYPTO_SHA1
+ select XFRM_AH
help
- Support for IPsec AH.
+ Support for IPsec AH (Authentication Header).
+
+ AH can be used with various authentication algorithms. Besides
+ enabling AH support itself, this option enables the generic
+ implementations of the algorithms that RFC 8221 lists as MUST be
+ implemented. If you need any other algorithms, you'll need to enable
+ them in the crypto API. You should also enable accelerated
+ implementations of any needed algorithms when available.
If unsure, say Y.
config INET_ESP
tristate "IP: ESP transformation"
- select XFRM_ALGO
- select CRYPTO
- select CRYPTO_AUTHENC
- select CRYPTO_HMAC
- select CRYPTO_MD5
- select CRYPTO_CBC
- select CRYPTO_SHA1
- select CRYPTO_DES
- select CRYPTO_ECHAINIV
+ select XFRM_ESP
help
- Support for IPsec ESP.
+ Support for IPsec ESP (Encapsulating Security Payload).
+
+ ESP can be used with various encryption and authentication algorithms.
+ Besides enabling ESP support itself, this option enables the generic
+ implementations of the algorithms that RFC 8221 lists as MUST be
+ implemented. If you need any other algorithms, you'll need to enable
+ them in the crypto API. You should also enable accelerated
+ implementations of any needed algorithms when available.
If unsure, say Y.
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 02aa5cb3a4fd..ea6ed6d487ed 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1432,10 +1432,6 @@ static struct sk_buff *ipip_gso_segment(struct sk_buff *skb,
return inet_gso_segment(skb, features);
}
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *,
- struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
- struct sk_buff *));
struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
{
const struct net_offload *ops;
@@ -1608,8 +1604,6 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
return -EINVAL;
}
-INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *, int));
-INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));
int inet_gro_complete(struct sk_buff *skb, int nhoff)
{
__be16 newlen = htons(skb->len - nhoff);
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index d14133eac476..5bda5aeda579 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -361,3 +361,4 @@ module_exit(esp4_offload_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET, XFRM_PROTO_ESP);
+MODULE_DESCRIPTION("IPV4 GSO/GRO offload support");
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index e53871e4a097..1f75dc686b6b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1109,7 +1109,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
if (fl4.flowi4_scope < RT_SCOPE_LINK)
fl4.flowi4_scope = RT_SCOPE_LINK;
- if (table)
+ if (table && table != RT_TABLE_MAIN)
tbl = fib_get_table(net, table);
if (tbl)
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index dcc79ff54b41..abd083415f89 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -1304,3 +1304,4 @@ module_init(fou_init);
module_exit(fou_fini);
MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Foo over UDP");
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 090d3097ee15..d946356187ed 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -539,6 +539,12 @@ no_route:
}
EXPORT_SYMBOL(__ip_queue_xmit);
+int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
+{
+ return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos);
+}
+EXPORT_SYMBOL(ip_queue_xmit);
+
static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
to->pkt_type = from->pkt_type;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index f4f1d11eab50..0c1f36404471 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -85,9 +85,10 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
__be32 remote, __be32 local,
__be32 key)
{
- unsigned int hash;
struct ip_tunnel *t, *cand = NULL;
struct hlist_head *head;
+ struct net_device *ndev;
+ unsigned int hash;
hash = ip_tunnel_hash(key, remote);
head = &itn->tunnels[hash];
@@ -162,8 +163,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
if (t && t->dev->flags & IFF_UP)
return t;
- if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
- return netdev_priv(itn->fb_tunnel_dev);
+ ndev = READ_ONCE(itn->fb_tunnel_dev);
+ if (ndev && ndev->flags & IFF_UP)
+ return netdev_priv(ndev);
return NULL;
}
@@ -1259,9 +1261,9 @@ void ip_tunnel_uninit(struct net_device *dev)
struct ip_tunnel_net *itn;
itn = net_generic(net, tunnel->ip_tnl_net_id);
- /* fb_tunnel_dev will be unregisted in net-exit call. */
- if (itn->fb_tunnel_dev != dev)
- ip_tunnel_del(itn, netdev_priv(dev));
+ ip_tunnel_del(itn, netdev_priv(dev));
+ if (itn->fb_tunnel_dev == dev)
+ WRITE_ONCE(itn->fb_tunnel_dev, NULL);
dst_cache_reset(&tunnel->dst_cache);
}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index c2670eaa74e6..5bf9fa06aee0 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1797,11 +1797,22 @@ out_free:
return ret;
}
+void ipt_unregister_table_pre_exit(struct net *net, struct xt_table *table,
+ const struct nf_hook_ops *ops)
+{
+ nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+}
+
+void ipt_unregister_table_exit(struct net *net, struct xt_table *table)
+{
+ __ipt_unregister_table(net, table);
+}
+
void ipt_unregister_table(struct net *net, struct xt_table *table,
const struct nf_hook_ops *ops)
{
if (ops)
- nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+ ipt_unregister_table_pre_exit(net, table, ops);
__ipt_unregister_table(net, table);
}
@@ -1958,6 +1969,8 @@ static void __exit ip_tables_fini(void)
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
+EXPORT_SYMBOL(ipt_unregister_table_pre_exit);
+EXPORT_SYMBOL(ipt_unregister_table_exit);
EXPORT_SYMBOL(ipt_do_table);
module_init(ip_tables_init);
module_exit(ip_tables_fini);
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 748dc3ce58d3..f2984c7eef40 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -118,3 +118,4 @@ module_exit(synproxy_tg4_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Intercept TCP connections and establish them using syncookies");
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 9d54b4017e50..8f7bc1ee7453 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -72,16 +72,24 @@ static int __net_init iptable_filter_net_init(struct net *net)
return 0;
}
+static void __net_exit iptable_filter_net_pre_exit(struct net *net)
+{
+ if (net->ipv4.iptable_filter)
+ ipt_unregister_table_pre_exit(net, net->ipv4.iptable_filter,
+ filter_ops);
+}
+
static void __net_exit iptable_filter_net_exit(struct net *net)
{
if (!net->ipv4.iptable_filter)
return;
- ipt_unregister_table(net, net->ipv4.iptable_filter, filter_ops);
+ ipt_unregister_table_exit(net, net->ipv4.iptable_filter);
net->ipv4.iptable_filter = NULL;
}
static struct pernet_operations iptable_filter_net_ops = {
.init = iptable_filter_net_init,
+ .pre_exit = iptable_filter_net_pre_exit,
.exit = iptable_filter_net_exit,
};
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index bb9266ea3785..f703a717ab1d 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -100,15 +100,23 @@ static int __net_init iptable_mangle_table_init(struct net *net)
return ret;
}
+static void __net_exit iptable_mangle_net_pre_exit(struct net *net)
+{
+ if (net->ipv4.iptable_mangle)
+ ipt_unregister_table_pre_exit(net, net->ipv4.iptable_mangle,
+ mangle_ops);
+}
+
static void __net_exit iptable_mangle_net_exit(struct net *net)
{
if (!net->ipv4.iptable_mangle)
return;
- ipt_unregister_table(net, net->ipv4.iptable_mangle, mangle_ops);
+ ipt_unregister_table_exit(net, net->ipv4.iptable_mangle);
net->ipv4.iptable_mangle = NULL;
}
static struct pernet_operations iptable_mangle_net_ops = {
+ .pre_exit = iptable_mangle_net_pre_exit,
.exit = iptable_mangle_net_exit,
};
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index ad33687b7444..b0143b109f25 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -113,16 +113,22 @@ static int __net_init iptable_nat_table_init(struct net *net)
return ret;
}
+static void __net_exit iptable_nat_net_pre_exit(struct net *net)
+{
+ if (net->ipv4.nat_table)
+ ipt_nat_unregister_lookups(net);
+}
+
static void __net_exit iptable_nat_net_exit(struct net *net)
{
if (!net->ipv4.nat_table)
return;
- ipt_nat_unregister_lookups(net);
- ipt_unregister_table(net, net->ipv4.nat_table, NULL);
+ ipt_unregister_table_exit(net, net->ipv4.nat_table);
net->ipv4.nat_table = NULL;
}
static struct pernet_operations iptable_nat_net_ops = {
+ .pre_exit = iptable_nat_net_pre_exit,
.exit = iptable_nat_net_exit,
};
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 69697eb4bfc6..9abfe6bf2cb9 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -67,15 +67,23 @@ static int __net_init iptable_raw_table_init(struct net *net)
return ret;
}
+static void __net_exit iptable_raw_net_pre_exit(struct net *net)
+{
+ if (net->ipv4.iptable_raw)
+ ipt_unregister_table_pre_exit(net, net->ipv4.iptable_raw,
+ rawtable_ops);
+}
+
static void __net_exit iptable_raw_net_exit(struct net *net)
{
if (!net->ipv4.iptable_raw)
return;
- ipt_unregister_table(net, net->ipv4.iptable_raw, rawtable_ops);
+ ipt_unregister_table_exit(net, net->ipv4.iptable_raw);
net->ipv4.iptable_raw = NULL;
}
static struct pernet_operations iptable_raw_net_ops = {
+ .pre_exit = iptable_raw_net_pre_exit,
.exit = iptable_raw_net_exit,
};
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index ac633c1db97e..415c1975d770 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -62,16 +62,23 @@ static int __net_init iptable_security_table_init(struct net *net)
return ret;
}
+static void __net_exit iptable_security_net_pre_exit(struct net *net)
+{
+ if (net->ipv4.iptable_security)
+ ipt_unregister_table_pre_exit(net, net->ipv4.iptable_security,
+ sectbl_ops);
+}
+
static void __net_exit iptable_security_net_exit(struct net *net)
{
if (!net->ipv4.iptable_security)
return;
-
- ipt_unregister_table(net, net->ipv4.iptable_security, sectbl_ops);
+ ipt_unregister_table_exit(net, net->ipv4.iptable_security);
net->ipv4.iptable_security = NULL;
}
static struct pernet_operations iptable_security_net_ops = {
+ .pre_exit = iptable_security_net_pre_exit,
.exit = iptable_security_net_exit,
};
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
index e32e41b99f0f..aba65fe90345 100644
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -34,3 +34,4 @@ module_exit(nf_flow_ipv4_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NF_FLOWTABLE(AF_INET);
+MODULE_DESCRIPTION("Netfilter flow table support");
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index abf89b972094..bcdb37f86a94 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -107,3 +107,4 @@ module_exit(nft_dup_ipv4_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "dup");
+MODULE_DESCRIPTION("IPv4 nftables packet duplication support");
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index ce294113dbcd..03df986217b7 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -210,3 +210,4 @@ module_exit(nft_fib4_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
MODULE_ALIAS_NFT_AF_EXPR(2, "fib");
+MODULE_DESCRIPTION("nftables fib / ip route lookup support");
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index 7e6fd5cde50f..e408f813f5d8 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -71,3 +71,4 @@ module_exit(nft_reject_ipv4_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject");
+MODULE_DESCRIPTION("IPv4 packet rejection for nftables");
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 8f8eefd3a3ce..c7bf5b26bf0c 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -432,10 +432,9 @@ static void hystart_update(struct sock *sk, u32 delay)
if (hystart_detect & HYSTART_DELAY) {
/* obtain the minimum delay of more than sampling packets */
+ if (ca->curr_rtt > delay)
+ ca->curr_rtt = delay;
if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
- if (ca->curr_rtt > delay)
- ca->curr_rtt = delay;
-
ca->sample_cnt++;
} else {
if (ca->curr_rtt > ca->delay_min +
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 12fda8f27b08..12c26c9565b7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -261,7 +261,8 @@ static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb)
* cwnd may be very low (even just 1 packet), so we should ACK
* immediately.
*/
- inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+ if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
+ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
}
}
@@ -961,6 +962,15 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
}
}
+/* Updates the delivered and delivered_ce counts */
+static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
+ bool ece_ack)
+{
+ tp->delivered += delivered;
+ if (ece_ack)
+ tp->delivered_ce += delivered;
+}
+
/* This procedure tags the retransmission queue when SACKs arrive.
*
* We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
@@ -1137,6 +1147,7 @@ struct tcp_sacktag_state {
struct rate_sample *rate;
int flag;
unsigned int mss_now;
+ u32 sack_delivered;
};
/* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1257,7 +1268,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
sacked |= TCPCB_SACKED_ACKED;
state->flag |= FLAG_DATA_SACKED;
tp->sacked_out += pcount;
- tp->delivered += pcount; /* Out-of-order packets delivered */
+ /* Out-of-order packets delivered */
+ state->sack_delivered += pcount;
/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
if (tp->lost_skb_hint &&
@@ -1683,7 +1695,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
num_sacks, prior_snd_una);
if (found_dup_sack) {
state->flag |= FLAG_DSACKING_ACK;
- tp->delivered++; /* A spurious retransmission is delivered */
+ /* A spurious retransmission is delivered */
+ state->sack_delivered++;
}
/* Eliminate too old ACKs, but take into
@@ -1892,7 +1905,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
/* Emulate SACKs for SACKless connection: account for a new dupack. */
-static void tcp_add_reno_sack(struct sock *sk, int num_dupack)
+static void tcp_add_reno_sack(struct sock *sk, int num_dupack, bool ece_ack)
{
if (num_dupack) {
struct tcp_sock *tp = tcp_sk(sk);
@@ -1903,20 +1916,21 @@ static void tcp_add_reno_sack(struct sock *sk, int num_dupack)
tcp_check_reno_reordering(sk, 0);
delivered = tp->sacked_out - prior_sacked;
if (delivered > 0)
- tp->delivered += delivered;
+ tcp_count_delivered(tp, delivered, ece_ack);
tcp_verify_left_out(tp);
}
}
/* Account for ACK, ACKing some data in Reno Recovery phase. */
-static void tcp_remove_reno_sacks(struct sock *sk, int acked)
+static void tcp_remove_reno_sacks(struct sock *sk, int acked, bool ece_ack)
{
struct tcp_sock *tp = tcp_sk(sk);
if (acked > 0) {
/* One ACK acked hole. The rest eat duplicate ACKs. */
- tp->delivered += max_t(int, acked - tp->sacked_out, 1);
+ tcp_count_delivered(tp, max_t(int, acked - tp->sacked_out, 1),
+ ece_ack);
if (acked - 1 >= tp->sacked_out)
tp->sacked_out = 0;
else
@@ -2696,7 +2710,7 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
* delivered. Lower inflight to clock out (re)tranmissions.
*/
if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
- tcp_add_reno_sack(sk, num_dupack);
+ tcp_add_reno_sack(sk, num_dupack, flag & FLAG_ECE);
else if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
}
@@ -2778,6 +2792,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int fast_rexmit = 0, flag = *ack_flag;
+ bool ece_ack = flag & FLAG_ECE;
bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
tcp_force_fast_retransmit(sk));
@@ -2786,7 +2801,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
/* Now state machine starts.
* A. ECE, hence prohibit cwnd undoing, the reduction is required. */
- if (flag & FLAG_ECE)
+ if (ece_ack)
tp->prior_ssthresh = 0;
/* B. In all the states check for reneging SACKs. */
@@ -2827,7 +2842,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
case TCP_CA_Recovery:
if (!(flag & FLAG_SND_UNA_ADVANCED)) {
if (tcp_is_reno(tp))
- tcp_add_reno_sack(sk, num_dupack);
+ tcp_add_reno_sack(sk, num_dupack, ece_ack);
} else {
if (tcp_try_undo_partial(sk, prior_snd_una))
return;
@@ -2852,7 +2867,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
if (tcp_is_reno(tp)) {
if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
- tcp_add_reno_sack(sk, num_dupack);
+ tcp_add_reno_sack(sk, num_dupack, ece_ack);
}
if (icsk->icsk_ca_state <= TCP_CA_Disorder)
@@ -2876,7 +2891,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
}
/* Otherwise enter Recovery state */
- tcp_enter_recovery(sk, (flag & FLAG_ECE));
+ tcp_enter_recovery(sk, ece_ack);
fast_rexmit = 1;
}
@@ -3052,7 +3067,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
*/
static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
u32 prior_snd_una,
- struct tcp_sacktag_state *sack)
+ struct tcp_sacktag_state *sack, bool ece_ack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
u64 first_ackt, last_ackt;
@@ -3077,8 +3092,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
u8 sacked = scb->sacked;
u32 acked_pcount;
- tcp_ack_tstamp(sk, skb, prior_snd_una);
-
/* Determine how many packets and what bytes were acked, tso and else */
if (after(scb->end_seq, tp->snd_una)) {
if (tcp_skb_pcount(skb) == 1 ||
@@ -3113,7 +3126,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
if (sacked & TCPCB_SACKED_ACKED) {
tp->sacked_out -= acked_pcount;
} else if (tcp_is_sack(tp)) {
- tp->delivered += acked_pcount;
+ tcp_count_delivered(tp, acked_pcount, ece_ack);
if (!tcp_skb_spurious_retrans(tp, skb))
tcp_rack_advance(tp, sacked, scb->end_seq,
tcp_skb_timestamp_us(skb));
@@ -3142,6 +3155,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
if (!fully_acked)
break;
+ tcp_ack_tstamp(sk, skb, prior_snd_una);
+
next = skb_rb_next(skb);
if (unlikely(skb == tp->retransmit_skb_hint))
tp->retransmit_skb_hint = NULL;
@@ -3157,8 +3172,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
tp->snd_up = tp->snd_una;
- if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
- flag |= FLAG_SACK_RENEGING;
+ if (skb) {
+ tcp_ack_tstamp(sk, skb, prior_snd_una);
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+ flag |= FLAG_SACK_RENEGING;
+ }
if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
@@ -3190,7 +3208,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
}
if (tcp_is_reno(tp)) {
- tcp_remove_reno_sacks(sk, pkts_acked);
+ tcp_remove_reno_sacks(sk, pkts_acked, ece_ack);
/* If any of the cumulatively ACKed segments was
* retransmitted, non-SACK case cannot confirm that
@@ -3557,10 +3575,9 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
delivered = tp->delivered - prior_delivered;
NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered);
- if (flag & FLAG_ECE) {
- tp->delivered_ce += delivered;
+ if (flag & FLAG_ECE)
NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered);
- }
+
return delivered;
}
@@ -3584,6 +3601,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
sack_state.first_sackt = 0;
sack_state.rate = &rs;
+ sack_state.sack_delivered = 0;
/* We very likely will need to access rtx queue. */
prefetch(sk->tcp_rtx_queue.rb_node);
@@ -3659,12 +3677,25 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
ack_ev_flags |= CA_ACK_ECE;
}
+ if (sack_state.sack_delivered)
+ tcp_count_delivered(tp, sack_state.sack_delivered,
+ flag & FLAG_ECE);
+
if (flag & FLAG_WIN_UPDATE)
ack_ev_flags |= CA_ACK_WIN_UPDATE;
tcp_in_ack_event(sk, ack_ev_flags);
}
+ /* This is a deviation from RFC3168 since it states that:
+ * "When the TCP data sender is ready to set the CWR bit after reducing
+ * the congestion window, it SHOULD set the CWR bit only on the first
+ * new data packet that it transmits."
+ * We accept CWR on pure ACKs to be more robust
+ * with widely-deployed TCP implementations that do this.
+ */
+ tcp_ecn_accept_cwr(sk, skb);
+
/* We passed data and got it acked, remove any soft error
* log. Something worked...
*/
@@ -3675,7 +3706,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
goto no_queue;
/* See if we can take anything off of the retransmit queue. */
- flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state);
+ flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state,
+ flag & FLAG_ECE);
tcp_rack_update_reo_wnd(sk, &rs);
@@ -4800,8 +4832,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
skb_dst_drop(skb);
__skb_pull(skb, tcp_hdr(skb)->doff * 4);
- tcp_ecn_accept_cwr(sk, skb);
-
tp->rx_opt.dsack = 0;
/* Queue data for delivery to the user.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a50e1990a845..04b70fe31fa2 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1064,6 +1064,10 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
}
+INDIRECT_CALLABLE_DECLARE(int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl));
+INDIRECT_CALLABLE_DECLARE(int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl));
+INDIRECT_CALLABLE_DECLARE(void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb));
+
/* This routine actually transmits TCP packets queued in by
* tcp_do_sendmsg(). This is used by both the initial
* transmission and possible later retransmissions.
@@ -1207,7 +1211,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
}
#endif
- icsk->icsk_af_ops->send_check(sk, skb);
+ INDIRECT_CALL_INET(icsk->icsk_af_ops->send_check,
+ tcp_v6_send_check, tcp_v4_send_check,
+ sk, skb);
if (likely(tcb->tcp_flags & TCPHDR_ACK))
tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
@@ -1235,7 +1241,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
tcp_add_tx_delay(skb, tp);
- err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
+ err = INDIRECT_CALL_INET(icsk->icsk_af_ops->queue_xmit,
+ inet6_csk_xmit, ip_queue_xmit,
+ sk, skb, &inet->cork.fl);
if (unlikely(err > 0)) {
tcp_enter_cwr(sk);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 992cf45fb4f6..f4f19e89af5e 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -49,29 +49,31 @@ config IPV6_OPTIMISTIC_DAD
config INET6_AH
tristate "IPv6: AH transformation"
- select XFRM_ALGO
- select CRYPTO
- select CRYPTO_HMAC
- select CRYPTO_MD5
- select CRYPTO_SHA1
+ select XFRM_AH
help
- Support for IPsec AH.
+ Support for IPsec AH (Authentication Header).
+
+ AH can be used with various authentication algorithms. Besides
+ enabling AH support itself, this option enables the generic
+ implementations of the algorithms that RFC 8221 lists as MUST be
+ implemented. If you need any other algorithms, you'll need to enable
+ them in the crypto API. You should also enable accelerated
+ implementations of any needed algorithms when available.
If unsure, say Y.
config INET6_ESP
tristate "IPv6: ESP transformation"
- select XFRM_ALGO
- select CRYPTO
- select CRYPTO_AUTHENC
- select CRYPTO_HMAC
- select CRYPTO_MD5
- select CRYPTO_CBC
- select CRYPTO_SHA1
- select CRYPTO_DES
- select CRYPTO_ECHAINIV
+ select XFRM_ESP
help
- Support for IPsec ESP.
+ Support for IPsec ESP (Encapsulating Security Payload).
+
+ ESP can be used with various encryption and authentication algorithms.
+ Besides enabling ESP support itself, this option enables the generic
+ implementations of the algorithms that RFC 8221 lists as MUST be
+ implemented. If you need any other algorithms, you'll need to enable
+ them in the crypto API. You should also enable accelerated
+ implementations of any needed algorithms when available.
If unsure, say Y.
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 55addea1948f..1ca516fb30e1 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -395,3 +395,4 @@ module_exit(esp6_offload_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET6, XFRM_PROTO_ESP);
+MODULE_DESCRIPTION("IPV6 GSO/GRO offload support");
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 5a8bbcdcaf2b..e9b366994475 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -580,7 +580,7 @@ looped_back:
hdr->segments_left--;
i = n - hdr->segments_left;
- buf = kzalloc(ipv6_rpl_srh_alloc_size(n + 1) * 2, GFP_ATOMIC);
+ buf = kcalloc(struct_size(hdr, segments.addr, n + 2), 2, GFP_ATOMIC);
if (unlikely(!buf)) {
kfree_skb(skb);
return -1;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index fafe556d21e0..6053ef851555 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -111,11 +111,13 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
} else {
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
+ rt = pol_lookup_func(lookup,
+ net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put_flags(rt, flags);
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
+ rt = pol_lookup_func(lookup,
+ net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put_flags(rt, flags);
@@ -226,7 +228,8 @@ static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto out;
}
- rt = lookup(net, table, flp6, arg->lookup_data, flags);
+ rt = pol_lookup_func(lookup,
+ net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
err = fib6_rule_saddr(net, rule, flags, flp6,
ip6_dst_idev(&rt->dst)->dev);
diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c
index 091f94184dc1..430518ae26fa 100644
--- a/net/ipv6/fou6.c
+++ b/net/ipv6/fou6.c
@@ -224,3 +224,4 @@ module_init(fou6_init);
module_exit(fou6_fini);
MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Foo over UDP (IPv6)");
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index fc5000370030..91e0f2fd2523 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -439,8 +439,8 @@ static int icmp6_iif(const struct sk_buff *skb)
/*
* Send an ICMP message in response to a packet in error
*/
-static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
- const struct in6_addr *force_saddr)
+void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
+ const struct in6_addr *force_saddr)
{
struct inet6_dev *idev = NULL;
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -625,6 +625,7 @@ out:
out_bh_enable:
local_bh_enable();
}
+EXPORT_SYMBOL(icmp6_send);
/* Slightly more convenient version of icmp6_send.
*/
diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c
index 257d2b681246..36c58aa257e8 100644
--- a/net/ipv6/ila/ila_main.c
+++ b/net/ipv6/ila/ila_main.c
@@ -120,3 +120,4 @@ module_init(ila_init);
module_exit(ila_fini);
MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv6: Identifier Locator Addressing (ILA)");
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 49ee89bbcba0..25a90f3f705c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -314,7 +314,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
{
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
+ rt = pol_lookup_func(lookup,
+ net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error == -EAGAIN) {
ip6_rt_put_flags(rt, flags);
rt = net->ipv6.ip6_null_entry;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 781ca8c07a0d..6532bde82b40 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -127,6 +127,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
gre_proto == htons(ETH_P_ERSPAN2)) ?
ARPHRD_ETHER : ARPHRD_IP6GRE;
int score, cand_score = 4;
+ struct net_device *ndev;
for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
if (!ipv6_addr_equal(local, &t->parms.laddr) ||
@@ -238,9 +239,9 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
if (t && t->dev->flags & IFF_UP)
return t;
- dev = ign->fb_tunnel_dev;
- if (dev && dev->flags & IFF_UP)
- return netdev_priv(dev);
+ ndev = READ_ONCE(ign->fb_tunnel_dev);
+ if (ndev && ndev->flags & IFF_UP)
+ return netdev_priv(ndev);
return NULL;
}
@@ -413,6 +414,8 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
ip6gre_tunnel_unlink_md(ign, t);
ip6gre_tunnel_unlink(ign, t);
+ if (ign->fb_tunnel_dev == dev)
+ WRITE_ONCE(ign->fb_tunnel_dev, NULL);
dst_cache_reset(&t->dst_cache);
dev_put(dev);
}
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index e0086758b6ee..70c8c2f36c98 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -9,6 +9,8 @@
#if IS_ENABLED(CONFIG_IPV6)
+#if !IS_BUILTIN(CONFIG_IPV6)
+
static ip6_icmp_send_t __rcu *ip6_icmp_send;
int inet6_register_icmp_sender(ip6_icmp_send_t *fn)
@@ -37,14 +39,12 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
rcu_read_lock();
send = rcu_dereference(ip6_icmp_send);
-
- if (!send)
- goto out;
- send(skb, type, code, info, NULL);
-out:
+ if (send)
+ send(skb, type, code, info, NULL);
rcu_read_unlock();
}
EXPORT_SYMBOL(icmpv6_send);
+#endif
#if IS_ENABLED(CONFIG_NF_NAT)
#include <net/netfilter/nf_conntrack.h>
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 7fbb44736a34..a80f90bf3ae7 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -13,6 +13,8 @@
#include <net/protocol.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
+#include <net/tcp.h>
+#include <net/udp.h>
#include "ip6_offload.h"
@@ -177,10 +179,6 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph,
return len;
}
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *,
- struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
- struct sk_buff *));
INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
struct sk_buff *skb)
{
@@ -319,8 +317,6 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
return inet_gro_receive(head, skb);
}
-INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *, int));
-INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct net_offload *ops;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e27393498ecb..e96a431549bc 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1807,11 +1807,22 @@ out_free:
return ret;
}
+void ip6t_unregister_table_pre_exit(struct net *net, struct xt_table *table,
+ const struct nf_hook_ops *ops)
+{
+ nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+}
+
+void ip6t_unregister_table_exit(struct net *net, struct xt_table *table)
+{
+ __ip6t_unregister_table(net, table);
+}
+
void ip6t_unregister_table(struct net *net, struct xt_table *table,
const struct nf_hook_ops *ops)
{
if (ops)
- nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+ ip6t_unregister_table_pre_exit(net, table, ops);
__ip6t_unregister_table(net, table);
}
@@ -1969,6 +1980,8 @@ static void __exit ip6_tables_fini(void)
EXPORT_SYMBOL(ip6t_register_table);
EXPORT_SYMBOL(ip6t_unregister_table);
+EXPORT_SYMBOL(ip6t_unregister_table_pre_exit);
+EXPORT_SYMBOL(ip6t_unregister_table_exit);
EXPORT_SYMBOL(ip6t_do_table);
module_init(ip6_tables_init);
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index fd1f52a21bf1..d51d0c3e5fe9 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -121,3 +121,4 @@ module_exit(synproxy_tg6_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Intercept IPv6 TCP connections and establish them using syncookies");
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 32667f5d5a33..88337b51ffbf 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -73,16 +73,24 @@ static int __net_init ip6table_filter_net_init(struct net *net)
return 0;
}
+static void __net_exit ip6table_filter_net_pre_exit(struct net *net)
+{
+ if (net->ipv6.ip6table_filter)
+ ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_filter,
+ filter_ops);
+}
+
static void __net_exit ip6table_filter_net_exit(struct net *net)
{
if (!net->ipv6.ip6table_filter)
return;
- ip6t_unregister_table(net, net->ipv6.ip6table_filter, filter_ops);
+ ip6t_unregister_table_exit(net, net->ipv6.ip6table_filter);
net->ipv6.ip6table_filter = NULL;
}
static struct pernet_operations ip6table_filter_net_ops = {
.init = ip6table_filter_net_init,
+ .pre_exit = ip6table_filter_net_pre_exit,
.exit = ip6table_filter_net_exit,
};
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 070afb97fa2b..1a2748611e00 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -93,16 +93,24 @@ static int __net_init ip6table_mangle_table_init(struct net *net)
return ret;
}
+static void __net_exit ip6table_mangle_net_pre_exit(struct net *net)
+{
+ if (net->ipv6.ip6table_mangle)
+ ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_mangle,
+ mangle_ops);
+}
+
static void __net_exit ip6table_mangle_net_exit(struct net *net)
{
if (!net->ipv6.ip6table_mangle)
return;
- ip6t_unregister_table(net, net->ipv6.ip6table_mangle, mangle_ops);
+ ip6t_unregister_table_exit(net, net->ipv6.ip6table_mangle);
net->ipv6.ip6table_mangle = NULL;
}
static struct pernet_operations ip6table_mangle_net_ops = {
+ .pre_exit = ip6table_mangle_net_pre_exit,
.exit = ip6table_mangle_net_exit,
};
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 0f4875952efc..0a23265e3caa 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -114,16 +114,22 @@ static int __net_init ip6table_nat_table_init(struct net *net)
return ret;
}
+static void __net_exit ip6table_nat_net_pre_exit(struct net *net)
+{
+ if (net->ipv6.ip6table_nat)
+ ip6t_nat_unregister_lookups(net);
+}
+
static void __net_exit ip6table_nat_net_exit(struct net *net)
{
if (!net->ipv6.ip6table_nat)
return;
- ip6t_nat_unregister_lookups(net);
- ip6t_unregister_table(net, net->ipv6.ip6table_nat, NULL);
+ ip6t_unregister_table_exit(net, net->ipv6.ip6table_nat);
net->ipv6.ip6table_nat = NULL;
}
static struct pernet_operations ip6table_nat_net_ops = {
+ .pre_exit = ip6table_nat_net_pre_exit,
.exit = ip6table_nat_net_exit,
};
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index a22100b1cf2c..8f9e742226f7 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -66,15 +66,23 @@ static int __net_init ip6table_raw_table_init(struct net *net)
return ret;
}
+static void __net_exit ip6table_raw_net_pre_exit(struct net *net)
+{
+ if (net->ipv6.ip6table_raw)
+ ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_raw,
+ rawtable_ops);
+}
+
static void __net_exit ip6table_raw_net_exit(struct net *net)
{
if (!net->ipv6.ip6table_raw)
return;
- ip6t_unregister_table(net, net->ipv6.ip6table_raw, rawtable_ops);
+ ip6t_unregister_table_exit(net, net->ipv6.ip6table_raw);
net->ipv6.ip6table_raw = NULL;
}
static struct pernet_operations ip6table_raw_net_ops = {
+ .pre_exit = ip6table_raw_net_pre_exit,
.exit = ip6table_raw_net_exit,
};
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index a74335fe2bd9..5e8c48fed032 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -61,15 +61,23 @@ static int __net_init ip6table_security_table_init(struct net *net)
return ret;
}
+static void __net_exit ip6table_security_net_pre_exit(struct net *net)
+{
+ if (net->ipv6.ip6table_security)
+ ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_security,
+ sectbl_ops);
+}
+
static void __net_exit ip6table_security_net_exit(struct net *net)
{
if (!net->ipv6.ip6table_security)
return;
- ip6t_unregister_table(net, net->ipv6.ip6table_security, sectbl_ops);
+ ip6t_unregister_table_exit(net, net->ipv6.ip6table_security);
net->ipv6.ip6table_security = NULL;
}
static struct pernet_operations ip6table_security_net_ops = {
+ .pre_exit = ip6table_security_net_pre_exit,
.exit = ip6table_security_net_exit,
};
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
index a8566ee12e83..667b8af2546a 100644
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -35,3 +35,4 @@ module_exit(nf_flow_ipv6_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);
+MODULE_DESCRIPTION("Netfilter flow table IPv6 module");
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index 2af32200507d..8b5193efb1f1 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -105,3 +105,4 @@ module_exit(nft_dup_ipv6_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "dup");
+MODULE_DESCRIPTION("IPv6 nftables packet duplication support");
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 7ece86afd079..e204163c7036 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -255,3 +255,4 @@ module_exit(nft_fib6_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
MODULE_ALIAS_NFT_AF_EXPR(10, "fib");
+MODULE_DESCRIPTION("nftables fib / ipv6 route lookup support");
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index 680a28ce29fd..c1098a1968e1 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -72,3 +72,4 @@ module_exit(nft_reject_ipv6_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "reject");
+MODULE_DESCRIPTION("IPv6 packet rejection for nftables");
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 98ac32b49d8c..6caa062f68e7 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -114,6 +114,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
ipcm6_init_sk(&ipc6, np);
+ ipc6.sockc.mark = sk->sk_mark;
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 82cbb46a2a4f..5852039ca9cf 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1207,7 +1207,7 @@ fallback:
return nrt;
}
-static struct rt6_info *ip6_pol_route_lookup(struct net *net,
+INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6,
const struct sk_buff *skb,
@@ -2274,7 +2274,7 @@ out:
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
-static struct rt6_info *ip6_pol_route_input(struct net *net,
+INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_input(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6,
const struct sk_buff *skb,
@@ -2465,7 +2465,7 @@ void ip6_route_input(struct sk_buff *skb)
&fl6, skb, flags));
}
-static struct rt6_info *ip6_pol_route_output(struct net *net,
+INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6,
const struct sk_buff *skb,
@@ -2912,7 +2912,7 @@ struct ip6rd_flowi {
struct in6_addr gateway;
};
-static struct rt6_info *__ip6_route_redirect(struct net *net,
+INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6,
const struct sk_buff *skb,
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index c3ececd7cfc1..5fdf3ebb953f 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -136,8 +136,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
oldhdr = ipv6_hdr(skb);
- buf = kzalloc(ipv6_rpl_srh_alloc_size(srh->segments_left - 1) * 2,
- GFP_ATOMIC);
+ buf = kcalloc(struct_size(srh, segments.addr, srh->segments_left), 2, GFP_ATOMIC);
if (!buf)
return -ENOMEM;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f67d45ff00b4..4502db706f75 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1811,6 +1811,13 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_destructor = tcp_twsk_destructor,
};
+INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
+}
+
const struct inet_connection_sock_af_ops ipv6_specific = {
.queue_xmit = inet6_csk_xmit,
.send_check = tcp_v6_send_check,
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index fd5ac2788e45..3099efa19249 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -73,7 +73,7 @@ static void l2tp_eth_dev_uninit(struct net_device *dev)
*/
}
-static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct l2tp_eth *priv = netdev_priv(dev);
struct l2tp_session *session = priv->session;
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index f35899d45a9a..e71ca5aec684 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -9,6 +9,99 @@
#include <net/fib_rules.h>
#include <net/l3mdev.h>
+static DEFINE_SPINLOCK(l3mdev_lock);
+
+struct l3mdev_handler {
+ lookup_by_table_id_t dev_lookup;
+};
+
+static struct l3mdev_handler l3mdev_handlers[L3MDEV_TYPE_MAX + 1];
+
+static int l3mdev_check_type(enum l3mdev_type l3type)
+{
+ if (l3type <= L3MDEV_TYPE_UNSPEC || l3type > L3MDEV_TYPE_MAX)
+ return -EINVAL;
+
+ return 0;
+}
+
+int l3mdev_table_lookup_register(enum l3mdev_type l3type,
+ lookup_by_table_id_t fn)
+{
+ struct l3mdev_handler *hdlr;
+ int res;
+
+ res = l3mdev_check_type(l3type);
+ if (res)
+ return res;
+
+ hdlr = &l3mdev_handlers[l3type];
+
+ spin_lock(&l3mdev_lock);
+
+ if (hdlr->dev_lookup) {
+ res = -EBUSY;
+ goto unlock;
+ }
+
+ hdlr->dev_lookup = fn;
+ res = 0;
+
+unlock:
+ spin_unlock(&l3mdev_lock);
+
+ return res;
+}
+EXPORT_SYMBOL_GPL(l3mdev_table_lookup_register);
+
+void l3mdev_table_lookup_unregister(enum l3mdev_type l3type,
+ lookup_by_table_id_t fn)
+{
+ struct l3mdev_handler *hdlr;
+
+ if (l3mdev_check_type(l3type))
+ return;
+
+ hdlr = &l3mdev_handlers[l3type];
+
+ spin_lock(&l3mdev_lock);
+
+ if (hdlr->dev_lookup == fn)
+ hdlr->dev_lookup = NULL;
+
+ spin_unlock(&l3mdev_lock);
+}
+EXPORT_SYMBOL_GPL(l3mdev_table_lookup_unregister);
+
+int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type,
+ struct net *net, u32 table_id)
+{
+ lookup_by_table_id_t lookup;
+ struct l3mdev_handler *hdlr;
+ int ifindex = -EINVAL;
+ int res;
+
+ res = l3mdev_check_type(l3type);
+ if (res)
+ return res;
+
+ hdlr = &l3mdev_handlers[l3type];
+
+ spin_lock(&l3mdev_lock);
+
+ lookup = hdlr->dev_lookup;
+ if (!lookup)
+ goto unlock;
+
+ ifindex = lookup(net, table_id);
+
+unlock:
+ spin_unlock(&l3mdev_lock);
+
+ return ifindex;
+}
+EXPORT_SYMBOL_GPL(l3mdev_ifindex_lookup_by_table_id);
+
/**
* l3mdev_master_ifindex - get index of L3 master device
* @dev: targeted interface
diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
index a9ed3bf1d93f..af84fce70bb0 100644
--- a/net/mptcp/Kconfig
+++ b/net/mptcp/Kconfig
@@ -18,12 +18,20 @@ config MPTCP_IPV6
select IPV6
default y
-config MPTCP_HMAC_TEST
- bool "Tests for MPTCP HMAC implementation"
+endif
+
+config MPTCP_KUNIT_TESTS
+ tristate "This builds the MPTCP KUnit tests" if !KUNIT_ALL_TESTS
+ select MPTCP
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
help
- This option enable boot time self-test for the HMAC implementation
- used by the MPTCP code
+ Currently covers the MPTCP crypto and token helpers.
+ Only useful for kernel devs running KUnit test harness and are not
+ for inclusion into a production build.
- Say N if you are unsure.
+ For more information on KUnit and unit tests in general please refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+ If unsure, say N.
-endif
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index baa0640527c7..c53f9b845523 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -3,3 +3,7 @@ obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
mib.o pm_netlink.o
+
+mptcp_crypto_test-objs := crypto_test.o
+mptcp_token_test-objs := token_test.o
+obj-$(CONFIG_MPTCP_KUNIT_TESTS) += mptcp_crypto_test.o mptcp_token_test.o
diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c
index 3d980713a9e2..6c4ea979dfd4 100644
--- a/net/mptcp/crypto.c
+++ b/net/mptcp/crypto.c
@@ -87,65 +87,6 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
sha256_final(&state, (u8 *)hmac);
}
-#ifdef CONFIG_MPTCP_HMAC_TEST
-struct test_cast {
- char *key;
- char *msg;
- char *result;
-};
-
-/* we can't reuse RFC 4231 test vectors, as we have constraint on the
- * input and key size.
- */
-static struct test_cast tests[] = {
- {
- .key = "0b0b0b0b0b0b0b0b",
- .msg = "48692054",
- .result = "8385e24fb4235ac37556b6b886db106284a1da671699f46db1f235ec622dcafa",
- },
- {
- .key = "aaaaaaaaaaaaaaaa",
- .msg = "dddddddd",
- .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492984e1eb71aff9022f71046e9",
- },
- {
- .key = "0102030405060708",
- .msg = "cdcdcdcd",
- .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6f23b4d8c4da736a5dbbc6e7d",
- },
-};
-
-static int __init test_mptcp_crypto(void)
-{
- char hmac[32], hmac_hex[65];
- u32 nonce1, nonce2;
- u64 key1, key2;
- u8 msg[8];
- int i, j;
-
- for (i = 0; i < ARRAY_SIZE(tests); ++i) {
- /* mptcp hmap will convert to be before computing the hmac */
- key1 = be64_to_cpu(*((__be64 *)&tests[i].key[0]));
- key2 = be64_to_cpu(*((__be64 *)&tests[i].key[8]));
- nonce1 = be32_to_cpu(*((__be32 *)&tests[i].msg[0]));
- nonce2 = be32_to_cpu(*((__be32 *)&tests[i].msg[4]));
-
- put_unaligned_be32(nonce1, &msg[0]);
- put_unaligned_be32(nonce2, &msg[4]);
-
- mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
- for (j = 0; j < 32; ++j)
- sprintf(&hmac_hex[j << 1], "%02x", hmac[j] & 0xff);
- hmac_hex[64] = 0;
-
- if (memcmp(hmac_hex, tests[i].result, 64))
- pr_err("test %d failed, got %s expected %s", i,
- hmac_hex, tests[i].result);
- else
- pr_info("test %d [ ok ]", i);
- }
- return 0;
-}
-
-late_initcall(test_mptcp_crypto);
+#if IS_MODULE(CONFIG_MPTCP_KUNIT_TESTS)
+EXPORT_SYMBOL_GPL(mptcp_crypto_hmac_sha);
#endif
diff --git a/net/mptcp/crypto_test.c b/net/mptcp/crypto_test.c
new file mode 100644
index 000000000000..017248dea038
--- /dev/null
+++ b/net/mptcp/crypto_test.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <kunit/test.h>
+
+#include "protocol.h"
+
+struct test_case {
+ char *key;
+ char *msg;
+ char *result;
+};
+
+/* we can't reuse RFC 4231 test vectors, as we have constraint on the
+ * input and key size.
+ */
+static struct test_case tests[] = {
+ {
+ .key = "0b0b0b0b0b0b0b0b",
+ .msg = "48692054",
+ .result = "8385e24fb4235ac37556b6b886db106284a1da671699f46db1f235ec622dcafa",
+ },
+ {
+ .key = "aaaaaaaaaaaaaaaa",
+ .msg = "dddddddd",
+ .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492984e1eb71aff9022f71046e9",
+ },
+ {
+ .key = "0102030405060708",
+ .msg = "cdcdcdcd",
+ .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6f23b4d8c4da736a5dbbc6e7d",
+ },
+};
+
+static void mptcp_crypto_test_basic(struct kunit *test)
+{
+ char hmac[32], hmac_hex[65];
+ u32 nonce1, nonce2;
+ u64 key1, key2;
+ u8 msg[8];
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+ /* mptcp hmap will convert to be before computing the hmac */
+ key1 = be64_to_cpu(*((__be64 *)&tests[i].key[0]));
+ key2 = be64_to_cpu(*((__be64 *)&tests[i].key[8]));
+ nonce1 = be32_to_cpu(*((__be32 *)&tests[i].msg[0]));
+ nonce2 = be32_to_cpu(*((__be32 *)&tests[i].msg[4]));
+
+ put_unaligned_be32(nonce1, &msg[0]);
+ put_unaligned_be32(nonce2, &msg[4]);
+
+ mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
+ for (j = 0; j < 32; ++j)
+ sprintf(&hmac_hex[j << 1], "%02x", hmac[j] & 0xff);
+ hmac_hex[64] = 0;
+
+ KUNIT_EXPECT_STREQ(test, &hmac_hex[0], tests[i].result);
+ }
+}
+
+static struct kunit_case mptcp_crypto_test_cases[] = {
+ KUNIT_CASE(mptcp_crypto_test_basic),
+ {}
+};
+
+static struct kunit_suite mptcp_crypto_suite = {
+ .name = "mptcp-crypto",
+ .test_cases = mptcp_crypto_test_cases,
+};
+
+kunit_test_suite(mptcp_crypto_suite);
+
+MODULE_LICENSE("GPL");
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 490b92534afc..b96d3660562f 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -336,9 +336,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
*/
subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
if (subflow->request_mptcp) {
- pr_debug("local_key=%llu", subflow->local_key);
opts->suboptions = OPTION_MPTCP_MPC_SYN;
- opts->sndr_key = subflow->local_key;
*size = TCPOLEN_MPTCP_MPC_SYN;
return true;
} else if (subflow->request_join) {
@@ -626,6 +624,9 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
opts->suboptions = 0;
+ if (unlikely(mptcp_check_fallback(sk)))
+ return false;
+
if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
ret = true;
else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
@@ -716,7 +717,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
*/
if (!mp_opt->mp_capable) {
subflow->mp_capable = 0;
- tcp_sk(sk)->is_mptcp = 0;
+ pr_fallback(msk);
+ __mptcp_do_fallback(msk);
return false;
}
@@ -816,6 +818,9 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
struct mptcp_options_received mp_opt;
struct mptcp_ext *mpext;
+ if (__mptcp_check_fallback(msk))
+ return;
+
mptcp_get_options(skb, &mp_opt);
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return;
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 977d9c8b1453..7de09fdd42a3 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -234,7 +234,7 @@ void mptcp_pm_close(struct mptcp_sock *msk)
sock_put((struct sock *)msk);
}
-void mptcp_pm_init(void)
+void __init mptcp_pm_init(void)
{
pm_wq = alloc_workqueue("pm_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 8);
if (!pm_wq)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index b78edf237ba0..c8820c4156e6 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -851,7 +851,7 @@ static struct pernet_operations mptcp_pm_pernet_ops = {
.size = sizeof(struct pm_nl_pernet),
};
-void mptcp_pm_nl_init(void)
+void __init mptcp_pm_nl_init(void)
{
if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0)
panic("Failed to register MPTCP PM pernet subsystem.\n");
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3980fbb6f31e..fa137a9c42d1 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -52,18 +52,10 @@ static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
return msk->subflow;
}
-static bool __mptcp_needs_tcp_fallback(const struct mptcp_sock *msk)
-{
- return msk->first && !sk_is_mptcp(msk->first);
-}
-
-static struct socket *mptcp_is_tcpsk(struct sock *sk)
+static bool mptcp_is_tcpsk(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
- if (sock->sk != sk)
- return NULL;
-
if (unlikely(sk->sk_prot == &tcp_prot)) {
/* we are being invoked after mptcp_accept() has
* accepted a non-mp-capable flow: sk is a tcp_sk,
@@ -73,59 +65,37 @@ static struct socket *mptcp_is_tcpsk(struct sock *sk)
* bypass mptcp.
*/
sock->ops = &inet_stream_ops;
- return sock;
+ return true;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
} else if (unlikely(sk->sk_prot == &tcpv6_prot)) {
sock->ops = &inet6_stream_ops;
- return sock;
+ return true;
#endif
}
- return NULL;
+ return false;
}
-static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk)
+static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
- struct socket *sock;
-
sock_owned_by_me((const struct sock *)msk);
- sock = mptcp_is_tcpsk((struct sock *)msk);
- if (unlikely(sock))
- return sock;
-
- if (likely(!__mptcp_needs_tcp_fallback(msk)))
+ if (likely(!__mptcp_check_fallback(msk)))
return NULL;
- return msk->subflow;
-}
-
-static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
-{
- return !msk->first;
+ return msk->first;
}
-static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
+static int __mptcp_socket_create(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
struct socket *ssock;
int err;
- ssock = __mptcp_tcp_fallback(msk);
- if (unlikely(ssock))
- return ssock;
-
- ssock = __mptcp_nmpc_socket(msk);
- if (ssock)
- goto set_state;
-
- if (!__mptcp_can_create_subflow(msk))
- return ERR_PTR(-EINVAL);
-
err = mptcp_subflow_create_socket(sk, &ssock);
if (err)
- return ERR_PTR(err);
+ return err;
msk->first = ssock->sk;
msk->subflow = ssock;
@@ -133,10 +103,12 @@ static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
list_add(&subflow->node, &msk->conn_list);
subflow->request_mptcp = 1;
-set_state:
- if (state != MPTCP_SAME_STATE)
- inet_sk_state_store(sk, state);
- return ssock;
+ /* accept() will wait on first subflow sk_wq, and we always wakes up
+ * via msk->sk_socket
+ */
+ RCU_INIT_POINTER(msk->first->sk_wq, &sk->sk_socket->wq);
+
+ return 0;
}
static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
@@ -207,13 +179,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
return false;
}
- if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int rcvbuf = max(ssk->sk_rcvbuf, sk->sk_rcvbuf);
-
- if (rcvbuf > sk->sk_rcvbuf)
- sk->sk_rcvbuf = rcvbuf;
- }
-
tp = tcp_sk(ssk);
do {
u32 map_remaining, offset;
@@ -229,6 +194,15 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
if (!skb)
break;
+ if (__mptcp_check_fallback(msk)) {
+ /* if we are running under the workqueue, TCP could have
+ * collapsed skbs between dummy map creation and now
+ * be sure to adjust the size
+ */
+ map_remaining = skb->len;
+ subflow->map_data_len = skb->len;
+ }
+
offset = seq - TCP_SKB_CB(skb)->seq;
fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
if (fin) {
@@ -466,8 +440,15 @@ static void mptcp_clean_una(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_data_frag *dtmp, *dfrag;
- u64 snd_una = atomic64_read(&msk->snd_una);
bool cleaned = false;
+ u64 snd_una;
+
+ /* on fallback we just need to ignore snd_una, as this is really
+ * plain TCP
+ */
+ if (__mptcp_check_fallback(msk))
+ atomic64_set(&msk->snd_una, msk->write_seq);
+ snd_una = atomic64_read(&msk->snd_una);
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) {
if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
@@ -740,7 +721,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int mss_now = 0, size_goal = 0, ret = 0;
struct mptcp_sock *msk = mptcp_sk(sk);
struct page_frag *pfrag;
- struct socket *ssock;
size_t copied = 0;
struct sock *ssk;
bool tx_ok;
@@ -759,15 +739,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out;
}
-fallback:
- ssock = __mptcp_tcp_fallback(msk);
- if (unlikely(ssock)) {
- release_sock(sk);
- pr_debug("fallback passthrough");
- ret = sock_sendmsg(ssock, msg);
- return ret >= 0 ? ret + copied : (copied ? copied : ret);
- }
-
pfrag = sk_page_frag(sk);
restart:
mptcp_clean_una(sk);
@@ -819,17 +790,6 @@ wait_for_sndbuf:
}
break;
}
- if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) {
- /* Can happen for passive sockets:
- * 3WHS negotiated MPTCP, but first packet after is
- * plain TCP (e.g. due to middlebox filtering unknown
- * options).
- *
- * Fall back to TCP.
- */
- release_sock(ssk);
- goto fallback;
- }
copied += ret;
@@ -949,6 +909,100 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
return copied;
}
+/* receive buffer autotuning. See tcp_rcv_space_adjust for more information.
+ *
+ * Only difference: Use highest rtt estimate of the subflows in use.
+ */
+static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ u32 time, advmss = 1;
+ u64 rtt_us, mstamp;
+
+ sock_owned_by_me(sk);
+
+ if (copied <= 0)
+ return;
+
+ msk->rcvq_space.copied += copied;
+
+ mstamp = div_u64(tcp_clock_ns(), NSEC_PER_USEC);
+ time = tcp_stamp_us_delta(mstamp, msk->rcvq_space.time);
+
+ rtt_us = msk->rcvq_space.rtt_us;
+ if (rtt_us && time < (rtt_us >> 3))
+ return;
+
+ rtt_us = 0;
+ mptcp_for_each_subflow(msk, subflow) {
+ const struct tcp_sock *tp;
+ u64 sf_rtt_us;
+ u32 sf_advmss;
+
+ tp = tcp_sk(mptcp_subflow_tcp_sock(subflow));
+
+ sf_rtt_us = READ_ONCE(tp->rcv_rtt_est.rtt_us);
+ sf_advmss = READ_ONCE(tp->advmss);
+
+ rtt_us = max(sf_rtt_us, rtt_us);
+ advmss = max(sf_advmss, advmss);
+ }
+
+ msk->rcvq_space.rtt_us = rtt_us;
+ if (time < (rtt_us >> 3) || rtt_us == 0)
+ return;
+
+ if (msk->rcvq_space.copied <= msk->rcvq_space.space)
+ goto new_measure;
+
+ if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+ int rcvmem, rcvbuf;
+ u64 rcvwin, grow;
+
+ rcvwin = ((u64)msk->rcvq_space.copied << 1) + 16 * advmss;
+
+ grow = rcvwin * (msk->rcvq_space.copied - msk->rcvq_space.space);
+
+ do_div(grow, msk->rcvq_space.space);
+ rcvwin += (grow << 1);
+
+ rcvmem = SKB_TRUESIZE(advmss + MAX_TCP_HEADER);
+ while (tcp_win_from_space(sk, rcvmem) < advmss)
+ rcvmem += 128;
+
+ do_div(rcvwin, advmss);
+ rcvbuf = min_t(u64, rcvwin * rcvmem,
+ sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+
+ if (rcvbuf > sk->sk_rcvbuf) {
+ u32 window_clamp;
+
+ window_clamp = tcp_win_from_space(sk, rcvbuf);
+ WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
+
+ /* Make subflows follow along. If we do not do this, we
+ * get drops at subflow level if skbs can't be moved to
+ * the mptcp rx queue fast enough (announced rcv_win can
+ * exceed ssk->sk_rcvbuf).
+ */
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf);
+ tcp_sk(ssk)->window_clamp = window_clamp;
+ }
+ }
+ }
+
+ msk->rcvq_space.space = msk->rcvq_space.copied;
+new_measure:
+ msk->rcvq_space.copied = 0;
+ msk->rcvq_space.time = mstamp;
+}
+
static bool __mptcp_move_skbs(struct mptcp_sock *msk)
{
unsigned int moved = 0;
@@ -972,7 +1026,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
int copied = 0;
int target;
long timeo;
@@ -981,16 +1034,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return -EOPNOTSUPP;
lock_sock(sk);
- ssock = __mptcp_tcp_fallback(msk);
- if (unlikely(ssock)) {
-fallback:
- release_sock(sk);
- pr_debug("fallback-read subflow=%p",
- mptcp_subflow_ctx(ssock->sk));
- copied = sock_recvmsg(ssock, msg, flags);
- return copied;
- }
-
timeo = sock_rcvtimeo(sk, nonblock);
len = min_t(size_t, len, INT_MAX);
@@ -1056,9 +1099,6 @@ fallback:
pr_debug("block timeout %ld", timeo);
mptcp_wait_data(sk, &timeo);
- ssock = __mptcp_tcp_fallback(msk);
- if (unlikely(ssock))
- goto fallback;
}
if (skb_queue_empty(&sk->sk_receive_queue)) {
@@ -1075,6 +1115,8 @@ fallback:
set_bit(MPTCP_DATA_READY, &msk->flags);
}
out_err:
+ mptcp_rcv_space_adjust(msk, copied);
+
release_sock(sk);
return copied;
}
@@ -1283,7 +1325,12 @@ static int mptcp_init_sock(struct sock *sk)
if (ret)
return ret;
+ ret = __mptcp_socket_create(mptcp_sk(sk));
+ if (ret)
+ return ret;
+
sk_sockets_allocated_inc(sk);
+ sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[2];
return 0;
@@ -1335,8 +1382,6 @@ static void mptcp_subflow_shutdown(struct sock *ssk, int how,
break;
}
- /* Wake up anyone sleeping in poll. */
- ssk->sk_state_change(ssk);
release_sock(ssk);
}
@@ -1448,20 +1493,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->token = subflow_req->token;
msk->subflow = NULL;
- if (unlikely(mptcp_token_new_accept(subflow_req->token, nsk))) {
- nsk->sk_state = TCP_CLOSE;
- bh_unlock_sock(nsk);
-
- /* we can't call into mptcp_close() here - possible BH context
- * free the sock directly.
- * sk_clone_lock() sets nsk refcnt to two, hence call sk_free()
- * too.
- */
- sk_common_release(nsk);
- sk_free(nsk);
- return NULL;
- }
-
msk->write_seq = subflow_req->idsn + 1;
atomic64_set(&msk->snd_una, msk->write_seq);
if (mp_opt->mp_capable) {
@@ -1482,6 +1513,22 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
return nsk;
}
+void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
+{
+ const struct tcp_sock *tp = tcp_sk(ssk);
+
+ msk->rcvq_space.copied = 0;
+ msk->rcvq_space.rtt_us = 0;
+
+ msk->rcvq_space.time = tp->tcp_mstamp;
+
+ /* initial rcv_space offering made to peer */
+ msk->rcvq_space.space = min_t(u32, tp->rcv_wnd,
+ TCP_INIT_CWND * tp->advmss);
+ if (msk->rcvq_space.space == 0)
+ msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
+}
+
static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
bool kern)
{
@@ -1501,7 +1548,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
return NULL;
pr_debug("msk=%p, subflow is mptcp=%d", msk, sk_is_mptcp(newsk));
-
if (sk_is_mptcp(newsk)) {
struct mptcp_subflow_context *subflow;
struct sock *new_mptcp_sock;
@@ -1531,6 +1577,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
list_add(&subflow->node, &msk->conn_list);
inet_sk_state_store(newsk, TCP_ESTABLISHED);
+ mptcp_rcv_space_init(msk, ssk);
bh_unlock_sock(new_mptcp_sock);
__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
@@ -1547,7 +1594,7 @@ static void mptcp_destroy(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- mptcp_token_destroy(msk->token);
+ mptcp_token_destroy(msk);
if (msk->cached_ext)
__skb_ext_put(msk->cached_ext);
@@ -1558,7 +1605,7 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
+ struct sock *ssk;
pr_debug("msk=%p", msk);
@@ -1569,11 +1616,10 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname,
* to the one remaining subflow.
*/
lock_sock(sk);
- ssock = __mptcp_tcp_fallback(msk);
+ ssk = __mptcp_tcp_fallback(msk);
release_sock(sk);
- if (ssock)
- return tcp_setsockopt(ssock->sk, level, optname, optval,
- optlen);
+ if (ssk)
+ return tcp_setsockopt(ssk, level, optname, optval, optlen);
return -EOPNOTSUPP;
}
@@ -1582,7 +1628,7 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *option)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
+ struct sock *ssk;
pr_debug("msk=%p", msk);
@@ -1593,11 +1639,10 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname,
* to the one remaining subflow.
*/
lock_sock(sk);
- ssock = __mptcp_tcp_fallback(msk);
+ ssk = __mptcp_tcp_fallback(msk);
release_sock(sk);
- if (ssock)
- return tcp_getsockopt(ssock->sk, level, optname, optval,
- option);
+ if (ssk)
+ return tcp_getsockopt(ssk, level, optname, optval, option);
return -EOPNOTSUPP;
}
@@ -1636,6 +1681,20 @@ static void mptcp_release_cb(struct sock *sk)
}
}
+static int mptcp_hash(struct sock *sk)
+{
+ /* should never be called,
+ * we hash the TCP subflows not the master socket
+ */
+ WARN_ON_ONCE(1);
+ return 0;
+}
+
+static void mptcp_unhash(struct sock *sk)
+{
+ /* called from sk_common_release(), but nothing to do here */
+}
+
static int mptcp_get_port(struct sock *sk, unsigned short snum)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1660,12 +1719,6 @@ void mptcp_finish_connect(struct sock *ssk)
sk = subflow->conn;
msk = mptcp_sk(sk);
- if (!subflow->mp_capable) {
- MPTCP_INC_STATS(sock_net(sk),
- MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
- return;
- }
-
pr_debug("msk=%p, token=%u", sk, subflow->token);
mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
@@ -1679,13 +1732,14 @@ void mptcp_finish_connect(struct sock *ssk)
*/
WRITE_ONCE(msk->remote_key, subflow->remote_key);
WRITE_ONCE(msk->local_key, subflow->local_key);
- WRITE_ONCE(msk->token, subflow->token);
WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
WRITE_ONCE(msk->ack_seq, ack_seq);
WRITE_ONCE(msk->can_ack, 1);
atomic64_set(&msk->snd_una, msk->write_seq);
mptcp_pm_new_connection(msk, 0);
+
+ mptcp_rcv_space_init(msk, ssk);
}
static void mptcp_sock_graft(struct sock *sk, struct socket *parent)
@@ -1761,8 +1815,8 @@ static struct proto mptcp_prot = {
.sendmsg = mptcp_sendmsg,
.recvmsg = mptcp_recvmsg,
.release_cb = mptcp_release_cb,
- .hash = inet_hash,
- .unhash = inet_unhash,
+ .hash = mptcp_hash,
+ .unhash = mptcp_unhash,
.get_port = mptcp_get_port,
.sockets_allocated = &mptcp_sockets_allocated,
.memory_allocated = &tcp_memory_allocated,
@@ -1771,6 +1825,7 @@ static struct proto mptcp_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
.sysctl_mem = sysctl_tcp_mem,
.obj_size = sizeof(struct mptcp_sock),
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
.no_autobind = true,
};
@@ -1781,9 +1836,9 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
int err;
lock_sock(sock->sk);
- ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
- if (IS_ERR(ssock)) {
- err = PTR_ERR(ssock);
+ ssock = __mptcp_nmpc_socket(msk);
+ if (!ssock) {
+ err = -EINVAL;
goto unlock;
}
@@ -1800,6 +1855,7 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
+ struct mptcp_subflow_context *subflow;
struct socket *ssock;
int err;
@@ -1812,19 +1868,24 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
goto do_connect;
}
- ssock = __mptcp_socket_create(msk, TCP_SYN_SENT);
- if (IS_ERR(ssock)) {
- err = PTR_ERR(ssock);
+ ssock = __mptcp_nmpc_socket(msk);
+ if (!ssock) {
+ err = -EINVAL;
goto unlock;
}
+ mptcp_token_destroy(msk);
+ inet_sk_state_store(sock->sk, TCP_SYN_SENT);
+ subflow = mptcp_subflow_ctx(ssock->sk);
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
* TCP option space.
*/
if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
- mptcp_subflow_ctx(ssock->sk)->request_mptcp = 0;
+ subflow->request_mptcp = 0;
#endif
+ if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk))
+ subflow->request_mptcp = 0;
do_connect:
err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
@@ -1843,42 +1904,6 @@ unlock:
return err;
}
-static int mptcp_v4_getname(struct socket *sock, struct sockaddr *uaddr,
- int peer)
-{
- if (sock->sk->sk_prot == &tcp_prot) {
- /* we are being invoked from __sys_accept4, after
- * mptcp_accept() has just accepted a non-mp-capable
- * flow: sk is a tcp_sk, not an mptcp one.
- *
- * Hand the socket over to tcp so all further socket ops
- * bypass mptcp.
- */
- sock->ops = &inet_stream_ops;
- }
-
- return inet_getname(sock, uaddr, peer);
-}
-
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-static int mptcp_v6_getname(struct socket *sock, struct sockaddr *uaddr,
- int peer)
-{
- if (sock->sk->sk_prot == &tcpv6_prot) {
- /* we are being invoked from __sys_accept4 after
- * mptcp_accept() has accepted a non-mp-capable
- * subflow: sk is a tcp_sk, not mptcp.
- *
- * Hand the socket over to tcp so all further
- * socket ops bypass mptcp.
- */
- sock->ops = &inet6_stream_ops;
- }
-
- return inet6_getname(sock, uaddr, peer);
-}
-#endif
-
static int mptcp_listen(struct socket *sock, int backlog)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
@@ -1888,12 +1913,14 @@ static int mptcp_listen(struct socket *sock, int backlog)
pr_debug("msk=%p", msk);
lock_sock(sock->sk);
- ssock = __mptcp_socket_create(msk, TCP_LISTEN);
- if (IS_ERR(ssock)) {
- err = PTR_ERR(ssock);
+ ssock = __mptcp_nmpc_socket(msk);
+ if (!ssock) {
+ err = -EINVAL;
goto unlock;
}
+ mptcp_token_destroy(msk);
+ inet_sk_state_store(sock->sk, TCP_LISTEN);
sock_set_flag(sock->sk, SOCK_RCU_FREE);
err = ssock->ops->listen(ssock, backlog);
@@ -1906,15 +1933,6 @@ unlock:
return err;
}
-static bool is_tcp_proto(const struct proto *p)
-{
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- return p == &tcp_prot || p == &tcpv6_prot;
-#else
- return p == &tcp_prot;
-#endif
-}
-
static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
int flags, bool kern)
{
@@ -1932,11 +1950,12 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssock)
goto unlock_fail;
+ clear_bit(MPTCP_DATA_READY, &msk->flags);
sock_hold(ssock->sk);
release_sock(sock->sk);
err = ssock->ops->accept(sock, newsock, flags, kern);
- if (err == 0 && !is_tcp_proto(newsock->sk->sk_prot)) {
+ if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) {
struct mptcp_sock *msk = mptcp_sk(newsock->sk);
struct mptcp_subflow_context *subflow;
@@ -1952,6 +1971,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
}
}
+ if (inet_csk_listen_poll(ssock->sk))
+ set_bit(MPTCP_DATA_READY, &msk->flags);
sock_put(ssock->sk);
return err;
@@ -1960,39 +1981,36 @@ unlock_fail:
return -EINVAL;
}
+static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
+{
+ return test_bit(MPTCP_DATA_READY, &msk->flags) ? EPOLLIN | EPOLLRDNORM :
+ 0;
+}
+
static __poll_t mptcp_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait)
{
struct sock *sk = sock->sk;
struct mptcp_sock *msk;
- struct socket *ssock;
__poll_t mask = 0;
+ int state;
msk = mptcp_sk(sk);
- lock_sock(sk);
- ssock = __mptcp_tcp_fallback(msk);
- if (!ssock)
- ssock = __mptcp_nmpc_socket(msk);
- if (ssock) {
- mask = ssock->ops->poll(file, ssock, wait);
- release_sock(sk);
- return mask;
- }
-
- release_sock(sk);
sock_poll_wait(file, sock, wait);
- lock_sock(sk);
- if (test_bit(MPTCP_DATA_READY, &msk->flags))
- mask = EPOLLIN | EPOLLRDNORM;
- if (sk_stream_is_writeable(sk) &&
- test_bit(MPTCP_SEND_SPACE, &msk->flags))
- mask |= EPOLLOUT | EPOLLWRNORM;
+ state = inet_sk_state_load(sk);
+ if (state == TCP_LISTEN)
+ return mptcp_check_readable(msk);
+
+ if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
+ mask |= mptcp_check_readable(msk);
+ if (sk_stream_is_writeable(sk) &&
+ test_bit(MPTCP_SEND_SPACE, &msk->flags))
+ mask |= EPOLLOUT | EPOLLWRNORM;
+ }
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
- release_sock(sk);
-
return mask;
}
@@ -2000,18 +2018,11 @@ static int mptcp_shutdown(struct socket *sock, int how)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct mptcp_subflow_context *subflow;
- struct socket *ssock;
int ret = 0;
pr_debug("sk=%p, how=%d", msk, how);
lock_sock(sock->sk);
- ssock = __mptcp_tcp_fallback(msk);
- if (ssock) {
- release_sock(sock->sk);
- return inet_shutdown(ssock, how);
- }
-
if (how == SHUT_WR || how == SHUT_RDWR)
inet_sk_state_store(sock->sk, TCP_FIN_WAIT1);
@@ -2037,6 +2048,9 @@ static int mptcp_shutdown(struct socket *sock, int how)
mptcp_subflow_shutdown(tcp_sk, how, 1, msk->write_seq);
}
+ /* Wake up anyone sleeping in poll. */
+ sock->sk->sk_state_change(sock->sk);
+
out_unlock:
release_sock(sock->sk);
@@ -2051,7 +2065,7 @@ static const struct proto_ops mptcp_stream_ops = {
.connect = mptcp_stream_connect,
.socketpair = sock_no_socketpair,
.accept = mptcp_stream_accept,
- .getname = mptcp_v4_getname,
+ .getname = inet_getname,
.poll = mptcp_poll,
.ioctl = inet_ioctl,
.gettstamp = sock_gettstamp,
@@ -2077,7 +2091,7 @@ static struct inet_protosw mptcp_protosw = {
.flags = INET_PROTOSW_ICSK,
};
-void mptcp_proto_init(void)
+void __init mptcp_proto_init(void)
{
mptcp_prot.h.hashinfo = tcp_prot.h.hashinfo;
@@ -2086,6 +2100,7 @@ void mptcp_proto_init(void)
mptcp_subflow_init();
mptcp_pm_init();
+ mptcp_token_init();
if (proto_register(&mptcp_prot, 1) != 0)
panic("Failed to register MPTCP proto.\n");
@@ -2104,7 +2119,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
.connect = mptcp_stream_connect,
.socketpair = sock_no_socketpair,
.accept = mptcp_stream_accept,
- .getname = mptcp_v6_getname,
+ .getname = inet6_getname,
.poll = mptcp_poll,
.ioctl = inet6_ioctl,
.gettstamp = sock_gettstamp,
@@ -2139,7 +2154,7 @@ static struct inet_protosw mptcp_v6_protosw = {
.flags = INET_PROTOSW_ICSK,
};
-int mptcp_proto_v6_init(void)
+int __init mptcp_proto_v6_init(void)
{
int err;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index db56535dfc29..a6412ff0fddb 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -89,6 +89,7 @@
#define MPTCP_SEND_SPACE 1
#define MPTCP_WORK_RTX 2
#define MPTCP_WORK_EOF 3
+#define MPTCP_FALLBACK_DONE 4
struct mptcp_options_received {
u64 sndr_key;
@@ -208,6 +209,12 @@ struct mptcp_sock {
struct socket *subflow; /* outgoing connect/listener/!mp_capable */
struct sock *first;
struct mptcp_pm_data pm;
+ struct {
+ u32 space; /* bytes copied in last measurement window */
+ u32 copied; /* bytes copied in this measurement window */
+ u64 time; /* start time of measurement window */
+ u64 rtt_us; /* last maximum rtt of subflows */
+ } rcvq_space;
};
#define mptcp_for_each_subflow(__msk, __subflow) \
@@ -249,6 +256,8 @@ struct mptcp_subflow_request_sock {
u64 thmac;
u32 local_nonce;
u32 remote_nonce;
+ struct mptcp_sock *msk;
+ struct hlist_nulls_node token_node;
};
static inline struct mptcp_subflow_request_sock *
@@ -336,7 +345,7 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
int mptcp_is_enabled(struct net *net);
bool mptcp_subflow_data_available(struct sock *sk);
-void mptcp_subflow_init(void);
+void __init mptcp_subflow_init(void);
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, int ifindex,
@@ -354,14 +363,9 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}
-extern const struct inet_connection_sock_af_ops ipv4_specific;
+void __init mptcp_proto_init(void);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-extern const struct inet_connection_sock_af_ops ipv6_specific;
-#endif
-
-void mptcp_proto_init(void);
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-int mptcp_proto_v6_init(void);
+int __init mptcp_proto_v6_init(void);
#endif
struct sock *mptcp_sk_clone(const struct sock *sk,
@@ -371,17 +375,25 @@ void mptcp_get_options(const struct sk_buff *skb,
struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk);
+void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk);
void mptcp_data_acked(struct sock *sk);
void mptcp_subflow_eof(struct sock *sk);
+void __init mptcp_token_init(void);
+static inline void mptcp_token_init_request(struct request_sock *req)
+{
+ mptcp_subflow_rsk(req)->token_node.pprev = NULL;
+}
+
int mptcp_token_new_request(struct request_sock *req);
-void mptcp_token_destroy_request(u32 token);
+void mptcp_token_destroy_request(struct request_sock *req);
int mptcp_token_new_connect(struct sock *sk);
-int mptcp_token_new_accept(u32 token, struct sock *conn);
+void mptcp_token_accept(struct mptcp_subflow_request_sock *r,
+ struct mptcp_sock *msk);
struct mptcp_sock *mptcp_token_get_sock(u32 token);
-void mptcp_token_destroy(u32 token);
+void mptcp_token_destroy(struct mptcp_sock *msk);
void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
static inline void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn)
@@ -398,7 +410,7 @@ static inline void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn)
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
-void mptcp_pm_init(void);
+void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
void mptcp_pm_close(struct mptcp_sock *msk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side);
@@ -432,7 +444,7 @@ bool mptcp_pm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_addr_info *saddr);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
-void mptcp_pm_nl_init(void);
+void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
void mptcp_pm_nl_fully_established(struct mptcp_sock *msk);
void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk);
@@ -453,4 +465,46 @@ static inline bool before64(__u64 seq1, __u64 seq2)
void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);
+static inline bool __mptcp_check_fallback(struct mptcp_sock *msk)
+{
+ return test_bit(MPTCP_FALLBACK_DONE, &msk->flags);
+}
+
+static inline bool mptcp_check_fallback(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ return __mptcp_check_fallback(msk);
+}
+
+static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
+{
+ if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) {
+ pr_debug("TCP fallback already done (msk=%p)", msk);
+ return;
+ }
+ set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
+}
+
+static inline void mptcp_do_fallback(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ __mptcp_do_fallback(msk);
+}
+
+#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a)
+
+static inline bool subflow_simultaneous_connect(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct sock *parent = subflow->conn;
+
+ return sk->sk_state == TCP_ESTABLISHED &&
+ !mptcp_sk(parent)->pm.server_side &&
+ !subflow->conn_finished;
+}
+
#endif /* __MPTCP_PROTOCOL_H */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index bbdb74b8bc3c..e1e19c76e267 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -29,48 +29,16 @@ static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
}
-static int subflow_rebuild_header(struct sock *sk)
-{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- int local_id, err = 0;
-
- if (subflow->request_mptcp && !subflow->token) {
- pr_debug("subflow=%p", sk);
- err = mptcp_token_new_connect(sk);
- } else if (subflow->request_join && !subflow->local_nonce) {
- struct mptcp_sock *msk = (struct mptcp_sock *)subflow->conn;
-
- pr_debug("subflow=%p", sk);
-
- do {
- get_random_bytes(&subflow->local_nonce, sizeof(u32));
- } while (!subflow->local_nonce);
-
- if (subflow->local_id)
- goto out;
-
- local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
- if (local_id < 0)
- return -EINVAL;
-
- subflow->local_id = local_id;
- }
-
-out:
- if (err)
- return err;
-
- return subflow->icsk_af_ops->rebuild_header(sk);
-}
-
static void subflow_req_destructor(struct request_sock *req)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
pr_debug("subflow_req=%p", subflow_req);
- if (subflow_req->mp_capable)
- mptcp_token_destroy_request(subflow_req->token);
+ if (subflow_req->msk)
+ sock_put((struct sock *)subflow_req->msk);
+
+ mptcp_token_destroy_request(req);
tcp_request_sock_ops.destructor(req);
}
@@ -86,8 +54,8 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
}
/* validate received token and create truncated hmac and nonce for SYN-ACK */
-static bool subflow_token_join_request(struct request_sock *req,
- const struct sk_buff *skb)
+static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
+ const struct sk_buff *skb)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
u8 hmac[SHA256_DIGEST_SIZE];
@@ -97,13 +65,13 @@ static bool subflow_token_join_request(struct request_sock *req,
msk = mptcp_token_get_sock(subflow_req->token);
if (!msk) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
- return false;
+ return NULL;
}
local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
if (local_id < 0) {
sock_put((struct sock *)msk);
- return false;
+ return NULL;
}
subflow_req->local_id = local_id;
@@ -114,9 +82,7 @@ static bool subflow_token_join_request(struct request_sock *req,
subflow_req->remote_nonce, hmac);
subflow_req->thmac = get_unaligned_be64(hmac);
-
- sock_put((struct sock *)msk);
- return true;
+ return msk;
}
static void subflow_init_req(struct request_sock *req,
@@ -133,6 +99,8 @@ static void subflow_init_req(struct request_sock *req,
subflow_req->mp_capable = 0;
subflow_req->mp_join = 0;
+ subflow_req->msk = NULL;
+ mptcp_token_init_request(req);
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
@@ -166,12 +134,9 @@ static void subflow_init_req(struct request_sock *req,
subflow_req->remote_id = mp_opt.join_id;
subflow_req->token = mp_opt.token;
subflow_req->remote_nonce = mp_opt.nonce;
- pr_debug("token=%u, remote_nonce=%u", subflow_req->token,
- subflow_req->remote_nonce);
- if (!subflow_token_join_request(req, skb)) {
- subflow_req->mp_join = 0;
- // @@ need to trigger RST
- }
+ subflow_req->msk = subflow_token_join_request(req, skb);
+ pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
+ subflow_req->remote_nonce, subflow_req->msk);
}
}
@@ -223,7 +188,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_options_received mp_opt;
struct sock *parent = subflow->conn;
- struct tcp_sock *tp = tcp_sk(sk);
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
@@ -237,6 +201,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
return;
subflow->conn_finished = 1;
+ subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
+ pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
mptcp_get_options(skb, &mp_opt);
if (subflow->request_mptcp && mp_opt.mp_capable) {
@@ -251,22 +217,23 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->remote_nonce = mp_opt.nonce;
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
subflow->thmac, subflow->remote_nonce);
- } else if (subflow->request_mptcp) {
- tp->is_mptcp = 0;
+ } else {
+ if (subflow->request_mptcp)
+ MPTCP_INC_STATS(sock_net(sk),
+ MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
+ mptcp_do_fallback(sk);
+ pr_fallback(mptcp_sk(subflow->conn));
}
- if (!tp->is_mptcp)
+ if (mptcp_check_fallback(sk)) {
+ mptcp_rcv_space_init(mptcp_sk(parent), sk);
return;
+ }
if (subflow->mp_capable) {
pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
subflow->remote_key);
mptcp_finish_connect(sk);
-
- if (skb) {
- pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq);
- subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
- }
} else if (subflow->mp_join) {
u8 hmac[SHA256_DIGEST_SIZE];
@@ -286,9 +253,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);
- if (skb)
- subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
-
if (!mptcp_finish_join(sk))
goto do_reset;
@@ -354,10 +318,9 @@ static bool subflow_hmac_valid(const struct request_sock *req,
const struct mptcp_subflow_request_sock *subflow_req;
u8 hmac[SHA256_DIGEST_SIZE];
struct mptcp_sock *msk;
- bool ret;
subflow_req = mptcp_subflow_rsk(req);
- msk = mptcp_token_get_sock(subflow_req->token);
+ msk = subflow_req->msk;
if (!msk)
return false;
@@ -365,12 +328,7 @@ static bool subflow_hmac_valid(const struct request_sock *req,
subflow_req->remote_nonce,
subflow_req->local_nonce, hmac);
- ret = true;
- if (crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN))
- ret = false;
-
- sock_put((struct sock *)msk);
- return ret;
+ return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}
static void mptcp_sock_destruct(struct sock *sk)
@@ -393,7 +351,7 @@ static void mptcp_sock_destruct(struct sock *sk)
sock_orphan(sk);
}
- mptcp_token_destroy(mptcp_sk(sk)->token);
+ mptcp_token_destroy(mptcp_sk(sk));
inet_sock_destruct(sk);
}
@@ -438,22 +396,25 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
struct mptcp_subflow_request_sock *subflow_req;
struct mptcp_options_received mp_opt;
- bool fallback_is_fatal = false;
+ bool fallback, fallback_is_fatal;
struct sock *new_msk = NULL;
- bool fallback = false;
struct sock *child;
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
- /* we need later a valid 'mp_capable' value even when options are not
- * parsed
+ /* After child creation we must look for 'mp_capable' even when options
+ * are not parsed
*/
mp_opt.mp_capable = 0;
- if (tcp_rsk(req)->is_mptcp == 0)
+
+ /* hopefully temporary handling for MP_JOIN+syncookie */
+ subflow_req = mptcp_subflow_rsk(req);
+ fallback_is_fatal = subflow_req->mp_join;
+ fallback = !tcp_rsk(req)->is_mptcp;
+ if (fallback)
goto create_child;
/* if the sk is MP_CAPABLE, we try to fetch the client key */
- subflow_req = mptcp_subflow_rsk(req);
if (subflow_req->mp_capable) {
if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
/* here we can receive and accept an in-window,
@@ -474,12 +435,11 @@ create_msk:
if (!new_msk)
fallback = true;
} else if (subflow_req->mp_join) {
- fallback_is_fatal = true;
mptcp_get_options(skb, &mp_opt);
if (!mp_opt.mp_join ||
!subflow_hmac_valid(req, &mp_opt)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
- return NULL;
+ fallback = true;
}
}
@@ -510,6 +470,7 @@ create_child:
*/
new_msk->sk_destruct = mptcp_sock_destruct;
mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
+ mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
ctx->conn = new_msk;
new_msk = NULL;
@@ -522,10 +483,12 @@ create_child:
} else if (ctx->mp_join) {
struct mptcp_sock *owner;
- owner = mptcp_token_get_sock(ctx->token);
+ owner = subflow_req->msk;
if (!owner)
goto dispose_child;
+ /* move the msk reference ownership to the subflow */
+ subflow_req->msk = NULL;
ctx->conn = (struct sock *)owner;
if (!mptcp_finish_join(child))
goto dispose_child;
@@ -565,7 +528,8 @@ enum mapping_status {
MAPPING_OK,
MAPPING_INVALID,
MAPPING_EMPTY,
- MAPPING_DATA_FIN
+ MAPPING_DATA_FIN,
+ MAPPING_DUMMY
};
static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
@@ -629,6 +593,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
if (!skb)
return MAPPING_EMPTY;
+ if (mptcp_check_fallback(ssk))
+ return MAPPING_DUMMY;
+
mpext = mptcp_get_ext(skb);
if (!mpext || !mpext->use_map) {
if (!subflow->map_valid && !skb->len) {
@@ -770,6 +737,16 @@ static bool subflow_check_data_avail(struct sock *ssk)
ssk->sk_err = EBADMSG;
goto fatal;
}
+ if (status == MAPPING_DUMMY) {
+ __mptcp_do_fallback(msk);
+ skb = skb_peek(&ssk->sk_receive_queue);
+ subflow->map_valid = 1;
+ subflow->map_seq = READ_ONCE(msk->ack_seq);
+ subflow->map_data_len = skb->len;
+ subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
+ subflow->ssn_offset;
+ return true;
+ }
if (status != MAPPING_OK)
return false;
@@ -893,14 +870,18 @@ static void subflow_data_ready(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct sock *parent = subflow->conn;
+ struct mptcp_sock *msk;
- if (!subflow->mp_capable && !subflow->mp_join) {
- subflow->tcp_data_ready(sk);
-
+ msk = mptcp_sk(parent);
+ if (inet_sk_state_load(sk) == TCP_LISTEN) {
+ set_bit(MPTCP_DATA_READY, &msk->flags);
parent->sk_data_ready(parent);
return;
}
+ WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
+ !subflow->mp_join);
+
if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
}
@@ -977,7 +958,9 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex,
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_subflow_context *subflow;
struct sockaddr_storage addr;
+ int local_id = loc->id;
struct socket *sf;
+ struct sock *ssk;
u32 remote_token;
int addrlen;
int err;
@@ -989,7 +972,20 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex,
if (err)
return err;
- subflow = mptcp_subflow_ctx(sf->sk);
+ ssk = sf->sk;
+ subflow = mptcp_subflow_ctx(ssk);
+ do {
+ get_random_bytes(&subflow->local_nonce, sizeof(u32));
+ } while (!subflow->local_nonce);
+
+ if (!local_id) {
+ err = mptcp_pm_get_local_id(msk, (struct sock_common *)ssk);
+ if (err < 0)
+ goto failed;
+
+ local_id = err;
+ }
+
subflow->remote_key = msk->remote_key;
subflow->local_key = msk->local_key;
subflow->token = msk->token;
@@ -1000,15 +996,16 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex,
if (loc->family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
- sf->sk->sk_bound_dev_if = ifindex;
+ ssk->sk_bound_dev_if = ifindex;
err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
if (err)
goto failed;
mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
- pr_debug("msk=%p remote_token=%u", msk, remote_token);
+ pr_debug("msk=%p remote_token=%u local_id=%d", msk, remote_token,
+ local_id);
subflow->remote_token = remote_token;
- subflow->local_id = loc->id;
+ subflow->local_id = local_id;
subflow->request_join = 1;
subflow->request_bkup = 1;
mptcp_info2sockaddr(remote, &addr);
@@ -1121,11 +1118,22 @@ static void subflow_state_change(struct sock *sk)
__subflow_state_change(sk);
+ if (subflow_simultaneous_connect(sk)) {
+ mptcp_do_fallback(sk);
+ mptcp_rcv_space_init(mptcp_sk(parent), sk);
+ pr_fallback(mptcp_sk(parent));
+ subflow->conn_finished = 1;
+ if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
+ inet_sk_state_store(parent, TCP_ESTABLISHED);
+ parent->sk_state_change(parent);
+ }
+ }
+
/* as recvmsg() does not acquire the subflow socket for ssk selection
* a fin packet carrying a DSS can be unnoticed if we don't trigger
* the data available machinery here.
*/
- if (subflow->mp_capable && mptcp_subflow_data_available(sk))
+ if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
@@ -1258,7 +1266,7 @@ static int subflow_ops_init(struct request_sock_ops *subflow_ops)
return 0;
}
-void mptcp_subflow_init(void)
+void __init mptcp_subflow_init(void)
{
subflow_request_sock_ops = tcp_request_sock_ops;
if (subflow_ops_init(&subflow_request_sock_ops) != 0)
@@ -1271,7 +1279,6 @@ void mptcp_subflow_init(void)
subflow_specific.conn_request = subflow_v4_conn_request;
subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
subflow_specific.sk_rx_dst_set = subflow_finish_connect;
- subflow_specific.rebuild_header = subflow_rebuild_header;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
@@ -1281,7 +1288,6 @@ void mptcp_subflow_init(void)
subflow_v6_specific.conn_request = subflow_v6_conn_request;
subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
- subflow_v6_specific.rebuild_header = subflow_rebuild_header;
subflow_v6m_specific = subflow_v6_specific;
subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index 33352dd99d4d..66a4990bd897 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -24,7 +24,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/radix-tree.h>
+#include <linux/memblock.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <net/sock.h>
@@ -33,10 +33,55 @@
#include <net/mptcp.h>
#include "protocol.h"
-static RADIX_TREE(token_tree, GFP_ATOMIC);
-static RADIX_TREE(token_req_tree, GFP_ATOMIC);
-static DEFINE_SPINLOCK(token_tree_lock);
-static int token_used __read_mostly;
+#define TOKEN_MAX_RETRIES 4
+#define TOKEN_MAX_CHAIN_LEN 4
+
+struct token_bucket {
+ spinlock_t lock;
+ int chain_len;
+ struct hlist_nulls_head req_chain;
+ struct hlist_nulls_head msk_chain;
+};
+
+static struct token_bucket *token_hash __read_mostly;
+static unsigned int token_mask __read_mostly;
+
+static struct token_bucket *token_bucket(u32 token)
+{
+ return &token_hash[token & token_mask];
+}
+
+/* called with bucket lock held */
+static struct mptcp_subflow_request_sock *
+__token_lookup_req(struct token_bucket *t, u32 token)
+{
+ struct mptcp_subflow_request_sock *req;
+ struct hlist_nulls_node *pos;
+
+ hlist_nulls_for_each_entry_rcu(req, pos, &t->req_chain, token_node)
+ if (req->token == token)
+ return req;
+ return NULL;
+}
+
+/* called with bucket lock held */
+static struct mptcp_sock *
+__token_lookup_msk(struct token_bucket *t, u32 token)
+{
+ struct hlist_nulls_node *pos;
+ struct sock *sk;
+
+ sk_nulls_for_each_rcu(sk, pos, &t->msk_chain)
+ if (mptcp_sk(sk)->token == token)
+ return mptcp_sk(sk);
+ return NULL;
+}
+
+static bool __token_bucket_busy(struct token_bucket *t, u32 token)
+{
+ return !token || t->chain_len >= TOKEN_MAX_CHAIN_LEN ||
+ __token_lookup_req(t, token) || __token_lookup_msk(t, token);
+}
/**
* mptcp_token_new_request - create new key/idsn/token for subflow_request
@@ -52,30 +97,32 @@ static int token_used __read_mostly;
int mptcp_token_new_request(struct request_sock *req)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
- int err;
-
- while (1) {
- u32 token;
-
- mptcp_crypto_key_gen_sha(&subflow_req->local_key,
- &subflow_req->token,
- &subflow_req->idsn);
- pr_debug("req=%p local_key=%llu, token=%u, idsn=%llu\n",
- req, subflow_req->local_key, subflow_req->token,
- subflow_req->idsn);
-
- token = subflow_req->token;
- spin_lock_bh(&token_tree_lock);
- if (!radix_tree_lookup(&token_req_tree, token) &&
- !radix_tree_lookup(&token_tree, token))
- break;
- spin_unlock_bh(&token_tree_lock);
+ int retries = TOKEN_MAX_RETRIES;
+ struct token_bucket *bucket;
+ u32 token;
+
+again:
+ mptcp_crypto_key_gen_sha(&subflow_req->local_key,
+ &subflow_req->token,
+ &subflow_req->idsn);
+ pr_debug("req=%p local_key=%llu, token=%u, idsn=%llu\n",
+ req, subflow_req->local_key, subflow_req->token,
+ subflow_req->idsn);
+
+ token = subflow_req->token;
+ bucket = token_bucket(token);
+ spin_lock_bh(&bucket->lock);
+ if (__token_bucket_busy(bucket, token)) {
+ spin_unlock_bh(&bucket->lock);
+ if (!--retries)
+ return -EBUSY;
+ goto again;
}
- err = radix_tree_insert(&token_req_tree,
- subflow_req->token, &token_used);
- spin_unlock_bh(&token_tree_lock);
- return err;
+ hlist_nulls_add_head_rcu(&subflow_req->token_node, &bucket->req_chain);
+ bucket->chain_len++;
+ spin_unlock_bh(&bucket->lock);
+ return 0;
}
/**
@@ -97,48 +144,56 @@ int mptcp_token_new_request(struct request_sock *req)
int mptcp_token_new_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- struct sock *mptcp_sock = subflow->conn;
- int err;
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ int retries = TOKEN_MAX_RETRIES;
+ struct token_bucket *bucket;
- while (1) {
- u32 token;
+ pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n",
+ sk, subflow->local_key, subflow->token, subflow->idsn);
- mptcp_crypto_key_gen_sha(&subflow->local_key, &subflow->token,
- &subflow->idsn);
+again:
+ mptcp_crypto_key_gen_sha(&subflow->local_key, &subflow->token,
+ &subflow->idsn);
- pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n",
- sk, subflow->local_key, subflow->token, subflow->idsn);
-
- token = subflow->token;
- spin_lock_bh(&token_tree_lock);
- if (!radix_tree_lookup(&token_req_tree, token) &&
- !radix_tree_lookup(&token_tree, token))
- break;
- spin_unlock_bh(&token_tree_lock);
+ bucket = token_bucket(subflow->token);
+ spin_lock_bh(&bucket->lock);
+ if (__token_bucket_busy(bucket, subflow->token)) {
+ spin_unlock_bh(&bucket->lock);
+ if (!--retries)
+ return -EBUSY;
+ goto again;
}
- err = radix_tree_insert(&token_tree, subflow->token, mptcp_sock);
- spin_unlock_bh(&token_tree_lock);
- return err;
+ WRITE_ONCE(msk->token, subflow->token);
+ __sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain);
+ bucket->chain_len++;
+ spin_unlock_bh(&bucket->lock);
+ return 0;
}
/**
- * mptcp_token_new_accept - insert token for later processing
- * @token: the token to insert to the tree
- * @conn: the just cloned socket linked to the new connection
+ * mptcp_token_accept - replace a req sk with full sock in token hash
+ * @req: the request socket to be removed
+ * @msk: the just cloned socket linked to the new connection
*
* Called when a SYN packet creates a new logical connection, i.e.
* is not a join request.
*/
-int mptcp_token_new_accept(u32 token, struct sock *conn)
+void mptcp_token_accept(struct mptcp_subflow_request_sock *req,
+ struct mptcp_sock *msk)
{
- int err;
+ struct mptcp_subflow_request_sock *pos;
+ struct token_bucket *bucket;
- spin_lock_bh(&token_tree_lock);
- err = radix_tree_insert(&token_tree, token, conn);
- spin_unlock_bh(&token_tree_lock);
+ bucket = token_bucket(req->token);
+ spin_lock_bh(&bucket->lock);
- return err;
+ /* pedantic lookup check for the moved token */
+ pos = __token_lookup_req(bucket, req->token);
+ if (!WARN_ON_ONCE(pos != req))
+ hlist_nulls_del_init_rcu(&req->token_node);
+ __sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain);
+ spin_unlock_bh(&bucket->lock);
}
/**
@@ -152,45 +207,112 @@ int mptcp_token_new_accept(u32 token, struct sock *conn)
*/
struct mptcp_sock *mptcp_token_get_sock(u32 token)
{
- struct sock *conn;
-
- spin_lock_bh(&token_tree_lock);
- conn = radix_tree_lookup(&token_tree, token);
- if (conn) {
- /* token still reserved? */
- if (conn == (struct sock *)&token_used)
- conn = NULL;
- else
- sock_hold(conn);
+ struct hlist_nulls_node *pos;
+ struct token_bucket *bucket;
+ struct mptcp_sock *msk;
+ struct sock *sk;
+
+ rcu_read_lock();
+ bucket = token_bucket(token);
+
+again:
+ sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) {
+ msk = mptcp_sk(sk);
+ if (READ_ONCE(msk->token) != token)
+ continue;
+ if (!refcount_inc_not_zero(&sk->sk_refcnt))
+ goto not_found;
+ if (READ_ONCE(msk->token) != token) {
+ sock_put(sk);
+ goto again;
+ }
+ goto found;
}
- spin_unlock_bh(&token_tree_lock);
+ if (get_nulls_value(pos) != (token & token_mask))
+ goto again;
- return mptcp_sk(conn);
+not_found:
+ msk = NULL;
+
+found:
+ rcu_read_unlock();
+ return msk;
}
/**
* mptcp_token_destroy_request - remove mptcp connection/token
- * @token: token of mptcp connection to remove
+ * @req: mptcp request socket dropping the token
*
- * Remove not-yet-fully-established incoming connection identified
- * by @token.
+ * Remove the token associated to @req.
*/
-void mptcp_token_destroy_request(u32 token)
+void mptcp_token_destroy_request(struct request_sock *req)
{
- spin_lock_bh(&token_tree_lock);
- radix_tree_delete(&token_req_tree, token);
- spin_unlock_bh(&token_tree_lock);
+ struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
+ struct mptcp_subflow_request_sock *pos;
+ struct token_bucket *bucket;
+
+ if (hlist_nulls_unhashed(&subflow_req->token_node))
+ return;
+
+ bucket = token_bucket(subflow_req->token);
+ spin_lock_bh(&bucket->lock);
+ pos = __token_lookup_req(bucket, subflow_req->token);
+ if (!WARN_ON_ONCE(pos != subflow_req)) {
+ hlist_nulls_del_init_rcu(&pos->token_node);
+ bucket->chain_len--;
+ }
+ spin_unlock_bh(&bucket->lock);
}
/**
* mptcp_token_destroy - remove mptcp connection/token
- * @token: token of mptcp connection to remove
+ * @msk: mptcp connection dropping the token
*
- * Remove the connection identified by @token.
+ * Remove the token associated to @msk
*/
-void mptcp_token_destroy(u32 token)
+void mptcp_token_destroy(struct mptcp_sock *msk)
{
- spin_lock_bh(&token_tree_lock);
- radix_tree_delete(&token_tree, token);
- spin_unlock_bh(&token_tree_lock);
+ struct token_bucket *bucket;
+ struct mptcp_sock *pos;
+
+ if (sk_unhashed((struct sock *)msk))
+ return;
+
+ bucket = token_bucket(msk->token);
+ spin_lock_bh(&bucket->lock);
+ pos = __token_lookup_msk(bucket, msk->token);
+ if (!WARN_ON_ONCE(pos != msk)) {
+ __sk_nulls_del_node_init_rcu((struct sock *)pos);
+ bucket->chain_len--;
+ }
+ spin_unlock_bh(&bucket->lock);
+}
+
+void __init mptcp_token_init(void)
+{
+ int i;
+
+ token_hash = alloc_large_system_hash("MPTCP token",
+ sizeof(struct token_bucket),
+ 0,
+ 20,/* one slot per 1MB of memory */
+ 0,
+ NULL,
+ &token_mask,
+ 0,
+ 64 * 1024);
+ for (i = 0; i < token_mask + 1; ++i) {
+ INIT_HLIST_NULLS_HEAD(&token_hash[i].req_chain, i);
+ INIT_HLIST_NULLS_HEAD(&token_hash[i].msk_chain, i);
+ spin_lock_init(&token_hash[i].lock);
+ }
}
+
+#if IS_MODULE(CONFIG_MPTCP_KUNIT_TESTS)
+EXPORT_SYMBOL_GPL(mptcp_token_new_request);
+EXPORT_SYMBOL_GPL(mptcp_token_new_connect);
+EXPORT_SYMBOL_GPL(mptcp_token_accept);
+EXPORT_SYMBOL_GPL(mptcp_token_get_sock);
+EXPORT_SYMBOL_GPL(mptcp_token_destroy_request);
+EXPORT_SYMBOL_GPL(mptcp_token_destroy);
+#endif
diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c
new file mode 100644
index 000000000000..e1bd6f0a0676
--- /dev/null
+++ b/net/mptcp/token_test.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <kunit/test.h>
+
+#include "protocol.h"
+
+static struct mptcp_subflow_request_sock *build_req_sock(struct kunit *test)
+{
+ struct mptcp_subflow_request_sock *req;
+
+ req = kunit_kzalloc(test, sizeof(struct mptcp_subflow_request_sock),
+ GFP_USER);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, req);
+ mptcp_token_init_request((struct request_sock *)req);
+ return req;
+}
+
+static void mptcp_token_test_req_basic(struct kunit *test)
+{
+ struct mptcp_subflow_request_sock *req = build_req_sock(test);
+ struct mptcp_sock *null_msk = NULL;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ mptcp_token_new_request((struct request_sock *)req));
+ KUNIT_EXPECT_NE(test, 0, (int)req->token);
+ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(req->token));
+
+ /* cleanup */
+ mptcp_token_destroy_request((struct request_sock *)req);
+}
+
+static struct inet_connection_sock *build_icsk(struct kunit *test)
+{
+ struct inet_connection_sock *icsk;
+
+ icsk = kunit_kzalloc(test, sizeof(struct inet_connection_sock),
+ GFP_USER);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, icsk);
+ return icsk;
+}
+
+static struct mptcp_subflow_context *build_ctx(struct kunit *test)
+{
+ struct mptcp_subflow_context *ctx;
+
+ ctx = kunit_kzalloc(test, sizeof(struct mptcp_subflow_context),
+ GFP_USER);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, ctx);
+ return ctx;
+}
+
+static struct mptcp_sock *build_msk(struct kunit *test)
+{
+ struct mptcp_sock *msk;
+
+ msk = kunit_kzalloc(test, sizeof(struct mptcp_sock), GFP_USER);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk);
+ refcount_set(&((struct sock *)msk)->sk_refcnt, 1);
+ return msk;
+}
+
+static void mptcp_token_test_msk_basic(struct kunit *test)
+{
+ struct inet_connection_sock *icsk = build_icsk(test);
+ struct mptcp_subflow_context *ctx = build_ctx(test);
+ struct mptcp_sock *msk = build_msk(test);
+ struct mptcp_sock *null_msk = NULL;
+ struct sock *sk;
+
+ rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
+ ctx->conn = (struct sock *)msk;
+ sk = (struct sock *)msk;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ mptcp_token_new_connect((struct sock *)icsk));
+ KUNIT_EXPECT_NE(test, 0, (int)ctx->token);
+ KUNIT_EXPECT_EQ(test, ctx->token, msk->token);
+ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(ctx->token));
+ KUNIT_EXPECT_EQ(test, 2, (int)refcount_read(&sk->sk_refcnt));
+
+ mptcp_token_destroy(msk);
+ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(ctx->token));
+}
+
+static void mptcp_token_test_accept(struct kunit *test)
+{
+ struct mptcp_subflow_request_sock *req = build_req_sock(test);
+ struct mptcp_sock *msk = build_msk(test);
+
+ KUNIT_ASSERT_EQ(test, 0,
+ mptcp_token_new_request((struct request_sock *)req));
+ msk->token = req->token;
+ mptcp_token_accept(req, msk);
+ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token));
+
+ /* this is now a no-op */
+ mptcp_token_destroy_request((struct request_sock *)req);
+ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token));
+
+ /* cleanup */
+ mptcp_token_destroy(msk);
+}
+
+static void mptcp_token_test_destroyed(struct kunit *test)
+{
+ struct mptcp_subflow_request_sock *req = build_req_sock(test);
+ struct mptcp_sock *msk = build_msk(test);
+ struct mptcp_sock *null_msk = NULL;
+ struct sock *sk;
+
+ sk = (struct sock *)msk;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ mptcp_token_new_request((struct request_sock *)req));
+ msk->token = req->token;
+ mptcp_token_accept(req, msk);
+
+ /* simulate race on removal */
+ refcount_set(&sk->sk_refcnt, 0);
+ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(msk->token));
+
+ /* cleanup */
+ mptcp_token_destroy(msk);
+}
+
+static struct kunit_case mptcp_token_test_cases[] = {
+ KUNIT_CASE(mptcp_token_test_req_basic),
+ KUNIT_CASE(mptcp_token_test_msk_basic),
+ KUNIT_CASE(mptcp_token_test_accept),
+ KUNIT_CASE(mptcp_token_test_destroyed),
+ {}
+};
+
+static struct kunit_suite mptcp_token_suite = {
+ .name = "mptcp-token",
+ .test_cases = mptcp_token_test_cases,
+};
+
+kunit_test_suite(mptcp_token_suite);
+
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 340cb955af25..56621d6bfd29 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -460,6 +460,8 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
if (!add_extension(id, cadt_flags, tb))
continue;
+ if (align < ip_set_extensions[id].align)
+ align = ip_set_extensions[id].align;
len = ALIGN(len, ip_set_extensions[id].align);
set->offset[id] = len;
set->extensions |= ip_set_extensions[id].type;
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index f108a76925dd..2b01a151eaa8 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -73,3 +73,4 @@ EXPORT_SYMBOL_GPL(nft_fwd_dup_netdev_offload);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter packet duplication support");
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index afa85171df38..b1eb5272b379 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -594,3 +594,4 @@ module_exit(nf_flow_table_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter flow table module");
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 88bedf1ff1ae..bc4126d8ef65 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -72,3 +72,4 @@ module_exit(nf_flow_inet_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */
+MODULE_DESCRIPTION("Netfilter flow table mixed IPv4/IPv6 module");
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 62651e6683f6..5fff1e040168 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -950,6 +950,7 @@ static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb)
nf_flow_table_gc_cleanup(flowtable, dev);
down_write(&flowtable->flow_block_lock);
list_del(&block_cb->list);
+ list_del(&block_cb->driver_list);
flow_block_cb_free(block_cb);
up_write(&flowtable->flow_block_lock);
}
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index b9cbe1e2453e..ebcdc8e54476 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -1237,3 +1237,4 @@ EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("nftables SYNPROXY expression support");
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 185fc82c99aa..c7cf1cde46de 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -296,6 +296,7 @@ static void nft_indr_block_cleanup(struct flow_block_cb *block_cb)
nft_flow_block_offload_init(&bo, dev_net(dev), FLOW_BLOCK_UNBIND,
basechain, &extack);
mutex_lock(&net->nft.commit_mutex);
+ list_del(&block_cb->driver_list);
list_move(&block_cb->list, &bo.cb_list);
nft_flow_offload_unbind(&bo, basechain);
mutex_unlock(&net->nft.commit_mutex);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 99127e2d95a8..5f24edf95830 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -33,6 +33,7 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
+MODULE_DESCRIPTION("Netfilter messages via netlink socket");
#define nfnl_dereference_protected(id) \
rcu_dereference_protected(table[(id)].subsys, \
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index f9adca62ccb3..aa1a066cb74b 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -902,3 +902,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_EXPR("match");
MODULE_ALIAS_NFT_EXPR("target");
+MODULE_DESCRIPTION("x_tables over nftables support");
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 69d6173f91e2..7d0761fad37e 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -280,3 +280,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso");
MODULE_ALIAS_NFT_EXPR("connlimit");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CONNLIMIT);
+MODULE_DESCRIPTION("nftables connlimit rule support");
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index f6d4d0fa23a6..85ed461ec24e 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -303,3 +303,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("counter");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_COUNTER);
+MODULE_DESCRIPTION("nftables counter rule support");
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index faea72c2df32..77258af1fce0 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1345,3 +1345,4 @@ MODULE_ALIAS_NFT_EXPR("notrack");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER);
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_TIMEOUT);
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_EXPECT);
+MODULE_DESCRIPTION("Netfilter nf_tables conntrack module");
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index c2e78c160fd7..40788b3f1071 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -102,3 +102,4 @@ module_exit(nft_dup_netdev_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_AF_EXPR(5, "dup");
+MODULE_DESCRIPTION("nftables netdev packet duplication support");
diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c
index 465432e0531b..a88d44e163d1 100644
--- a/net/netfilter/nft_fib_inet.c
+++ b/net/netfilter/nft_fib_inet.c
@@ -76,3 +76,4 @@ module_exit(nft_fib_inet_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
MODULE_ALIAS_NFT_AF_EXPR(1, "fib");
+MODULE_DESCRIPTION("nftables fib inet support");
diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c
index a2e726ae7f07..3f3478abd845 100644
--- a/net/netfilter/nft_fib_netdev.c
+++ b/net/netfilter/nft_fib_netdev.c
@@ -85,3 +85,4 @@ module_exit(nft_fib_netdev_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo M. Bermudo Garay <pablombg@gmail.com>");
MODULE_ALIAS_NFT_AF_EXPR(5, "fib");
+MODULE_DESCRIPTION("nftables netdev fib lookups support");
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index b70b48996801..3b9b97aa4b32 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -286,3 +286,4 @@ module_exit(nft_flow_offload_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_EXPR("flow_offload");
+MODULE_DESCRIPTION("nftables hardware flow offload module");
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index b836d550b919..96371d878e7e 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -248,3 +248,4 @@ module_exit(nft_hash_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
MODULE_ALIAS_NFT_EXPR("hash");
+MODULE_DESCRIPTION("Netfilter nftables hash module");
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 35b67d7e3694..0e2c315c3b5e 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -372,3 +372,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("limit");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_LIMIT);
+MODULE_DESCRIPTION("nftables limit expression support");
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index fe4831f2258f..57899454a530 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -298,3 +298,4 @@ module_exit(nft_log_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("log");
+MODULE_DESCRIPTION("Netfilter nf_tables log module");
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index bc9fd98c5d6d..71390b727040 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -305,3 +305,4 @@ module_exit(nft_masq_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
MODULE_ALIAS_NFT_EXPR("masq");
+MODULE_DESCRIPTION("Netfilter nftables masquerade expression support");
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 23a7bfd10521..4bcf33b049c4 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -402,3 +402,4 @@ module_exit(nft_nat_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
MODULE_ALIAS_NFT_EXPR("nat");
+MODULE_DESCRIPTION("Network Address Translation support");
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 48edb9d5f012..f1fc824f9737 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -217,3 +217,4 @@ module_exit(nft_ng_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
MODULE_ALIAS_NFT_EXPR("numgen");
+MODULE_DESCRIPTION("nftables number generator module");
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index bfd18d2b65a2..5f9207a9f485 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -252,3 +252,4 @@ module_exit(nft_objref_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_EXPR("objref");
+MODULE_DESCRIPTION("nftables stateful object reference module");
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index b42247aa48a9..c261d57a666a 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -149,3 +149,4 @@ module_exit(nft_osf_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>");
MODULE_ALIAS_NFT_EXPR("osf");
+MODULE_DESCRIPTION("nftables passive OS fingerprint support");
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 5ece0a6aa8c3..23265d757acb 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -216,3 +216,4 @@ module_exit(nft_queue_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Eric Leblond <eric@regit.org>");
MODULE_ALIAS_NFT_EXPR("queue");
+MODULE_DESCRIPTION("Netfilter nftables queue module");
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 4413690591f2..0363f533a42b 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -254,3 +254,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_EXPR("quota");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_QUOTA);
+MODULE_DESCRIPTION("Netfilter nftables quota module");
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 5b779171565c..2056051c0af0 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -292,3 +292,4 @@ module_exit(nft_redir_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
MODULE_ALIAS_NFT_EXPR("redir");
+MODULE_DESCRIPTION("Netfilter nftables redirect support");
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index 00f865fb80ca..86eafbb0fdd0 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -119,3 +119,4 @@ EXPORT_SYMBOL_GPL(nft_reject_icmpv6_code);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Netfilter x_tables over nftables module");
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index f41f414b72d1..cf8f2646e93c 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -149,3 +149,4 @@ module_exit(nft_reject_inet_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_AF_EXPR(1, "reject");
+MODULE_DESCRIPTION("Netfilter nftables reject inet support");
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index e2c1fc608841..4fda8b3f1762 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -388,3 +388,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>");
MODULE_ALIAS_NFT_EXPR("synproxy");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_SYNPROXY);
+MODULE_DESCRIPTION("nftables SYNPROXY expression support");
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 30be5787fbde..d3eb953d0333 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -719,3 +719,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NFT_EXPR("tunnel");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_TUNNEL);
+MODULE_DESCRIPTION("nftables tunnel expression support");
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index a8e5f6c8db7a..b4f7bbc3f3ca 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -244,3 +244,4 @@ MODULE_ALIAS("ipt_SNAT");
MODULE_ALIAS("ipt_DNAT");
MODULE_ALIAS("ip6t_SNAT");
MODULE_ALIAS("ip6t_DNAT");
+MODULE_DESCRIPTION("SNAT and DNAT targets support");
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index fc0efd8833c8..2611657f40ca 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1169,9 +1169,10 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
const struct nlattr *attr, bool last)
{
+ struct ovs_skb_cb *ovs_cb = OVS_CB(skb);
const struct nlattr *actions, *cpl_arg;
+ int len, max_len, rem = nla_len(attr);
const struct check_pkt_len_arg *arg;
- int rem = nla_len(attr);
bool clone_flow_key;
/* The first netlink attribute in 'attr' is always
@@ -1180,7 +1181,11 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
cpl_arg = nla_data(attr);
arg = nla_data(cpl_arg);
- if (skb->len <= arg->pkt_len) {
+ len = ovs_cb->mru ? ovs_cb->mru + skb->mac_len : skb->len;
+ max_len = arg->pkt_len;
+
+ if ((skb_is_gso(skb) && skb_gso_validate_mac_len(skb, max_len)) ||
+ len <= max_len) {
/* Second netlink attribute in 'attr' is always
* 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
*/
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 29bd405adbbd..7b436ebde61d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4293,7 +4293,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
struct packet_ring_buffer *rb;
struct sk_buff_head *rb_queue;
__be16 num;
- int err = -EINVAL;
+ int err;
/* Added to avoid minimal code churn */
struct tpacket_req *req = &req_u->req;
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 46f709a4b577..f8001ec80867 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -38,6 +38,12 @@
#include "rds.h"
#include "loop.h"
+static char * const rds_trans_modules[] = {
+ [RDS_TRANS_IB] = "rds_rdma",
+ [RDS_TRANS_GAP] = NULL,
+ [RDS_TRANS_TCP] = "rds_tcp",
+};
+
static struct rds_transport *transports[RDS_TRANS_COUNT];
static DECLARE_RWSEM(rds_trans_sem);
@@ -110,18 +116,20 @@ struct rds_transport *rds_trans_get(int t_type)
{
struct rds_transport *ret = NULL;
struct rds_transport *trans;
- unsigned int i;
down_read(&rds_trans_sem);
- for (i = 0; i < RDS_TRANS_COUNT; i++) {
- trans = transports[i];
-
- if (trans && trans->t_type == t_type &&
- (!trans->t_owner || try_module_get(trans->t_owner))) {
- ret = trans;
- break;
- }
+ trans = transports[t_type];
+ if (!trans) {
+ up_read(&rds_trans_sem);
+ if (rds_trans_modules[t_type])
+ request_module(rds_trans_modules[t_type]);
+ down_read(&rds_trans_sem);
+ trans = transports[t_type];
}
+ if (trans && trans->t_type == t_type &&
+ (!trans->t_owner || try_module_get(trans->t_owner)))
+ ret = trans;
+
up_read(&rds_trans_sem);
return ret;
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index b7611cc159e5..032ed76c0166 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -22,6 +22,11 @@
#include <net/ip.h>
#include "ar-internal.h"
+static void rxrpc_dummy_notify(struct sock *sk, struct rxrpc_call *call,
+ unsigned long user_call_ID)
+{
+}
+
/*
* Preallocate a single service call, connection and peer and, if possible,
* give them a user ID and attach the user's side of the ID to them.
@@ -228,6 +233,8 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
if (rx->discard_new_call) {
_debug("discard %lx", call->user_call_ID);
rx->discard_new_call(call, call->user_call_ID);
+ if (call->notify_rx)
+ call->notify_rx = rxrpc_dummy_notify;
rxrpc_put_call(call, rxrpc_call_put_kernel);
}
rxrpc_call_completed(call);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index aa1c8eee6557..6be2672a65ea 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -253,7 +253,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
* confuse things
*/
annotation &= ~RXRPC_TX_ANNO_MASK;
- annotation |= RXRPC_TX_ANNO_RESENT;
+ annotation |= RXRPC_TX_ANNO_UNACK | RXRPC_TX_ANNO_RESENT;
call->rxtx_annotations[ix] = annotation;
skb = call->rxtx_buffer[ix];
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 299ac98e9754..767579328a06 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -722,13 +722,12 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
rwind, ntohl(ackinfo->jumbo_max));
+ if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
+ rwind = RXRPC_RXTX_BUFF_SIZE - 1;
if (call->tx_winsize != rwind) {
- if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
- rwind = RXRPC_RXTX_BUFF_SIZE - 1;
if (rwind > call->tx_winsize)
wake = true;
- trace_rxrpc_rx_rwind_change(call, sp->hdr.serial,
- ntohl(ackinfo->rwind), wake);
+ trace_rxrpc_rx_rwind_change(call, sp->hdr.serial, rwind, wake);
call->tx_winsize = rwind;
}
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 84badf00647e..a3b37d88800e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -468,6 +468,9 @@ choice
config DEFAULT_FQ_CODEL
bool "Fair Queue Controlled Delay" if NET_SCH_FQ_CODEL
+ config DEFAULT_FQ_PIE
+ bool "Flow Queue Proportional Integral controller Enhanced" if NET_SCH_FQ_PIE
+
config DEFAULT_SFQ
bool "Stochastic Fair Queue" if NET_SCH_SFQ
@@ -480,6 +483,7 @@ config DEFAULT_NET_SCH
default "pfifo_fast" if DEFAULT_PFIFO_FAST
default "fq" if DEFAULT_FQ
default "fq_codel" if DEFAULT_FQ_CODEL
+ default "fq_pie" if DEFAULT_FQ_PIE
default "sfq" if DEFAULT_SFQ
default "pfifo_fast"
endif
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 8ac7eb0a8309..063d8aaf2900 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1059,14 +1059,13 @@ err:
return err;
}
-void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets,
- bool drop, bool hw)
+void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets,
+ u64 drops, bool hw)
{
if (a->cpu_bstats) {
_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
- if (drop)
- this_cpu_ptr(a->cpu_qstats)->drops += packets;
+ this_cpu_ptr(a->cpu_qstats)->drops += drops;
if (hw)
_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
@@ -1075,8 +1074,7 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets,
}
_bstats_update(&a->tcfa_bstats, bytes, packets);
- if (drop)
- a->tcfa_qstats.drops += packets;
+ a->tcfa_qstats.drops += drops;
if (hw)
_bstats_update(&a->tcfa_bstats_hw, bytes, packets);
}
@@ -1475,7 +1473,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_ROOT_MAX + 1];
- u32 portid = skb ? NETLINK_CB(skb).portid : 0;
+ u32 portid = NETLINK_CB(skb).portid;
int ret = 0, ovr = 0;
if ((n->nlmsg_type != RTM_GETACTION) &&
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index e9f3576cbf71..1b9c6d4a1b6b 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1450,12 +1450,12 @@ static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index)
return tcf_idr_search(tn, a, index);
}
-static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
- u64 lastuse, bool hw)
+static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+ u64 drops, u64 lastuse, bool hw)
{
struct tcf_ct *c = to_ct(a);
- tcf_action_update_stats(a, bytes, packets, false, hw);
+ tcf_action_update_stats(a, bytes, packets, drops, hw);
c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse);
}
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 416065772719..410e3bbfb9ca 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -171,14 +171,15 @@ static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a,
return action;
}
-static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
- u64 lastuse, bool hw)
+static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+ u64 drops, u64 lastuse, bool hw)
{
struct tcf_gact *gact = to_gact(a);
int action = READ_ONCE(gact->tcf_action);
struct tcf_t *tm = &gact->tcf_tm;
- tcf_action_update_stats(a, bytes, packets, action == TC_ACT_SHOT, hw);
+ tcf_action_update_stats(a, bytes, packets,
+ action == TC_ACT_SHOT ? packets : drops, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 9c628591f452..1fb8d428d2c1 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -32,7 +32,7 @@ static ktime_t gate_get_time(struct tcf_gate *gact)
return KTIME_MAX;
}
-static int gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
+static void gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
{
struct tcf_gate_params *param = &gact->param;
ktime_t now, base, cycle;
@@ -43,18 +43,13 @@ static int gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
if (ktime_after(base, now)) {
*start = base;
- return 0;
+ return;
}
cycle = param->tcfg_cycletime;
- /* cycle time should not be zero */
- if (!cycle)
- return -EFAULT;
-
n = div64_u64(ktime_sub_ns(now, base), cycle);
*start = ktime_add_ns(base, (n + 1) * cycle);
- return 0;
}
static void gate_start_timer(struct tcf_gate *gact, ktime_t start)
@@ -277,6 +272,27 @@ release_list:
return err;
}
+static void gate_setup_timer(struct tcf_gate *gact, u64 basetime,
+ enum tk_offsets tko, s32 clockid,
+ bool do_init)
+{
+ if (!do_init) {
+ if (basetime == gact->param.tcfg_basetime &&
+ tko == gact->tk_offset &&
+ clockid == gact->param.tcfg_clockid)
+ return;
+
+ spin_unlock_bh(&gact->tcf_lock);
+ hrtimer_cancel(&gact->hitimer);
+ spin_lock_bh(&gact->tcf_lock);
+ }
+ gact->param.tcfg_basetime = basetime;
+ gact->param.tcfg_clockid = clockid;
+ gact->tk_offset = tko;
+ hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT);
+ gact->hitimer.function = gate_timer_func;
+}
+
static int tcf_gate_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
@@ -287,12 +303,12 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
enum tk_offsets tk_offset = TK_OFFS_TAI;
struct nlattr *tb[TCA_GATE_MAX + 1];
struct tcf_chain *goto_ch = NULL;
+ u64 cycletime = 0, basetime = 0;
struct tcf_gate_params *p;
s32 clockid = CLOCK_TAI;
struct tcf_gate *gact;
struct tc_gate *parm;
int ret = 0, err;
- u64 basetime = 0;
u32 gflags = 0;
s32 prio = -1;
ktime_t start;
@@ -308,6 +324,27 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (!tb[TCA_GATE_PARMS])
return -EINVAL;
+ if (tb[TCA_GATE_CLOCKID]) {
+ clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ tk_offset = TK_OFFS_REAL;
+ break;
+ case CLOCK_MONOTONIC:
+ tk_offset = TK_OFFS_MAX;
+ break;
+ case CLOCK_BOOTTIME:
+ tk_offset = TK_OFFS_BOOT;
+ break;
+ case CLOCK_TAI:
+ tk_offset = TK_OFFS_TAI;
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+ return -EINVAL;
+ }
+ }
+
parm = nla_data(tb[TCA_GATE_PARMS]);
index = parm->index;
@@ -331,10 +368,6 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
- if (ret == ACT_P_CREATED) {
- to_gate(*a)->param.tcfg_clockid = -1;
- INIT_LIST_HEAD(&(to_gate(*a)->param.entries));
- }
if (tb[TCA_GATE_PRIORITY])
prio = nla_get_s32(tb[TCA_GATE_PRIORITY]);
@@ -345,41 +378,19 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (tb[TCA_GATE_FLAGS])
gflags = nla_get_u32(tb[TCA_GATE_FLAGS]);
- if (tb[TCA_GATE_CLOCKID]) {
- clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
- switch (clockid) {
- case CLOCK_REALTIME:
- tk_offset = TK_OFFS_REAL;
- break;
- case CLOCK_MONOTONIC:
- tk_offset = TK_OFFS_MAX;
- break;
- case CLOCK_BOOTTIME:
- tk_offset = TK_OFFS_BOOT;
- break;
- case CLOCK_TAI:
- tk_offset = TK_OFFS_TAI;
- break;
- default:
- NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
- goto release_idr;
- }
- }
+ gact = to_gate(*a);
+ if (ret == ACT_P_CREATED)
+ INIT_LIST_HEAD(&gact->param.entries);
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
goto release_idr;
- gact = to_gate(*a);
-
spin_lock_bh(&gact->tcf_lock);
p = &gact->param;
- if (tb[TCA_GATE_CYCLE_TIME]) {
- p->tcfg_cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
- if (!p->tcfg_cycletime_ext)
- goto chain_put;
- }
+ if (tb[TCA_GATE_CYCLE_TIME])
+ cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
if (tb[TCA_GATE_ENTRY_LIST]) {
err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
@@ -387,35 +398,29 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
goto chain_put;
}
- if (!p->tcfg_cycletime) {
+ if (!cycletime) {
struct tcfg_gate_entry *entry;
ktime_t cycle = 0;
list_for_each_entry(entry, &p->entries, list)
cycle = ktime_add_ns(cycle, entry->interval);
- p->tcfg_cycletime = cycle;
+ cycletime = cycle;
+ if (!cycletime) {
+ err = -EINVAL;
+ goto chain_put;
+ }
}
+ p->tcfg_cycletime = cycletime;
if (tb[TCA_GATE_CYCLE_TIME_EXT])
p->tcfg_cycletime_ext =
nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
+ gate_setup_timer(gact, basetime, tk_offset, clockid,
+ ret == ACT_P_CREATED);
p->tcfg_priority = prio;
- p->tcfg_basetime = basetime;
- p->tcfg_clockid = clockid;
p->tcfg_flags = gflags;
-
- gact->tk_offset = tk_offset;
- hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT);
- gact->hitimer.function = gate_timer_func;
-
- err = gate_get_start_time(gact, &start);
- if (err < 0) {
- NL_SET_ERR_MSG(extack,
- "Internal error: failed get start time");
- release_entry_list(&p->entries);
- goto chain_put;
- }
+ gate_get_start_time(gact, &start);
gact->current_close_time = start;
gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING;
@@ -443,6 +448,13 @@ chain_put:
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
release_idr:
+ /* action is not inserted in any list: it's safe to init hitimer
+ * without taking tcf_lock.
+ */
+ if (ret == ACT_P_CREATED)
+ gate_setup_timer(gact, gact->param.tcfg_basetime,
+ gact->tk_offset, gact->param.tcfg_clockid,
+ true);
tcf_idr_release(*a, bind);
return err;
}
@@ -453,9 +465,7 @@ static void tcf_gate_cleanup(struct tc_action *a)
struct tcf_gate_params *p;
p = &gact->param;
- if (p->tcfg_clockid != -1)
- hrtimer_cancel(&gact->hitimer);
-
+ hrtimer_cancel(&gact->hitimer);
release_entry_list(&p->entries);
}
@@ -568,13 +578,13 @@ static int tcf_gate_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u32 packets,
- u64 lastuse, bool hw)
+static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+ u64 drops, u64 lastuse, bool hw)
{
struct tcf_gate *gact = to_gate(a);
struct tcf_t *tm = &gact->tcf_tm;
- tcf_action_update_stats(a, bytes, packets, false, hw);
+ tcf_action_update_stats(a, bytes, packets, drops, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 83dd82fc9f40..b2705318993b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -312,13 +312,13 @@ out:
return retval;
}
-static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
- u64 lastuse, bool hw)
+static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+ u64 drops, u64 lastuse, bool hw)
{
struct tcf_mirred *m = to_mirred(a);
struct tcf_t *tm = &m->tcf_tm;
- tcf_action_update_stats(a, bytes, packets, false, hw);
+ tcf_action_update_stats(a, bytes, packets, drops, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index d41d6200d9de..66986db062ed 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -409,13 +409,13 @@ done:
return p->tcf_action;
}
-static void tcf_pedit_stats_update(struct tc_action *a, u64 bytes, u32 packets,
- u64 lastuse, bool hw)
+static void tcf_pedit_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+ u64 drops, u64 lastuse, bool hw)
{
struct tcf_pedit *d = to_pedit(a);
struct tcf_t *tm = &d->tcf_tm;
- tcf_action_update_stats(a, bytes, packets, false, hw);
+ tcf_action_update_stats(a, bytes, packets, drops, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 8b7a0ac96c51..0b431d493768 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -288,13 +288,13 @@ static void tcf_police_cleanup(struct tc_action *a)
}
static void tcf_police_stats_update(struct tc_action *a,
- u64 bytes, u32 packets,
+ u64 bytes, u64 packets, u64 drops,
u64 lastuse, bool hw)
{
struct tcf_police *police = to_police(a);
struct tcf_t *tm = &police->tcf_tm;
- tcf_action_update_stats(a, bytes, packets, false, hw);
+ tcf_action_update_stats(a, bytes, packets, drops, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index b125b2be4467..361b863e0634 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -74,12 +74,13 @@ err:
}
static void tcf_skbedit_stats_update(struct tc_action *a, u64 bytes,
- u32 packets, u64 lastuse, bool hw)
+ u64 packets, u64 drops,
+ u64 lastuse, bool hw)
{
struct tcf_skbedit *d = to_skbedit(a);
struct tcf_t *tm = &d->tcf_tm;
- tcf_action_update_stats(a, bytes, packets, false, hw);
+ tcf_action_update_stats(a, bytes, packets, drops, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index c91d3958fcbb..a5ff9f68ab02 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -302,13 +302,13 @@ static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u32 packets,
- u64 lastuse, bool hw)
+static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+ u64 drops, u64 lastuse, bool hw)
{
struct tcf_vlan *v = to_vlan(a);
struct tcf_t *tm = &v->tcf_tm;
- tcf_action_update_stats(a, bytes, packets, false, hw);
+ tcf_action_update_stats(a, bytes, packets, drops, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a00a203b2ef5..e9e119ea6813 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -652,6 +652,7 @@ static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
&block->flow_block, tcf_block_shared(block),
&extack);
down_write(&block->cb_lock);
+ list_del(&block_cb->driver_list);
list_move(&block_cb->list, &bo.cb_list);
up_write(&block->cb_lock);
rtnl_lock();
@@ -671,25 +672,29 @@ static int tcf_block_offload_cmd(struct tcf_block *block,
struct netlink_ext_ack *extack)
{
struct flow_block_offload bo = {};
- int err;
tcf_block_offload_init(&bo, dev, command, ei->binder_type,
&block->flow_block, tcf_block_shared(block),
extack);
- if (dev->netdev_ops->ndo_setup_tc)
+ if (dev->netdev_ops->ndo_setup_tc) {
+ int err;
+
err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
- else
- err = flow_indr_dev_setup_offload(dev, TC_SETUP_BLOCK, block,
- &bo, tc_block_indr_cleanup);
+ if (err < 0) {
+ if (err != -EOPNOTSUPP)
+ NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
+ return err;
+ }
- if (err < 0) {
- if (err != -EOPNOTSUPP)
- NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
- return err;
+ return tcf_block_setup(block, &bo);
}
- return tcf_block_setup(block, &bo);
+ flow_indr_dev_setup_offload(dev, TC_SETUP_BLOCK, block, &bo,
+ tc_block_indr_cleanup);
+ tcf_block_setup(block, &bo);
+
+ return -EOPNOTSUPP;
}
static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
@@ -3655,9 +3660,11 @@ int tc_setup_flow_action(struct flow_action *flow_action,
tcf_sample_get_group(entry, act);
} else if (is_tcf_police(act)) {
entry->id = FLOW_ACTION_POLICE;
- entry->police.burst = tcf_police_tcfp_burst(act);
+ entry->police.burst = tcf_police_burst(act);
entry->police.rate_bytes_ps =
tcf_police_rate_bytes_ps(act);
+ entry->police.mtu = tcf_police_tcfp_mtu(act);
+ entry->police.index = act->tcfa_index;
} else if (is_tcf_ct(act)) {
entry->id = FLOW_ACTION_CT;
entry->ct.action = tcf_ct_action(act);
@@ -3741,6 +3748,125 @@ unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
}
EXPORT_SYMBOL(tcf_exts_num_actions);
+#ifdef CONFIG_NET_CLS_ACT
+static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr,
+ u32 *p_block_index,
+ struct netlink_ext_ack *extack)
+{
+ *p_block_index = nla_get_u32(block_index_attr);
+ if (!*p_block_index) {
+ NL_SET_ERR_MSG(extack, "Block number may not be zero");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
+ enum flow_block_binder_type binder_type,
+ struct nlattr *block_index_attr,
+ struct netlink_ext_ack *extack)
+{
+ u32 block_index;
+ int err;
+
+ if (!block_index_attr)
+ return 0;
+
+ err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
+ if (err)
+ return err;
+
+ if (!block_index)
+ return 0;
+
+ qe->info.binder_type = binder_type;
+ qe->info.chain_head_change = tcf_chain_head_change_dflt;
+ qe->info.chain_head_change_priv = &qe->filter_chain;
+ qe->info.block_index = block_index;
+
+ return tcf_block_get_ext(&qe->block, sch, &qe->info, extack);
+}
+EXPORT_SYMBOL(tcf_qevent_init);
+
+void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch)
+{
+ if (qe->info.block_index)
+ tcf_block_put_ext(qe->block, sch, &qe->info);
+}
+EXPORT_SYMBOL(tcf_qevent_destroy);
+
+int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
+ struct netlink_ext_ack *extack)
+{
+ u32 block_index;
+ int err;
+
+ if (!block_index_attr)
+ return 0;
+
+ err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
+ if (err)
+ return err;
+
+ /* Bounce newly-configured block or change in block. */
+ if (block_index != qe->info.block_index) {
+ NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(tcf_qevent_validate_change);
+
+struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
+ spinlock_t *root_lock, struct sk_buff **to_free, int *ret)
+{
+ struct tcf_result cl_res;
+ struct tcf_proto *fl;
+
+ if (!qe->info.block_index)
+ return skb;
+
+ fl = rcu_dereference_bh(qe->filter_chain);
+
+ if (root_lock)
+ spin_unlock(root_lock);
+
+ switch (tcf_classify(skb, fl, &cl_res, false)) {
+ case TC_ACT_SHOT:
+ qdisc_qstats_drop(sch);
+ __qdisc_drop(skb, to_free);
+ *ret = __NET_XMIT_BYPASS;
+ return NULL;
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
+ __qdisc_drop(skb, to_free);
+ *ret = __NET_XMIT_STOLEN;
+ return NULL;
+ case TC_ACT_REDIRECT:
+ skb_do_redirect(skb);
+ *ret = __NET_XMIT_STOLEN;
+ return NULL;
+ }
+
+ if (root_lock)
+ spin_lock(root_lock);
+
+ return skb;
+}
+EXPORT_SYMBOL(tcf_qevent_handle);
+
+int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe)
+{
+ if (!qe->info.block_index)
+ return 0;
+ return nla_put_u32(skb, attr_name, qe->info.block_index);
+}
+EXPORT_SYMBOL(tcf_qevent_dump);
+#endif
+
static __net_init int tcf_net_init(struct net *net)
{
struct tcf_net *tn = net_generic(net, tcf_net_id);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index b2da37286082..391971672d54 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -491,6 +491,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes,
cls_flower.stats.pkts,
+ cls_flower.stats.drops,
cls_flower.stats.lastused,
cls_flower.stats.used_hw_stats,
cls_flower.stats.used_hw_stats_valid);
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 8d39dbcf1746..cafb84480bab 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -338,7 +338,8 @@ static void mall_stats_hw_filter(struct tcf_proto *tp,
tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true);
tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes,
- cls_mall.stats.pkts, cls_mall.stats.lastused,
+ cls_mall.stats.pkts, cls_mall.stats.drops,
+ cls_mall.stats.lastused,
cls_mall.stats.used_hw_stats,
cls_mall.stats.used_hw_stats_valid);
}
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 61e95029c18f..78bec347b8b6 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -533,7 +533,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
"p %p,r %p,*arg %p\n",
- tp, handle, tca, arg, opt, p, r, arg ? *arg : NULL);
+ tp, handle, tca, arg, opt, p, r, *arg);
if (!opt)
return 0;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index e15ff335953d..771b068f8254 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -796,9 +796,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
struct tc_u32_sel *s = &n->sel;
struct tc_u_knode *new;
- new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
- GFP_KERNEL);
-
+ new = kzalloc(struct_size(new, sel.keys, s->nkeys), GFP_KERNEL);
if (!new)
return NULL;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 9a3449b56bd6..11ebba60da3b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1093,8 +1093,7 @@ skip:
int err;
/* Only support running class lockless if parent is lockless */
- if (new && (new->flags & TCQ_F_NOLOCK) &&
- parent && !(parent->flags & TCQ_F_NOLOCK))
+ if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
qdisc_clear_nolock(new);
if (!cops || !cops->graft)
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index ee12ca9f55b4..fb6b16c4e46d 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -374,7 +374,7 @@ static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl,
/* --------------------------- Qdisc operations ---------------------------- */
-static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
@@ -432,7 +432,7 @@ done:
#endif
}
- ret = qdisc_enqueue(skb, flow->q, to_free);
+ ret = qdisc_enqueue(skb, flow->q, root_lock, to_free);
if (ret != NET_XMIT_SUCCESS) {
drop: __maybe_unused
if (net_xmit_drop_count(ret)) {
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
index a7f7667ae984..187644657c4f 100644
--- a/net/sched/sch_blackhole.c
+++ b/net/sched/sch_blackhole.c
@@ -13,7 +13,7 @@
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
-static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
qdisc_drop(skb, sch, to_free);
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 60f8ae578819..e9c502dd29a2 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -312,8 +312,8 @@ static const u8 precedence[] = {
};
static const u8 diffserv8[] = {
- 2, 5, 1, 2, 4, 2, 2, 2,
- 0, 2, 1, 2, 1, 2, 1, 2,
+ 2, 0, 1, 2, 4, 2, 2, 2,
+ 1, 2, 1, 2, 1, 2, 1, 2,
5, 2, 4, 2, 4, 2, 4, 2,
3, 2, 3, 2, 3, 2, 3, 2,
6, 2, 3, 2, 3, 2, 3, 2,
@@ -323,7 +323,7 @@ static const u8 diffserv8[] = {
};
static const u8 diffserv4[] = {
- 0, 2, 0, 0, 2, 0, 0, 0,
+ 0, 1, 0, 0, 2, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0,
2, 0, 2, 0, 2, 0, 2, 0,
2, 0, 2, 0, 2, 0, 2, 0,
@@ -334,7 +334,7 @@ static const u8 diffserv4[] = {
};
static const u8 diffserv3[] = {
- 0, 0, 0, 0, 2, 0, 0, 0,
+ 0, 1, 0, 0, 2, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -1551,32 +1551,51 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
return idx + (tin << 16);
}
-static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
+static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash)
{
- int wlen = skb_network_offset(skb);
+ const int offset = skb_network_offset(skb);
+ u16 *buf, buf_;
u8 dscp;
switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
- wlen += sizeof(struct iphdr);
- if (!pskb_may_pull(skb, wlen) ||
- skb_try_make_writable(skb, wlen))
+ buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
+ if (unlikely(!buf))
return 0;
- dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
- if (wash && dscp)
+ /* ToS is in the second byte of iphdr */
+ dscp = ipv4_get_dsfield((struct iphdr *)buf) >> 2;
+
+ if (wash && dscp) {
+ const int wlen = offset + sizeof(struct iphdr);
+
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
+ return 0;
+
ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
+ }
+
return dscp;
case htons(ETH_P_IPV6):
- wlen += sizeof(struct ipv6hdr);
- if (!pskb_may_pull(skb, wlen) ||
- skb_try_make_writable(skb, wlen))
+ buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
+ if (unlikely(!buf))
return 0;
- dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
- if (wash && dscp)
+ /* Traffic class is in the first and second bytes of ipv6hdr */
+ dscp = ipv6_get_dsfield((struct ipv6hdr *)buf) >> 2;
+
+ if (wash && dscp) {
+ const int wlen = offset + sizeof(struct ipv6hdr);
+
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
+ return 0;
+
ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
+ }
+
return dscp;
case htons(ETH_P_ARP):
@@ -1593,14 +1612,17 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
{
struct cake_sched_data *q = qdisc_priv(sch);
u32 tin, mark;
+ bool wash;
u8 dscp;
/* Tin selection: Default to diffserv-based selection, allow overriding
- * using firewall marks or skb->priority.
+ * using firewall marks or skb->priority. Call DSCP parsing early if
+ * wash is enabled, otherwise defer to below to skip unneeded parsing.
*/
- dscp = cake_handle_diffserv(skb,
- q->rate_flags & CAKE_FLAG_WASH);
mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
+ wash = !!(q->rate_flags & CAKE_FLAG_WASH);
+ if (wash)
+ dscp = cake_handle_diffserv(skb, wash);
if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
tin = 0;
@@ -1614,6 +1636,8 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
else {
+ if (!wash)
+ dscp = cake_handle_diffserv(skb, wash);
tin = q->tin_index[dscp];
if (unlikely(tin >= q->tin_cnt))
@@ -1663,7 +1687,7 @@ hash:
static void cake_reconfigure(struct Qdisc *sch);
-static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct cake_sched_data *q = qdisc_priv(sch);
@@ -2691,7 +2715,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
qdisc_watchdog_init(&q->watchdog, sch);
if (opt) {
- int err = cake_change(sch, opt, extack);
+ err = cake_change(sch, opt, extack);
if (err)
return err;
@@ -3008,7 +3032,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
PUT_STAT_S32(BLUE_TIMER_US,
ktime_to_us(
ktime_sub(now,
- flow->cvars.blue_timer)));
+ flow->cvars.blue_timer)));
}
if (flow->cvars.dropping) {
PUT_STAT_S32(DROP_NEXT_US,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 39b427dc7512..052d4a1af69a 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -356,7 +356,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
}
static int
-cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct cbq_sched_data *q = qdisc_priv(sch);
@@ -373,7 +373,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return ret;
}
- ret = qdisc_enqueue(skb, cl->q, to_free);
+ ret = qdisc_enqueue(skb, cl->q, root_lock, to_free);
if (ret == NET_XMIT_SUCCESS) {
sch->q.qlen++;
cbq_mark_toplevel(q, cl);
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 2eaac2ff380f..7af15ebe07f7 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -77,7 +77,7 @@ struct cbs_sched_data {
s64 sendslope; /* in bytes/s */
s64 idleslope; /* in bytes/s */
struct qdisc_watchdog watchdog;
- int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch,
+ int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free);
struct sk_buff *(*dequeue)(struct Qdisc *sch);
struct Qdisc *qdisc;
@@ -85,13 +85,13 @@ struct cbs_sched_data {
};
static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch,
- struct Qdisc *child,
+ struct Qdisc *child, spinlock_t *root_lock,
struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
int err;
- err = child->ops->enqueue(skb, child, to_free);
+ err = child->ops->enqueue(skb, child, root_lock, to_free);
if (err != NET_XMIT_SUCCESS)
return err;
@@ -101,16 +101,16 @@ static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return NET_XMIT_SUCCESS;
}
-static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch,
+static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct cbs_sched_data *q = qdisc_priv(sch);
struct Qdisc *qdisc = q->qdisc;
- return cbs_child_enqueue(skb, sch, qdisc, to_free);
+ return cbs_child_enqueue(skb, sch, qdisc, root_lock, to_free);
}
-static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch,
+static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct cbs_sched_data *q = qdisc_priv(sch);
@@ -124,15 +124,15 @@ static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch,
q->last = ktime_get_ns();
}
- return cbs_child_enqueue(skb, sch, qdisc, to_free);
+ return cbs_child_enqueue(skb, sch, qdisc, root_lock, to_free);
}
-static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct cbs_sched_data *q = qdisc_priv(sch);
- return q->enqueue(skb, sch, to_free);
+ return q->enqueue(skb, sch, root_lock, to_free);
}
/* timediff is in ns, slope is in bytes/s */
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index bd618b00d319..baf3faee31aa 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -210,7 +210,7 @@ static bool choke_match_random(const struct choke_sched_data *q,
return choke_match_flow(oskb, nskb);
}
-static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct choke_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 30169b3adbbb..1d94837abdd8 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -108,7 +108,7 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
return skb;
}
-static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct codel_sched_data *q;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 07a2b0b35495..0d5c9a8ec61d 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -337,7 +337,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
return NULL;
}
-static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
@@ -355,7 +355,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
first = !cl->qdisc->q.qlen;
- err = qdisc_enqueue(skb, cl->qdisc, to_free);
+ err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
cl->qstats.drops++;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 05605b30bef3..fbe49fffcdbb 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -198,7 +198,7 @@ static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl,
/* --------------------------- Qdisc operations ---------------------------- */
-static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
@@ -267,7 +267,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
}
- err = qdisc_enqueue(skb, p->q, to_free);
+ err = qdisc_enqueue(skb, p->q, root_lock, to_free);
if (err != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(err))
qdisc_qstats_drop(sch);
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index c48f91075b5c..7a7c50a68115 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -160,7 +160,7 @@ static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
}
static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
- struct sk_buff **to_free)
+ spinlock_t *root_lock, struct sk_buff **to_free)
{
struct etf_sched_data *q = qdisc_priv(sch);
struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index a87e9159338c..373dc5855d4e 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -415,7 +415,7 @@ static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
return &q->classes[band];
}
-static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
@@ -433,7 +433,7 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
first = !cl->qdisc->q.qlen;
- err = qdisc_enqueue(skb, cl->qdisc, to_free);
+ err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
cl->qstats.drops++;
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index a579a4131d22..b4da5b624ad8 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -16,7 +16,7 @@
/* 1 band FIFO pseudo-"scheduler" */
-static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
@@ -25,7 +25,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return qdisc_drop(skb, sch, to_free);
}
-static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
if (likely(sch->q.qlen < sch->limit))
@@ -34,7 +34,7 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return qdisc_drop(skb, sch, to_free);
}
-static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
unsigned int prev_backlog;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 8f06a808c59a..a90d745c41e0 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -439,7 +439,7 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon));
}
-static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct fq_sched_data *q = qdisc_priv(sch);
@@ -1075,3 +1075,4 @@ module_init(fq_module_init)
module_exit(fq_module_exit)
MODULE_AUTHOR("Eric Dumazet");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Fair Queue Packet Scheduler");
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 436160be9c18..6bf979f95509 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -181,7 +181,7 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets,
return idx;
}
-static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
@@ -721,3 +721,4 @@ module_init(fq_codel_module_init)
module_exit(fq_codel_module_exit)
MODULE_AUTHOR("Eric Dumazet");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Fair Queue CoDel discipline");
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index fb760cee824e..a27a250ab8f9 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -125,7 +125,7 @@ static inline void flow_queue_add(struct fq_pie_flow *flow,
skb->next = NULL;
}
-static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct fq_pie_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 265a61d011df..715cde1df9e4 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -520,7 +520,7 @@ EXPORT_SYMBOL(netif_carrier_off);
cheaper.
*/
-static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
+static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, spinlock_t *root_lock,
struct sk_buff **to_free)
{
__qdisc_drop(skb, to_free);
@@ -614,7 +614,7 @@ static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
return &priv->q[band];
}
-static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
+static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, spinlock_t *root_lock,
struct sk_buff **to_free)
{
int band = prio2band[skb->priority & TC_PRIO_MAX];
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 8599c6f31b05..7d67c6cd6605 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -161,7 +161,7 @@ static bool gred_per_vq_red_flags_used(struct gred_sched *table)
return false;
}
-static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct gred_sched_data *q = NULL;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 433f2190960f..7f6670044f0a 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1528,8 +1528,8 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
return -1;
}
-static int
-hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
+static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
+ struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
struct hfsc_class *cl;
@@ -1545,7 +1545,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
}
first = !cl->qdisc->q.qlen;
- err = qdisc_enqueue(skb, cl->qdisc, to_free);
+ err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
cl->qstats.drops++;
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index be35f03b657b..ddc6bf1d85d0 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -368,7 +368,7 @@ static unsigned int hhf_drop(struct Qdisc *sch, struct sk_buff **to_free)
return bucket - q->buckets;
}
-static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct hhf_sched_data *q = qdisc_priv(sch);
@@ -721,3 +721,4 @@ module_exit(hhf_module_exit)
MODULE_AUTHOR("Terry Lam");
MODULE_AUTHOR("Nandita Dukkipati");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Heavy-Hitter Filter (HHF)");
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 8184c87da8be..52fc513688b1 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -576,7 +576,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
cl->prio_activity = 0;
}
-static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
int uninitialized_var(ret);
@@ -599,7 +599,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
__qdisc_drop(skb, to_free);
return ret;
#endif
- } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
+ } else if ((ret = qdisc_enqueue(skb, cl->leaf.q, root_lock,
to_free)) != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret)) {
qdisc_qstats_drop(sch);
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 1330ad224931..648611f5c105 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -57,7 +57,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
}
static int
-multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct Qdisc *qdisc;
@@ -74,7 +74,7 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
#endif
- ret = qdisc_enqueue(skb, qdisc, to_free);
+ ret = qdisc_enqueue(skb, qdisc, root_lock, to_free);
if (ret == NET_XMIT_SUCCESS) {
sch->q.qlen++;
return NET_XMIT_SUCCESS;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 84f82771cdf5..8fb17483a34f 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -431,7 +431,7 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
* NET_XMIT_DROP: queue length didn't change.
* NET_XMIT_SUCCESS: one skb was queued.
*/
-static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -480,7 +480,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
q->duplicate = 0;
- rootq->enqueue(skb2, rootq, to_free);
+ rootq->enqueue(skb2, rootq, root_lock, to_free);
q->duplicate = dupsave;
rc_drop = NET_XMIT_SUCCESS;
}
@@ -604,7 +604,7 @@ finish_segs:
skb_mark_not_on_list(segs);
qdisc_skb_cb(segs)->pkt_len = segs->len;
last_len = segs->len;
- rc = qdisc_enqueue(segs, sch, to_free);
+ rc = qdisc_enqueue(segs, sch, root_lock, to_free);
if (rc != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(rc))
qdisc_qstats_drop(sch);
@@ -720,7 +720,7 @@ deliver:
struct sk_buff *to_free = NULL;
int err;
- err = qdisc_enqueue(skb, q->qdisc, &to_free);
+ err = qdisc_enqueue(skb, q->qdisc, NULL, &to_free);
kfree_skb_list(to_free);
if (err != NET_XMIT_SUCCESS &&
net_xmit_drop_count(err)) {
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index c65077f0c0f3..b305313b64e3 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -82,7 +82,7 @@ bool pie_drop_early(struct Qdisc *sch, struct pie_params *params,
}
EXPORT_SYMBOL_GPL(pie_drop_early);
-static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct pie_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index cbc2ebca4548..e5f8b4769b4d 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -84,7 +84,7 @@ struct plug_sched_data {
u32 pkts_to_release;
};
-static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct plug_sched_data *q = qdisc_priv(sch);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 647941702f9f..a3e187f2603c 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -65,8 +65,8 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
return q->queues[band];
}
-static int
-prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
+static int prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
+ struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
struct Qdisc *qdisc;
@@ -83,7 +83,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
}
#endif
- ret = qdisc_enqueue(skb, qdisc, to_free);
+ ret = qdisc_enqueue(skb, qdisc, root_lock, to_free);
if (ret == NET_XMIT_SUCCESS) {
sch->qstats.backlog += len;
sch->q.qlen++;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 0b05ac7c848e..ede854516825 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1194,7 +1194,7 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q)
return agg;
}
-static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb), gso_segs;
@@ -1225,7 +1225,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
first = !cl->qdisc->q.qlen;
- err = qdisc_enqueue(skb, cl->qdisc, to_free);
+ err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
pr_debug("qfq_enqueue: enqueue failed %d\n", err);
if (net_xmit_drop_count(err)) {
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 555a1b9e467f..de2be4d04ed6 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -46,6 +46,8 @@ struct red_sched_data {
struct red_vars vars;
struct red_stats stats;
struct Qdisc *qdisc;
+ struct tcf_qevent qe_early_drop;
+ struct tcf_qevent qe_mark;
};
#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)
@@ -65,7 +67,7 @@ static int red_use_nodrop(struct red_sched_data *q)
return q->flags & TC_RED_NODROP;
}
-static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct red_sched_data *q = qdisc_priv(sch);
@@ -92,6 +94,9 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (INET_ECN_set_ce(skb)) {
q->stats.prob_mark++;
+ skb = tcf_qevent_handle(&q->qe_mark, sch, skb, root_lock, to_free, &ret);
+ if (!skb)
+ return NET_XMIT_CN | ret;
} else if (!red_use_nodrop(q)) {
q->stats.prob_drop++;
goto congestion_drop;
@@ -109,6 +114,9 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (INET_ECN_set_ce(skb)) {
q->stats.forced_mark++;
+ skb = tcf_qevent_handle(&q->qe_mark, sch, skb, root_lock, to_free, &ret);
+ if (!skb)
+ return NET_XMIT_CN | ret;
} else if (!red_use_nodrop(q)) {
q->stats.forced_drop++;
goto congestion_drop;
@@ -118,7 +126,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
break;
}
- ret = qdisc_enqueue(skb, child, to_free);
+ ret = qdisc_enqueue(skb, child, root_lock, to_free);
if (likely(ret == NET_XMIT_SUCCESS)) {
qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
@@ -129,6 +137,10 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return ret;
congestion_drop:
+ skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, root_lock, to_free, &ret);
+ if (!skb)
+ return NET_XMIT_CN | ret;
+
qdisc_drop(skb, sch, to_free);
return NET_XMIT_CN;
}
@@ -202,6 +214,8 @@ static void red_destroy(struct Qdisc *sch)
{
struct red_sched_data *q = qdisc_priv(sch);
+ tcf_qevent_destroy(&q->qe_mark, sch);
+ tcf_qevent_destroy(&q->qe_early_drop, sch);
del_timer_sync(&q->adapt_timer);
red_offload(sch, false);
qdisc_put(q->qdisc);
@@ -213,14 +227,15 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
[TCA_RED_STAB] = { .len = RED_STAB_SIZE },
[TCA_RED_MAX_P] = { .type = NLA_U32 },
[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
+ [TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
+ [TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};
-static int red_change(struct Qdisc *sch, struct nlattr *opt,
- struct netlink_ext_ack *extack)
+static int __red_change(struct Qdisc *sch, struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
struct Qdisc *old_child = NULL, *child = NULL;
struct red_sched_data *q = qdisc_priv(sch);
- struct nlattr *tb[TCA_RED_MAX + 1];
struct nla_bitfield32 flags_bf;
struct tc_red_qopt *ctl;
unsigned char userbits;
@@ -228,14 +243,6 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
int err;
u32 max_P;
- if (opt == NULL)
- return -EINVAL;
-
- err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
- NULL);
- if (err < 0)
- return err;
-
if (tb[TCA_RED_PARMS] == NULL ||
tb[TCA_RED_STAB] == NULL)
return -EINVAL;
@@ -323,11 +330,74 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
struct red_sched_data *q = qdisc_priv(sch);
+ struct nlattr *tb[TCA_RED_MAX + 1];
+ int err;
+
+ if (!opt)
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
+ extack);
+ if (err < 0)
+ return err;
q->qdisc = &noop_qdisc;
q->sch = sch;
timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
- return red_change(sch, opt, extack);
+
+ err = __red_change(sch, tb, extack);
+ if (err)
+ return err;
+
+ err = tcf_qevent_init(&q->qe_early_drop, sch,
+ FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
+ tb[TCA_RED_EARLY_DROP_BLOCK], extack);
+ if (err)
+ goto err_early_drop_init;
+
+ err = tcf_qevent_init(&q->qe_mark, sch,
+ FLOW_BLOCK_BINDER_TYPE_RED_MARK,
+ tb[TCA_RED_MARK_BLOCK], extack);
+ if (err)
+ goto err_mark_init;
+
+ return 0;
+
+err_mark_init:
+ tcf_qevent_destroy(&q->qe_early_drop, sch);
+err_early_drop_init:
+ del_timer_sync(&q->adapt_timer);
+ red_offload(sch, false);
+ qdisc_put(q->qdisc);
+ return err;
+}
+
+static int red_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+ struct nlattr *tb[TCA_RED_MAX + 1];
+ int err;
+
+ if (!opt)
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
+ extack);
+ if (err < 0)
+ return err;
+
+ err = tcf_qevent_validate_change(&q->qe_early_drop,
+ tb[TCA_RED_EARLY_DROP_BLOCK], extack);
+ if (err)
+ return err;
+
+ err = tcf_qevent_validate_change(&q->qe_mark,
+ tb[TCA_RED_MARK_BLOCK], extack);
+ if (err)
+ return err;
+
+ return __red_change(sch, tb, extack);
}
static int red_dump_offload_stats(struct Qdisc *sch)
@@ -371,7 +441,9 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
nla_put_bitfield32(skb, TCA_RED_FLAGS,
- q->flags, TC_RED_SUPPORTED_FLAGS))
+ q->flags, TC_RED_SUPPORTED_FLAGS) ||
+ tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
+ tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
goto nla_put_failure;
return nla_nest_end(skb, opts);
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 4074c50ac3d7..d2a6e78262bb 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -276,7 +276,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
return false;
}
-static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
@@ -399,7 +399,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
enqueue:
- ret = qdisc_enqueue(skb, child, to_free);
+ ret = qdisc_enqueue(skb, child, root_lock, to_free);
if (likely(ret == NET_XMIT_SUCCESS)) {
qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 5a6def5e4e6d..46cdefd69e44 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -343,7 +343,7 @@ static int sfq_headdrop(const struct sfq_sched_data *q)
}
static int
-sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
+sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free)
{
struct sfq_sched_data *q = qdisc_priv(sch);
unsigned int hash, dropped;
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index 7a5e4c454715..f75f237c4436 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -65,7 +65,7 @@ static u16 calc_new_low_prio(const struct skbprio_sched_data *q)
return SKBPRIO_MAX_PRIORITY - 1;
}
-static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
const unsigned int max_priority = SKBPRIO_MAX_PRIORITY - 1;
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index b1eb12d33b9a..daef2ff60a98 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -410,7 +410,7 @@ done:
return txtime;
}
-static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct taprio_sched *q = qdisc_priv(sch);
@@ -435,7 +435,7 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
- return qdisc_enqueue(skb, child, to_free);
+ return qdisc_enqueue(skb, child, root_lock, to_free);
}
static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)
@@ -1108,11 +1108,10 @@ static void setup_txtime(struct taprio_sched *q,
static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries)
{
- size_t size = sizeof(struct tc_taprio_sched_entry) * num_entries +
- sizeof(struct __tc_taprio_qopt_offload);
struct __tc_taprio_qopt_offload *__offload;
- __offload = kzalloc(size, GFP_KERNEL);
+ __offload = kzalloc(struct_size(__offload, offload.entries, num_entries),
+ GFP_KERNEL);
if (!__offload)
return NULL;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 78e79029dc63..c3eb5cdb83a8 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -187,7 +187,7 @@ static int tbf_offload_dump(struct Qdisc *sch)
/* GSO packet is too big, segment it so that tbf can transmit
* each segment in time
*/
-static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
+static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct tbf_sched_data *q = qdisc_priv(sch);
@@ -206,7 +206,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
skb_mark_not_on_list(segs);
qdisc_skb_cb(segs)->pkt_len = segs->len;
len += segs->len;
- ret = qdisc_enqueue(segs, q->qdisc, to_free);
+ ret = qdisc_enqueue(segs, q->qdisc, root_lock, to_free);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
qdisc_qstats_drop(sch);
@@ -221,7 +221,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}
-static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct tbf_sched_data *q = qdisc_priv(sch);
@@ -231,10 +231,10 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (qdisc_pkt_len(skb) > q->max_size) {
if (skb_is_gso(skb) &&
skb_gso_validate_mac_len(skb, q->max_size))
- return tbf_segment(skb, sch, to_free);
+ return tbf_segment(skb, sch, root_lock, to_free);
return qdisc_drop(skb, sch, to_free);
}
- ret = qdisc_enqueue(skb, q->qdisc, to_free);
+ ret = qdisc_enqueue(skb, q->qdisc, root_lock, to_free);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
qdisc_qstats_drop(sch);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 689ef6f3ded8..511964653476 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -72,8 +72,8 @@ struct teql_sched_data {
/* "teql*" qdisc routines */
-static int
-teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
+static int teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
+ struct sk_buff **to_free)
{
struct net_device *dev = qdisc_dev(sch);
struct teql_sched_data *q = qdisc_priv(sch);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 72315137d7e7..8d735461fa19 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1565,12 +1565,15 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
enum sctp_scope scope, gfp_t gfp)
{
+ struct sock *sk = asoc->base.sk;
int flags;
/* Use scoping rules to determine the subset of addresses from
* the endpoint.
*/
- flags = (PF_INET6 == asoc->base.sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0;
+ flags = (PF_INET6 == sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0;
+ if (!inet_v6_ipv6only(sk))
+ flags |= SCTP_ADDR4_ALLOWED;
if (asoc->peer.ipv4_address)
flags |= SCTP_ADDR4_PEERSUPP;
if (asoc->peer.ipv6_address)
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 53bc61537f44..701c5a4e441d 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -461,6 +461,7 @@ static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
* well as the remote peer.
*/
if ((((AF_INET == addr->sa.sa_family) &&
+ (flags & SCTP_ADDR4_ALLOWED) &&
(flags & SCTP_ADDR4_PEERSUPP))) ||
(((AF_INET6 == addr->sa.sa_family) &&
(flags & SCTP_ADDR6_ALLOWED) &&
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 092d1afdee0d..cde29f3c7fb3 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -148,7 +148,8 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
* sock as well as the remote peer.
*/
if (addr->a.sa.sa_family == AF_INET &&
- !(copy_flags & SCTP_ADDR4_PEERSUPP))
+ (!(copy_flags & SCTP_ADDR4_ALLOWED) ||
+ !(copy_flags & SCTP_ADDR4_PEERSUPP)))
continue;
if (addr->a.sa.sa_family == AF_INET6 &&
(!(copy_flags & SCTP_ADDR6_ALLOWED) ||
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 383f87bc1061..940d176e0e87 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -250,8 +250,8 @@ static void tipc_bcast_select_xmit_method(struct net *net, int dests,
* Consumes the buffer chain.
* Returns 0 if success, otherwise errno: -EHOSTUNREACH,-EMSGSIZE
*/
-static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
- u16 *cong_link_cnt)
+int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
+ u16 *cong_link_cnt)
{
struct tipc_link *l = tipc_bc_sndlink(net);
struct sk_buff_head xmitq;
@@ -752,7 +752,7 @@ void tipc_nlist_purge(struct tipc_nlist *nl)
nl->local = false;
}
-u32 tipc_bcast_get_broadcast_mode(struct net *net)
+u32 tipc_bcast_get_mode(struct net *net)
{
struct tipc_bc_base *bb = tipc_bc_base(net);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 4240c95188b1..2d9352dc7b0e 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -90,6 +90,8 @@ void tipc_bcast_toggle_rcast(struct net *net, bool supp);
int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
struct tipc_mc_method *method, struct tipc_nlist *dests,
u16 *cong_link_cnt);
+int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
+ u16 *cong_link_cnt);
int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb);
void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
struct tipc_msg *hdr);
@@ -101,7 +103,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg,
int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l);
-u32 tipc_bcast_get_broadcast_mode(struct net *net);
+u32 tipc_bcast_get_mode(struct net *net);
u32 tipc_bcast_get_broadcast_ratio(struct net *net);
void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
diff --git a/net/tipc/link.c b/net/tipc/link.c
index ee3b8d0576b8..1c579357ccdf 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1385,12 +1385,12 @@ u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
p = (struct tipc_gap_ack_blks *)msg_data(hdr);
sz = ntohs(p->len);
/* Sanity check */
- if (sz == tipc_gap_ack_blks_sz(p->ugack_cnt + p->bgack_cnt)) {
+ if (sz == struct_size(p, gacks, p->ugack_cnt + p->bgack_cnt)) {
/* Good, check if the desired type exists */
if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt))
goto ok;
/* Backward compatible: peer might not support bc, but uc? */
- } else if (uc && sz == tipc_gap_ack_blks_sz(p->ugack_cnt)) {
+ } else if (uc && sz == struct_size(p, gacks, p->ugack_cnt)) {
if (p->ugack_cnt) {
p->bgack_cnt = 0;
goto ok;
@@ -1472,7 +1472,7 @@ static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr)
__tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0;
/* Total len */
- len = tipc_gap_ack_blks_sz(ga->bgack_cnt + ga->ugack_cnt);
+ len = struct_size(ga, gacks, ga->bgack_cnt + ga->ugack_cnt);
ga->len = htons(len);
return len;
}
@@ -1521,7 +1521,7 @@ static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r,
gacks = &ga->gacks[ga->bgack_cnt];
} else if (ga) {
/* Copy the Gap ACKs, bc part, for later renewal if needed */
- this_ga = kmemdup(ga, tipc_gap_ack_blks_sz(ga->bgack_cnt),
+ this_ga = kmemdup(ga, struct_size(ga, gacks, ga->bgack_cnt),
GFP_ATOMIC);
if (likely(this_ga)) {
this_ga->start_index = 0;
@@ -2745,7 +2745,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg,
void *hdr;
struct nlattr *attrs;
struct nlattr *prop;
- u32 bc_mode = tipc_bcast_get_broadcast_mode(net);
+ u32 bc_mode = tipc_bcast_get_mode(net);
u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net);
if (!bcl)
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 58660d56bc83..1016e96db5c4 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -189,11 +189,9 @@ struct tipc_gap_ack_blks {
struct tipc_gap_ack gacks[];
};
-#define tipc_gap_ack_blks_sz(n) (sizeof(struct tipc_gap_ack_blks) + \
- sizeof(struct tipc_gap_ack) * (n))
-
#define MAX_GAP_ACK_BLKS 128
-#define MAX_GAP_ACK_BLKS_SZ tipc_gap_ack_blks_sz(MAX_GAP_ACK_BLKS)
+#define MAX_GAP_ACK_BLKS_SZ (sizeof(struct tipc_gap_ack_blks) + \
+ sizeof(struct tipc_gap_ack) * MAX_GAP_ACK_BLKS)
static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
{
@@ -438,6 +436,36 @@ static inline void msg_set_errcode(struct tipc_msg *m, u32 err)
msg_set_bits(m, 1, 25, 0xf, err);
}
+static inline void msg_set_bulk(struct tipc_msg *m)
+{
+ msg_set_bits(m, 1, 28, 0x1, 1);
+}
+
+static inline u32 msg_is_bulk(struct tipc_msg *m)
+{
+ return msg_bits(m, 1, 28, 0x1);
+}
+
+static inline void msg_set_last_bulk(struct tipc_msg *m)
+{
+ msg_set_bits(m, 1, 27, 0x1, 1);
+}
+
+static inline u32 msg_is_last_bulk(struct tipc_msg *m)
+{
+ return msg_bits(m, 1, 27, 0x1);
+}
+
+static inline void msg_set_non_legacy(struct tipc_msg *m)
+{
+ msg_set_bits(m, 1, 26, 0x1, 1);
+}
+
+static inline u32 msg_is_legacy(struct tipc_msg *m)
+{
+ return !msg_bits(m, 1, 26, 0x1);
+}
+
static inline u32 msg_reroute_cnt(struct tipc_msg *m)
{
return msg_bits(m, 1, 21, 0xf);
@@ -567,6 +595,16 @@ static inline void msg_set_origport(struct tipc_msg *m, u32 p)
msg_set_word(m, 4, p);
}
+static inline u16 msg_named_seqno(struct tipc_msg *m)
+{
+ return msg_bits(m, 4, 0, 0xffff);
+}
+
+static inline void msg_set_named_seqno(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 4, 0, 0xffff, n);
+}
+
static inline u32 msg_destport(struct tipc_msg *m)
{
return msg_word(m, 5);
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 5feaf3b67380..2f9c148f17e2 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -102,7 +102,8 @@ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ)
pr_warn("Publication distribution failure\n");
return NULL;
}
-
+ msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++);
+ msg_set_non_legacy(buf_msg(skb));
item = (struct distr_item *)msg_data(buf_msg(skb));
publ_to_item(item, publ);
return skb;
@@ -114,8 +115,8 @@ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ)
struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
{
struct name_table *nt = tipc_name_table(net);
- struct sk_buff *buf;
struct distr_item *item;
+ struct sk_buff *skb;
write_lock_bh(&nt->cluster_scope_lock);
list_del(&publ->binding_node);
@@ -123,15 +124,16 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
if (publ->scope == TIPC_NODE_SCOPE)
return NULL;
- buf = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0);
- if (!buf) {
+ skb = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0);
+ if (!skb) {
pr_warn("Withdrawal distribution failure\n");
return NULL;
}
-
- item = (struct distr_item *)msg_data(buf_msg(buf));
+ msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++);
+ msg_set_non_legacy(buf_msg(skb));
+ item = (struct distr_item *)msg_data(buf_msg(skb));
publ_to_item(item, publ);
- return buf;
+ return skb;
}
/**
@@ -141,7 +143,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
* @pls: linked list of publication items to be packed into buffer chain
*/
static void named_distribute(struct net *net, struct sk_buff_head *list,
- u32 dnode, struct list_head *pls)
+ u32 dnode, struct list_head *pls, u16 seqno)
{
struct publication *publ;
struct sk_buff *skb = NULL;
@@ -149,6 +151,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0, false) - INT_H_SIZE) /
ITEM_SIZE) * ITEM_SIZE;
u32 msg_rem = msg_dsz;
+ struct tipc_msg *hdr;
list_for_each_entry(publ, pls, binding_node) {
/* Prepare next buffer: */
@@ -159,8 +162,11 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
pr_warn("Bulk publication failure\n");
return;
}
- msg_set_bc_ack_invalid(buf_msg(skb), true);
- item = (struct distr_item *)msg_data(buf_msg(skb));
+ hdr = buf_msg(skb);
+ msg_set_bc_ack_invalid(hdr, true);
+ msg_set_bulk(hdr);
+ msg_set_non_legacy(hdr);
+ item = (struct distr_item *)msg_data(hdr);
}
/* Pack publication into message: */
@@ -176,24 +182,35 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
}
}
if (skb) {
- msg_set_size(buf_msg(skb), INT_H_SIZE + (msg_dsz - msg_rem));
+ hdr = buf_msg(skb);
+ msg_set_size(hdr, INT_H_SIZE + (msg_dsz - msg_rem));
skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem));
__skb_queue_tail(list, skb);
}
+ hdr = buf_msg(skb_peek_tail(list));
+ msg_set_last_bulk(hdr);
+ msg_set_named_seqno(hdr, seqno);
}
/**
* tipc_named_node_up - tell specified node about all publications by this node
*/
-void tipc_named_node_up(struct net *net, u32 dnode)
+void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities)
{
struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
struct sk_buff_head head;
+ u16 seqno;
__skb_queue_head_init(&head);
+ spin_lock_bh(&tn->nametbl_lock);
+ if (!(capabilities & TIPC_NAMED_BCAST))
+ nt->rc_dests++;
+ seqno = nt->snd_nxt;
+ spin_unlock_bh(&tn->nametbl_lock);
read_lock_bh(&nt->cluster_scope_lock);
- named_distribute(net, &head, dnode, &nt->cluster_scope);
+ named_distribute(net, &head, dnode, &nt->cluster_scope, seqno);
tipc_node_xmit(net, &head, dnode, 0);
read_unlock_bh(&nt->cluster_scope_lock);
}
@@ -245,13 +262,21 @@ static void tipc_dist_queue_purge(struct net *net, u32 addr)
spin_unlock_bh(&tn->nametbl_lock);
}
-void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr)
+void tipc_publ_notify(struct net *net, struct list_head *nsub_list,
+ u32 addr, u16 capabilities)
{
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
+
struct publication *publ, *tmp;
list_for_each_entry_safe(publ, tmp, nsub_list, binding_node)
tipc_publ_purge(net, publ, addr);
tipc_dist_queue_purge(net, addr);
+ spin_lock_bh(&tn->nametbl_lock);
+ if (!(capabilities & TIPC_NAMED_BCAST))
+ nt->rc_dests--;
+ spin_unlock_bh(&tn->nametbl_lock);
}
/**
@@ -295,29 +320,62 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
return false;
}
+static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq,
+ u16 *rcv_nxt, bool *open)
+{
+ struct sk_buff *skb, *tmp;
+ struct tipc_msg *hdr;
+ u16 seqno;
+
+ skb_queue_walk_safe(namedq, skb, tmp) {
+ skb_linearize(skb);
+ hdr = buf_msg(skb);
+ seqno = msg_named_seqno(hdr);
+ if (msg_is_last_bulk(hdr)) {
+ *rcv_nxt = seqno;
+ *open = true;
+ }
+
+ if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) {
+ __skb_unlink(skb, namedq);
+ return skb;
+ }
+
+ if (*open && (*rcv_nxt == seqno)) {
+ (*rcv_nxt)++;
+ __skb_unlink(skb, namedq);
+ return skb;
+ }
+
+ if (less(seqno, *rcv_nxt)) {
+ __skb_unlink(skb, namedq);
+ kfree_skb(skb);
+ continue;
+ }
+ }
+ return NULL;
+}
+
/**
* tipc_named_rcv - process name table update messages sent by another node
*/
-void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
+void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq,
+ u16 *rcv_nxt, bool *open)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_msg *msg;
+ struct tipc_net *tn = tipc_net(net);
struct distr_item *item;
- uint count;
- u32 node;
+ struct tipc_msg *hdr;
struct sk_buff *skb;
- int mtype;
+ u32 count, node;
spin_lock_bh(&tn->nametbl_lock);
- for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) {
- skb_linearize(skb);
- msg = buf_msg(skb);
- mtype = msg_type(msg);
- item = (struct distr_item *)msg_data(msg);
- count = msg_data_sz(msg) / ITEM_SIZE;
- node = msg_orignode(msg);
+ while ((skb = tipc_named_dequeue(namedq, rcv_nxt, open))) {
+ hdr = buf_msg(skb);
+ node = msg_orignode(hdr);
+ item = (struct distr_item *)msg_data(hdr);
+ count = msg_data_sz(hdr) / ITEM_SIZE;
while (count--) {
- tipc_update_nametbl(net, item, node, mtype);
+ tipc_update_nametbl(net, item, node, msg_type(hdr));
item++;
}
kfree_skb(skb);
@@ -345,6 +403,6 @@ void tipc_named_reinit(struct net *net)
publ->node = self;
list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node)
publ->node = self;
-
+ nt->rc_dests = 0;
spin_unlock_bh(&tn->nametbl_lock);
}
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index 63fc73e0fa6c..092323158f06 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -67,11 +67,14 @@ struct distr_item {
__be32 key;
};
+void tipc_named_bcast(struct net *net, struct sk_buff *skb);
struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ);
struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ);
-void tipc_named_node_up(struct net *net, u32 dnode);
-void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue);
+void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities);
+void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq,
+ u16 *rcv_nxt, bool *open);
void tipc_named_reinit(struct net *net);
-void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr);
+void tipc_publ_notify(struct net *net, struct list_head *nsub_list,
+ u32 addr, u16 capabilities);
#endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 359b2bc888cf..2ac33d32edc2 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -729,6 +729,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
struct tipc_net *tn = tipc_net(net);
struct publication *p = NULL;
struct sk_buff *skb = NULL;
+ u32 rc_dests;
spin_lock_bh(&tn->nametbl_lock);
@@ -743,12 +744,14 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
nt->local_publ_count++;
skb = tipc_named_publish(net, p);
}
+ rc_dests = nt->rc_dests;
exit:
spin_unlock_bh(&tn->nametbl_lock);
if (skb)
- tipc_node_broadcast(net, skb);
+ tipc_node_broadcast(net, skb, rc_dests);
return p;
+
}
/**
@@ -762,6 +765,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower,
u32 self = tipc_own_addr(net);
struct sk_buff *skb = NULL;
struct publication *p;
+ u32 rc_dests;
spin_lock_bh(&tn->nametbl_lock);
@@ -775,10 +779,11 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower,
pr_err("Failed to remove local publication {%u,%u,%u}/%u\n",
type, lower, upper, key);
}
+ rc_dests = nt->rc_dests;
spin_unlock_bh(&tn->nametbl_lock);
if (skb) {
- tipc_node_broadcast(net, skb);
+ tipc_node_broadcast(net, skb, rc_dests);
return 1;
}
return 0;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 728bc7016c38..8064e1986e2c 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -106,6 +106,8 @@ struct name_table {
struct list_head cluster_scope;
rwlock_t cluster_scope_lock;
u32 local_publ_count;
+ u32 rc_dests;
+ u32 snd_nxt;
};
int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index a4c2816c3746..030a51c4d1fa 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -75,6 +75,8 @@ struct tipc_bclink_entry {
struct sk_buff_head arrvq;
struct sk_buff_head inputq2;
struct sk_buff_head namedq;
+ u16 named_rcv_nxt;
+ bool named_open;
};
/**
@@ -396,10 +398,10 @@ static void tipc_node_write_unlock(struct tipc_node *n)
write_unlock_bh(&n->lock);
if (flags & TIPC_NOTIFY_NODE_DOWN)
- tipc_publ_notify(net, publ_list, addr);
+ tipc_publ_notify(net, publ_list, addr, n->capabilities);
if (flags & TIPC_NOTIFY_NODE_UP)
- tipc_named_node_up(net, addr);
+ tipc_named_node_up(net, addr, n->capabilities);
if (flags & TIPC_NOTIFY_LINK_UP) {
tipc_mon_peer_up(net, addr, bearer_id);
@@ -1483,6 +1485,7 @@ static void node_lost_contact(struct tipc_node *n,
/* Clean up broadcast state */
tipc_bcast_remove_peer(n->net, n->bc_entry.link);
+ __skb_queue_purge(&n->bc_entry.namedq);
/* Abort any ongoing link failover */
for (i = 0; i < MAX_BEARERS; i++) {
@@ -1729,12 +1732,23 @@ int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq)
return 0;
}
-void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
+void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests)
{
+ struct sk_buff_head xmitq;
struct sk_buff *txskb;
struct tipc_node *n;
+ u16 dummy;
u32 dst;
+ /* Use broadcast if all nodes support it */
+ if (!rc_dests && tipc_bcast_get_mode(net) != BCLINK_MODE_RCAST) {
+ __skb_queue_head_init(&xmitq);
+ __skb_queue_tail(&xmitq, skb);
+ tipc_bcast_xmit(net, &xmitq, &dummy);
+ return;
+ }
+
+ /* Otherwise use legacy replicast method */
rcu_read_lock();
list_for_each_entry_rcu(n, tipc_nodes(net), list) {
dst = n->addr;
@@ -1749,7 +1763,6 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
tipc_node_xmit_skb(net, txskb, dst, 0);
}
rcu_read_unlock();
-
kfree_skb(skb);
}
@@ -1844,7 +1857,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
/* Handle NAME_DISTRIBUTOR messages sent from 1.7 nodes */
if (!skb_queue_empty(&n->bc_entry.namedq))
- tipc_named_rcv(net, &n->bc_entry.namedq);
+ tipc_named_rcv(net, &n->bc_entry.namedq,
+ &n->bc_entry.named_rcv_nxt,
+ &n->bc_entry.named_open);
/* If reassembly or retransmission failure => reset all links to peer */
if (rc & TIPC_LINK_DOWN_EVT)
@@ -2114,7 +2129,9 @@ rcv:
tipc_node_link_down(n, bearer_id, false);
if (unlikely(!skb_queue_empty(&n->bc_entry.namedq)))
- tipc_named_rcv(net, &n->bc_entry.namedq);
+ tipc_named_rcv(net, &n->bc_entry.namedq,
+ &n->bc_entry.named_rcv_nxt,
+ &n->bc_entry.named_open);
if (unlikely(!skb_queue_empty(&n->bc_entry.inputq1)))
tipc_node_mcast_rcv(n);
diff --git a/net/tipc/node.h b/net/tipc/node.h
index a6803b449a2c..9f6f13f1604f 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -55,7 +55,8 @@ enum {
TIPC_MCAST_RBCTL = (1 << 7),
TIPC_GAP_ACK_BLOCK = (1 << 8),
TIPC_TUNNEL_ENHANCED = (1 << 9),
- TIPC_NAGLE = (1 << 10)
+ TIPC_NAGLE = (1 << 10),
+ TIPC_NAMED_BCAST = (1 << 11)
};
#define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \
@@ -68,7 +69,8 @@ enum {
TIPC_MCAST_RBCTL | \
TIPC_GAP_ACK_BLOCK | \
TIPC_TUNNEL_ENHANCED | \
- TIPC_NAGLE)
+ TIPC_NAGLE | \
+ TIPC_NAMED_BCAST)
#define INVALID_BEARER_ID -1
@@ -101,7 +103,7 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
u32 selector);
void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr);
void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr);
-void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
+void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests);
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 0e55f8365ce2..18fa6067bb7f 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -690,15 +690,55 @@ static void tls_device_resync_rx(struct tls_context *tls_ctx,
TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICERESYNC);
}
+static bool
+tls_device_rx_resync_async(struct tls_offload_resync_async *resync_async,
+ s64 resync_req, u32 *seq)
+{
+ u32 is_async = resync_req & RESYNC_REQ_ASYNC;
+ u32 req_seq = resync_req >> 32;
+ u32 req_end = req_seq + ((resync_req >> 16) & 0xffff);
+
+ if (is_async) {
+ /* asynchronous stage: log all headers seq such that
+ * req_seq <= seq <= end_seq, and wait for real resync request
+ */
+ if (between(*seq, req_seq, req_end) &&
+ resync_async->loglen < TLS_DEVICE_RESYNC_ASYNC_LOGMAX)
+ resync_async->log[resync_async->loglen++] = *seq;
+
+ return false;
+ }
+
+ /* synchronous stage: check against the logged entries and
+ * proceed to check the next entries if no match was found
+ */
+ while (resync_async->loglen) {
+ if (req_seq == resync_async->log[resync_async->loglen - 1] &&
+ atomic64_try_cmpxchg(&resync_async->req,
+ &resync_req, 0)) {
+ resync_async->loglen = 0;
+ *seq = req_seq;
+ return true;
+ }
+ resync_async->loglen--;
+ }
+
+ if (req_seq == *seq &&
+ atomic64_try_cmpxchg(&resync_async->req,
+ &resync_req, 0))
+ return true;
+
+ return false;
+}
+
void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_offload_context_rx *rx_ctx;
- bool is_req_pending, is_force_resync;
u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE];
+ u32 sock_data, is_req_pending;
struct tls_prot_info *prot;
s64 resync_req;
- u32 sock_data;
u32 req_seq;
if (tls_ctx->rx_conf != TLS_HW)
@@ -713,11 +753,9 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
resync_req = atomic64_read(&rx_ctx->resync_req);
req_seq = resync_req >> 32;
seq += TLS_HEADER_SIZE - 1;
- is_req_pending = resync_req & RESYNC_REQ;
- is_force_resync = resync_req & RESYNC_REQ_FORCE;
+ is_req_pending = resync_req;
- if (likely(!is_req_pending) ||
- (!is_force_resync && req_seq != seq) ||
+ if (likely(!is_req_pending) || req_seq != seq ||
!atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0))
return;
break;
@@ -739,6 +777,16 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
seq += rcd_len;
tls_bigint_increment(rcd_sn, prot->rec_seq_size);
break;
+ case TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC:
+ resync_req = atomic64_read(&rx_ctx->resync_async->req);
+ is_req_pending = resync_req;
+ if (likely(!is_req_pending))
+ return;
+
+ if (!tls_device_rx_resync_async(rx_ctx->resync_async,
+ resync_req, &seq))
+ return;
+ break;
}
tls_device_resync_rx(tls_ctx, sk, seq, rcd_sn);
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index b5d4a1ef04b9..5b9a5ab48111 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -67,6 +67,30 @@ config XFRM_STATISTICS
If unsure, say N.
+# This option selects XFRM_ALGO along with the AH authentication algorithms that
+# RFC 8221 lists as MUST be implemented.
+config XFRM_AH
+ tristate
+ select XFRM_ALGO
+ select CRYPTO
+ select CRYPTO_HMAC
+ select CRYPTO_SHA256
+
+# This option selects XFRM_ALGO along with the ESP encryption and authentication
+# algorithms that RFC 8221 lists as MUST be implemented.
+config XFRM_ESP
+ tristate
+ select XFRM_ALGO
+ select CRYPTO
+ select CRYPTO_AES
+ select CRYPTO_AUTHENC
+ select CRYPTO_CBC
+ select CRYPTO_ECHAINIV
+ select CRYPTO_GCM
+ select CRYPTO_HMAC
+ select CRYPTO_SEQIV
+ select CRYPTO_SHA256
+
config XFRM_IPCOMP
tristate
select XFRM_ALGO
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index f50d1f97cf8e..edf11893dbe8 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -106,9 +106,10 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
struct sk_buff *skb2, *nskb, *pskb = NULL;
netdev_features_t esp_features = features;
struct xfrm_offload *xo = xfrm_offload(skb);
+ struct net_device *dev = skb->dev;
struct sec_path *sp;
- if (!xo)
+ if (!xo || (xo->flags & XFRM_XMIT))
return skb;
if (!(features & NETIF_F_HW_ESP))
@@ -119,6 +120,10 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
return skb;
+ /* This skb was already validated on the upper/virtual dev */
+ if ((x->xso.dev != dev) && (x->xso.real_dev == dev))
+ return skb;
+
local_irq_save(flags);
sd = this_cpu_ptr(&softnet_data);
err = !skb_queue_empty(&sd->xfrm_backlog);
@@ -129,25 +134,22 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
return skb;
}
- if (skb_is_gso(skb)) {
- struct net_device *dev = skb->dev;
+ xo->flags |= XFRM_XMIT;
- if (unlikely(x->xso.dev != dev)) {
- struct sk_buff *segs;
+ if (skb_is_gso(skb) && unlikely(x->xso.dev != dev)) {
+ struct sk_buff *segs;
- /* Packet got rerouted, fixup features and segment it. */
- esp_features = esp_features & ~(NETIF_F_HW_ESP
- | NETIF_F_GSO_ESP);
+ /* Packet got rerouted, fixup features and segment it. */
+ esp_features = esp_features & ~(NETIF_F_HW_ESP | NETIF_F_GSO_ESP);
- segs = skb_gso_segment(skb, esp_features);
- if (IS_ERR(segs)) {
- kfree_skb(skb);
- atomic_long_inc(&dev->tx_dropped);
- return NULL;
- } else {
- consume_skb(skb);
- skb = segs;
- }
+ segs = skb_gso_segment(skb, esp_features);
+ if (IS_ERR(segs)) {
+ kfree_skb(skb);
+ atomic_long_inc(&dev->tx_dropped);
+ return NULL;
+ } else {
+ consume_skb(skb);
+ skb = segs;
}
}
@@ -259,6 +261,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
}
xso->dev = dev;
+ xso->real_dev = dev;
xso->num_exthdrs = 1;
xso->flags = xuo->flags;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index e4c23f69f69f..a7ab19353313 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -574,16 +574,12 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
switch (x->outer_mode.family) {
case AF_INET:
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-#ifdef CONFIG_NETFILTER
IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
-#endif
break;
case AF_INET6:
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-#ifdef CONFIG_NETFILTER
IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
-#endif
break;
}