summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/client.c2
-rw-r--r--net/9p/trans_fd.c39
-rw-r--r--net/9p/trans_rdma.c7
-rw-r--r--net/Kconfig1
-rw-r--r--net/appletalk/atalk_proc.c2
-rw-r--r--net/appletalk/ddp.c2
-rw-r--r--net/atm/common.c20
-rw-r--r--net/atm/common.h2
-rw-r--r--net/atm/lec_arpc.h2
-rw-r--r--net/atm/pvc.c2
-rw-r--r--net/atm/svc.c6
-rw-r--r--net/ax25/Kconfig10
-rw-r--r--net/ax25/af_ax25.c6
-rw-r--r--net/batman-adv/bat_iv_ogm.c25
-rw-r--r--net/batman-adv/bat_v_elp.c10
-rw-r--r--net/batman-adv/bat_v_ogm.c27
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c6
-rw-r--r--net/batman-adv/distributed-arp-table.c2
-rw-r--r--net/batman-adv/fragmentation.c6
-rw-r--r--net/batman-adv/hard-interface.c16
-rw-r--r--net/batman-adv/log.h6
-rw-r--r--net/batman-adv/main.c2
-rw-r--r--net/batman-adv/main.h8
-rw-r--r--net/batman-adv/multicast.c21
-rw-r--r--net/batman-adv/netlink.c14
-rw-r--r--net/batman-adv/network-coding.c14
-rw-r--r--net/batman-adv/originator.c8
-rw-r--r--net/batman-adv/routing.c4
-rw-r--r--net/batman-adv/send.c4
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/tp_meter.c12
-rw-r--r--net/batman-adv/translation-table.c10
-rw-r--r--net/batman-adv/tvlv.c4
-rw-r--r--net/batman-adv/types.h18
-rw-r--r--net/bluetooth/6lowpan.c5
-rw-r--r--net/bluetooth/Kconfig2
-rw-r--r--net/bluetooth/Makefile2
-rw-r--r--net/bluetooth/af_bluetooth.c5
-rw-r--r--net/bluetooth/bnep/sock.c2
-rw-r--r--net/bluetooth/cmtp/sock.c2
-rw-r--r--net/bluetooth/hci_conn.c51
-rw-r--r--net/bluetooth/hci_core.c212
-rw-r--r--net/bluetooth/hci_event.c97
-rw-r--r--net/bluetooth/hci_request.c286
-rw-r--r--net/bluetooth/hci_request.h5
-rw-r--r--net/bluetooth/hci_sock.c15
-rw-r--r--net/bluetooth/hidp/sock.c2
-rw-r--r--net/bluetooth/l2cap_core.c25
-rw-r--r--net/bluetooth/l2cap_sock.c26
-rw-r--r--net/bluetooth/mgmt.c577
-rw-r--r--net/bluetooth/mgmt_config.c283
-rw-r--r--net/bluetooth/mgmt_config.h17
-rw-r--r--net/bluetooth/msft.c7
-rw-r--r--net/bluetooth/msft.h9
-rw-r--r--net/bluetooth/rfcomm/core.c2
-rw-r--r--net/bluetooth/rfcomm/sock.c14
-rw-r--r--net/bluetooth/sco.c38
-rw-r--r--net/bluetooth/selftest.c2
-rw-r--r--net/bluetooth/smp.c8
-rw-r--r--net/bpf/test_run.c43
-rw-r--r--net/bpfilter/Kconfig10
-rw-r--r--net/bpfilter/Makefile2
-rw-r--r--net/bpfilter/bpfilter_kern.c98
-rw-r--r--net/bpfilter/bpfilter_umh_blob.S2
-rw-r--r--net/bridge/br_device.c2
-rw-r--r--net/bridge/br_fdb.c127
-rw-r--r--net/bridge/br_mrp.c576
-rw-r--r--net/bridge/br_mrp_netlink.c246
-rw-r--r--net/bridge/br_mrp_switchdev.c62
-rw-r--r--net/bridge/br_netlink.c28
-rw-r--r--net/bridge/br_netlink_tunnel.c49
-rw-r--r--net/bridge/br_private.h17
-rw-r--r--net/bridge/br_private_mrp.h27
-rw-r--r--net/bridge/netfilter/ebtables.c252
-rw-r--r--net/caif/caif_socket.c10
-rw-r--r--net/can/af_can.c2
-rw-r--r--net/can/bcm.c2
-rw-r--r--net/can/j1939/socket.c12
-rw-r--r--net/can/raw.c16
-rw-r--r--net/compat.c122
-rw-r--r--net/core/bpf_sk_storage.c260
-rw-r--r--net/core/dev.c583
-rw-r--r--net/core/dev_ioctl.c29
-rw-r--r--net/core/devlink.c653
-rw-r--r--net/core/fib_rules.c31
-rw-r--r--net/core/filter.c386
-rw-r--r--net/core/flow_dissector.c17
-rw-r--r--net/core/flow_offload.c12
-rw-r--r--net/core/neighbour.c1
-rw-r--r--net/core/rtnetlink.c118
-rw-r--r--net/core/skbuff.c7
-rw-r--r--net/core/sock.c119
-rw-r--r--net/core/sock_map.c88
-rw-r--r--net/core/tso.c44
-rw-r--r--net/core/xdp.c9
-rw-r--r--net/dcb/dcbnl.c2
-rw-r--r--net/dccp/Kconfig2
-rw-r--r--net/dccp/ccids/Kconfig4
-rw-r--r--net/dccp/ccids/ccid3.c2
-rw-r--r--net/dccp/ccids/ccid3.h2
-rw-r--r--net/dccp/ccids/lib/packet_history.c4
-rw-r--r--net/dccp/ccids/lib/packet_history.h2
-rw-r--r--net/dccp/dccp.h8
-rw-r--r--net/dccp/feat.c6
-rw-r--r--net/dccp/input.c1
-rw-r--r--net/dccp/ipv4.c14
-rw-r--r--net/dccp/ipv6.c14
-rw-r--r--net/dccp/options.c4
-rw-r--r--net/dccp/proto.c54
-rw-r--r--net/dccp/timer.c2
-rw-r--r--net/decnet/af_decnet.c26
-rw-r--r--net/decnet/dn_dev.c8
-rw-r--r--net/decnet/dn_route.c4
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c2
-rw-r--r--net/devres.c4
-rw-r--r--net/dsa/Kconfig7
-rw-r--r--net/dsa/Makefile1
-rw-r--r--net/dsa/dsa2.c25
-rw-r--r--net/dsa/dsa_priv.h2
-rw-r--r--net/dsa/master.c62
-rw-r--r--net/dsa/slave.c6
-rw-r--r--net/dsa/tag_ksz.c9
-rw-r--r--net/dsa/tag_lan9303.c17
-rw-r--r--net/dsa/tag_mtk.c3
-rw-r--r--net/dsa/tag_ocelot.c21
-rw-r--r--net/dsa/tag_qca.c8
-rw-r--r--net/dsa/tag_rtl4_a.c130
-rw-r--r--net/ethtool/Makefile3
-rw-r--r--net/ethtool/cabletest.c18
-rw-r--r--net/ethtool/common.c35
-rw-r--r--net/ethtool/common.h3
-rw-r--r--net/ethtool/ioctl.c27
-rw-r--r--net/ethtool/linkmodes.c18
-rw-r--r--net/ethtool/linkstate.c52
-rw-r--r--net/ethtool/netlink.c12
-rw-r--r--net/ethtool/netlink.h4
-rw-r--r--net/ethtool/strset.c16
-rw-r--r--net/ethtool/tunnels.c312
-rw-r--r--net/hsr/Kconfig35
-rw-r--r--net/hsr/hsr_debugfs.c41
-rw-r--r--net/hsr/hsr_device.c183
-rw-r--r--net/hsr/hsr_device.h2
-rw-r--r--net/hsr/hsr_forward.c319
-rw-r--r--net/hsr/hsr_forward.h16
-rw-r--r--net/hsr/hsr_framereg.c95
-rw-r--r--net/hsr/hsr_framereg.h31
-rw-r--r--net/hsr/hsr_main.c2
-rw-r--r--net/hsr/hsr_main.h120
-rw-r--r--net/hsr/hsr_netlink.c38
-rw-r--r--net/hsr/hsr_netlink.h2
-rw-r--r--net/hsr/hsr_slave.c26
-rw-r--r--net/hsr/hsr_slave.h4
-rw-r--r--net/ieee802154/socket.c14
-rw-r--r--net/ipv4/Kconfig8
-rw-r--r--net/ipv4/Makefile3
-rw-r--r--net/ipv4/af_inet.c15
-rw-r--r--net/ipv4/bpfilter/sockopt.c36
-rw-r--r--net/ipv4/cipso_ipv4.c10
-rw-r--r--net/ipv4/fib_rules.c12
-rw-r--r--net/ipv4/fib_trie.c2
-rw-r--r--net/ipv4/gre_offload.c13
-rw-r--r--net/ipv4/icmp.c59
-rw-r--r--net/ipv4/inet_connection_sock.c61
-rw-r--r--net/ipv4/inet_diag.c65
-rw-r--r--net/ipv4/inet_hashtables.c60
-rw-r--r--net/ipv4/ip_options.c43
-rw-r--r--net/ipv4/ip_output.c6
-rw-r--r--net/ipv4/ip_sockglue.c601
-rw-r--r--net/ipv4/ip_tunnel_core.c244
-rw-r--r--net/ipv4/ip_vti.c80
-rw-r--r--net/ipv4/ipcomp.c1
-rw-r--r--net/ipv4/ipmr.c17
-rw-r--r--net/ipv4/netfilter/arp_tables.c105
-rw-r--r--net/ipv4/netfilter/ip_tables.c104
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c2
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c21
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/raw.c30
-rw-r--r--net/ipv4/route.c5
-rw-r--r--net/ipv4/syncookies.c45
-rw-r--r--net/ipv4/tcp.c67
-rw-r--r--net/ipv4/tcp_highspeed.c2
-rw-r--r--net/ipv4/tcp_htcp.c2
-rw-r--r--net/ipv4/tcp_input.c152
-rw-r--r--net/ipv4/tcp_ipv4.c171
-rw-r--r--net/ipv4/tcp_output.c16
-rw-r--r--net/ipv4/tcp_timer.c2
-rw-r--r--net/ipv4/tcp_veno.c2
-rw-r--r--net/ipv4/tunnel4.c43
-rw-r--r--net/ipv4/udp.c287
-rw-r--r--net/ipv4/udp_impl.h10
-rw-r--r--net/ipv4/udp_tunnel_core.c (renamed from net/ipv4/udp_tunnel.c)0
-rw-r--r--net/ipv4/udp_tunnel_nic.c897
-rw-r--r--net/ipv4/udp_tunnel_stub.c7
-rw-r--r--net/ipv4/udplite.c4
-rw-r--r--net/ipv6/Kconfig2
-rw-r--r--net/ipv6/addrconf.c24
-rw-r--r--net/ipv6/af_inet6.c4
-rw-r--r--net/ipv6/datagram.c16
-rw-r--r--net/ipv6/esp6.c13
-rw-r--r--net/ipv6/exthdrs.c3
-rw-r--r--net/ipv6/fib6_rules.c21
-rw-r--r--net/ipv6/icmp.c5
-rw-r--r--net/ipv6/inet6_hashtables.c66
-rw-r--r--net/ipv6/ip6_fib.c3
-rw-r--r--net/ipv6/ip6_flowlabel.c316
-rw-r--r--net/ipv6/ip6_icmp.c10
-rw-r--r--net/ipv6/ip6_offload.c8
-rw-r--r--net/ipv6/ip6_output.c6
-rw-r--r--net/ipv6/ip6_tunnel.c10
-rw-r--r--net/ipv6/ip6_vti.c52
-rw-r--r--net/ipv6/ip6mr.c17
-rw-r--r--net/ipv6/ipcomp6.c1
-rw-r--r--net/ipv6/ipv6_sockglue.c724
-rw-r--r--net/ipv6/netfilter/ip6_tables.c104
-rw-r--r--net/ipv6/netfilter/ip6t_ah.c3
-rw-r--r--net/ipv6/netfilter/ip6t_frag.c3
-rw-r--r--net/ipv6/netfilter/ip6t_hbh.c3
-rw-r--r--net/ipv6/netfilter/ip6t_rt.c3
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c26
-rw-r--r--net/ipv6/ping.c1
-rw-r--r--net/ipv6/raw.c62
-rw-r--r--net/ipv6/route.c21
-rw-r--r--net/ipv6/rpl_iptunnel.c3
-rw-r--r--net/ipv6/seg6_iptunnel.c17
-rw-r--r--net/ipv6/syncookies.c5
-rw-r--r--net/ipv6/tcp_ipv6.c23
-rw-r--r--net/ipv6/tunnel6.c41
-rw-r--r--net/ipv6/udp.c135
-rw-r--r--net/ipv6/udp_impl.h10
-rw-r--r--net/ipv6/udplite.c4
-rw-r--r--net/iucv/af_iucv.c4
-rw-r--r--net/kcm/kcmsock.c6
-rw-r--r--net/key/af_key.c13
-rw-r--r--net/l2tp/l2tp_core.c247
-rw-r--r--net/l2tp/l2tp_core.h205
-rw-r--r--net/l2tp/l2tp_debugfs.c34
-rw-r--r--net/l2tp/l2tp_eth.c21
-rw-r--r--net/l2tp/l2tp_ip.c41
-rw-r--r--net/l2tp/l2tp_ip6.c43
-rw-r--r--net/l2tp/l2tp_netlink.c259
-rw-r--r--net/l2tp/l2tp_ppp.c97
-rw-r--r--net/l3mdev/l3mdev.c93
-rw-r--r--net/llc/af_llc.c5
-rw-r--r--net/llc/llc_conn.c7
-rw-r--r--net/llc/llc_input.c1
-rw-r--r--net/llc/llc_pdu.c2
-rw-r--r--net/llc/llc_sap.c3
-rw-r--r--net/mac80211/agg-rx.c2
-rw-r--r--net/mac80211/airtime.c26
-rw-r--r--net/mac80211/cfg.c22
-rw-r--r--net/mac80211/chan.c9
-rw-r--r--net/mac80211/debugfs_netdev.c5
-rw-r--r--net/mac80211/driver-ops.h11
-rw-r--r--net/mac80211/ht.c4
-rw-r--r--net/mac80211/ibss.c4
-rw-r--r--net/mac80211/ieee80211_i.h16
-rw-r--r--net/mac80211/iface.c25
-rw-r--r--net/mac80211/key.c2
-rw-r--r--net/mac80211/mesh.c22
-rw-r--r--net/mac80211/mesh_hwmp.c41
-rw-r--r--net/mac80211/mesh_pathtbl.c5
-rw-r--r--net/mac80211/mesh_plink.c2
-rw-r--r--net/mac80211/mlme.c22
-rw-r--r--net/mac80211/offchannel.c6
-rw-r--r--net/mac80211/rx.c66
-rw-r--r--net/mac80211/scan.c8
-rw-r--r--net/mac80211/sta_info.c12
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mac80211/status.c4
-rw-r--r--net/mac80211/tdls.c8
-rw-r--r--net/mac80211/trace.h3
-rw-r--r--net/mac80211/tx.c106
-rw-r--r--net/mac80211/util.c24
-rw-r--r--net/mac80211/wme.c2
-rw-r--r--net/mpls/af_mpls.c17
-rw-r--r--net/mptcp/Kconfig24
-rw-r--r--net/mptcp/Makefile7
-rw-r--r--net/mptcp/crypto.c63
-rw-r--r--net/mptcp/crypto_test.c72
-rw-r--r--net/mptcp/ctrl.c1
-rw-r--r--net/mptcp/mptcp_diag.c169
-rw-r--r--net/mptcp/options.c71
-rw-r--r--net/mptcp/pm.c46
-rw-r--r--net/mptcp/pm_netlink.c2
-rw-r--r--net/mptcp/protocol.c853
-rw-r--r--net/mptcp/protocol.h130
-rw-r--r--net/mptcp/subflow.c335
-rw-r--r--net/mptcp/syncookies.c130
-rw-r--r--net/mptcp/token.c373
-rw-r--r--net/mptcp/token_test.c140
-rw-r--r--net/ncsi/ncsi-rsp.c2
-rw-r--r--net/netfilter/Kconfig2
-rw-r--r--net/netfilter/ipset/ip_set_core.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c92
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c139
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c55
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c6
-rw-r--r--net/netfilter/nf_conntrack_proto.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c2
-rw-r--r--net/netfilter/nf_conntrack_standalone.c2
-rw-r--r--net/netfilter/nf_flow_table_offload.c2
-rw-r--r--net/netfilter/nf_nat_core.c12
-rw-r--r--net/netfilter/nf_sockopt.c60
-rw-r--r--net/netfilter/nf_synproxy_core.c6
-rw-r--r--net/netfilter/nf_tables_api.c227
-rw-r--r--net/netfilter/nf_tables_core.c2
-rw-r--r--net/netfilter/nf_tables_offload.c2
-rw-r--r--net/netfilter/nfnetlink_acct.c2
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c2
-rw-r--r--net/netfilter/nft_cmp.c4
-rw-r--r--net/netfilter/nft_ct.c6
-rw-r--r--net/netfilter/nft_fib.c2
-rw-r--r--net/netfilter/nft_immediate.c51
-rw-r--r--net/netfilter/nft_payload.c2
-rw-r--r--net/netfilter/nft_reject.c3
-rw-r--r--net/netfilter/nft_set_pipapo.c14
-rw-r--r--net/netfilter/utils.c8
-rw-r--r--net/netfilter/x_tables.c30
-rw-r--r--net/netfilter/xt_CONNSECMARK.c2
-rw-r--r--net/netfilter/xt_connmark.c2
-rw-r--r--net/netfilter/xt_nfacct.c2
-rw-r--r--net/netfilter/xt_time.c2
-rw-r--r--net/netlabel/netlabel_domainhash.c2
-rw-r--r--net/netlink/af_netlink.c17
-rw-r--r--net/netrom/af_netrom.c4
-rw-r--r--net/nfc/core.c3
-rw-r--r--net/nfc/llcp_sock.c8
-rw-r--r--net/nfc/nci/core.c4
-rw-r--r--net/nfc/rawsock.c4
-rw-r--r--net/openvswitch/conntrack.c38
-rw-r--r--net/openvswitch/datapath.c45
-rw-r--r--net/openvswitch/datapath.h9
-rw-r--r--net/openvswitch/flow.c1
-rw-r--r--net/openvswitch/flow_netlink.c6
-rw-r--r--net/openvswitch/flow_table.c287
-rw-r--r--net/openvswitch/flow_table.h24
-rw-r--r--net/openvswitch/vport.c3
-rw-r--r--net/packet/af_packet.c96
-rw-r--r--net/packet/internal.h2
-rw-r--r--net/phonet/pep.c4
-rw-r--r--net/phonet/socket.c10
-rw-r--r--net/qrtr/qrtr.c2
-rw-r--r--net/rds/af_rds.c30
-rw-r--r--net/rds/rdma.c14
-rw-r--r--net/rds/rdma_transport.h2
-rw-r--r--net/rds/rds.h6
-rw-r--r--net/rds/recv.c3
-rw-r--r--net/rose/af_rose.c4
-rw-r--r--net/rxrpc/af_rxrpc.c10
-rw-r--r--net/rxrpc/ar-internal.h4
-rw-r--r--net/rxrpc/key.c9
-rw-r--r--net/sched/Kconfig4
-rw-r--r--net/sched/act_api.c12
-rw-r--r--net/sched/act_csum.c3
-rw-r--r--net/sched/act_ct.c20
-rw-r--r--net/sched/act_gact.c7
-rw-r--r--net/sched/act_gate.c6
-rw-r--r--net/sched/act_mirred.c6
-rw-r--r--net/sched/act_pedit.c9
-rw-r--r--net/sched/act_police.c4
-rw-r--r--net/sched/act_skbedit.c5
-rw-r--r--net/sched/act_vlan.c6
-rw-r--r--net/sched/cls_api.c134
-rw-r--r--net/sched/cls_flower.c17
-rw-r--r--net/sched/cls_matchall.c3
-rw-r--r--net/sched/cls_tcindex.c2
-rw-r--r--net/sched/cls_u32.c24
-rw-r--r--net/sched/em_canid.c1
-rw-r--r--net/sched/ematch.c3
-rw-r--r--net/sched/sch_api.c6
-rw-r--r--net/sched/sch_cake.c8
-rw-r--r--net/sched/sch_cbq.c2
-rw-r--r--net/sched/sch_drr.c2
-rw-r--r--net/sched/sch_ets.c2
-rw-r--r--net/sched/sch_fq_codel.c2
-rw-r--r--net/sched/sch_fq_pie.c2
-rw-r--r--net/sched/sch_hfsc.c2
-rw-r--r--net/sched/sch_htb.c2
-rw-r--r--net/sched/sch_multiq.c2
-rw-r--r--net/sched/sch_prio.c2
-rw-r--r--net/sched/sch_qfq.c2
-rw-r--r--net/sched/sch_red.c98
-rw-r--r--net/sched/sch_sfb.c2
-rw-r--r--net/sched/sch_sfq.c2
-rw-r--r--net/sched/sch_taprio.c5
-rw-r--r--net/sctp/ipv6.c6
-rw-r--r--net/sctp/protocol.c20
-rw-r--r--net/sctp/socket.c1239
-rw-r--r--net/smc/af_smc.c26
-rw-r--r--net/smc/smc_clc.h1
-rw-r--r--net/smc/smc_core.c4
-rw-r--r--net/socket.c133
-rw-r--r--net/switchdev/switchdev.c3
-rw-r--r--net/tipc/bcast.c6
-rw-r--r--net/tipc/bcast.h4
-rw-r--r--net/tipc/bearer.c2
-rw-r--r--net/tipc/discover.c5
-rw-r--r--net/tipc/eth_media.c4
-rw-r--r--net/tipc/link.c16
-rw-r--r--net/tipc/msg.c2
-rw-r--r--net/tipc/msg.h46
-rw-r--r--net/tipc/name_distr.c116
-rw-r--r--net/tipc/name_distr.h9
-rw-r--r--net/tipc/name_table.c9
-rw-r--r--net/tipc/name_table.h2
-rw-r--r--net/tipc/node.c33
-rw-r--r--net/tipc/node.h8
-rw-r--r--net/tipc/socket.c16
-rw-r--r--net/tipc/udp_media.c2
-rw-r--r--net/tls/tls_device.c60
-rw-r--r--net/tls/tls_main.c17
-rw-r--r--net/unix/af_unix.c6
-rw-r--r--net/vmw_vsock/af_vsock.c6
-rw-r--r--net/wireless/chan.c35
-rw-r--r--net/wireless/core.c5
-rw-r--r--net/wireless/mesh.c1
-rw-r--r--net/wireless/nl80211.c83
-rw-r--r--net/wireless/reg.c4
-rw-r--r--net/wireless/scan.c10
-rw-r--r--net/wireless/trace.h4
-rw-r--r--net/wireless/util.c8
-rw-r--r--net/wireless/wext-compat.c1
-rw-r--r--net/x25/Kconfig2
-rw-r--r--net/x25/af_x25.c4
-rw-r--r--net/x25/x25_link.c2
-rw-r--r--net/x25/x25_route.c2
-rw-r--r--net/xdp/xsk.c46
-rw-r--r--net/xdp/xsk_buff_pool.c1
-rw-r--r--net/xdp/xsk_diag.c17
-rw-r--r--net/xdp/xsk_queue.h6
-rw-r--r--net/xdp/xskmap.c3
-rw-r--r--net/xfrm/espintcp.c62
-rw-r--r--net/xfrm/xfrm_device.c35
-rw-r--r--net/xfrm/xfrm_input.c24
-rw-r--r--net/xfrm/xfrm_interface.c136
-rw-r--r--net/xfrm/xfrm_policy.c54
-rw-r--r--net/xfrm/xfrm_replay.c12
-rw-r--r--net/xfrm/xfrm_state.c6
-rw-r--r--net/xfrm/xfrm_user.c18
441 files changed, 14933 insertions, 6265 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index fc1f3635e5dd..09f1ec589b80 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -811,7 +811,7 @@ reterr:
* @uodata: source for zero copy write
* @inlen: read buffer size
* @olen: write buffer size
- * @hdrlen: reader header size, This is the size of response protocol data
+ * @in_hdrlen: reader header size, This is the size of response protocol data
* @fmt: protocol format string (see protocol.c)
*
* Returns request structure (which client must free using p9_tag_remove)
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 13cd683a658a..12ecacf0c55f 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -362,6 +362,10 @@ static void p9_read_work(struct work_struct *work)
if (m->rreq->status == REQ_STATUS_SENT) {
list_del(&m->rreq->req_list);
p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD);
+ } else if (m->rreq->status == REQ_STATUS_FLSHD) {
+ /* Ignore replies associated with a cancelled request. */
+ p9_debug(P9_DEBUG_TRANS,
+ "Ignore replies associated with a cancelled request\n");
} else {
spin_unlock(&m->client->lock);
p9_debug(P9_DEBUG_ERROR,
@@ -703,11 +707,20 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
{
p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
+ spin_lock(&client->lock);
+ /* Ignore cancelled request if message has been received
+ * before lock.
+ */
+ if (req->status == REQ_STATUS_RCVD) {
+ spin_unlock(&client->lock);
+ return 0;
+ }
+
/* we haven't received a response for oldreq,
* remove it from the list.
*/
- spin_lock(&client->lock);
list_del(&req->req_list);
+ req->status = REQ_STATUS_FLSHD;
spin_unlock(&client->lock);
p9_req_put(req);
@@ -803,20 +816,28 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
return -ENOMEM;
ts->rd = fget(rfd);
+ if (!ts->rd)
+ goto out_free_ts;
+ if (!(ts->rd->f_mode & FMODE_READ))
+ goto out_put_rd;
ts->wr = fget(wfd);
- if (!ts->rd || !ts->wr) {
- if (ts->rd)
- fput(ts->rd);
- if (ts->wr)
- fput(ts->wr);
- kfree(ts);
- return -EIO;
- }
+ if (!ts->wr)
+ goto out_put_rd;
+ if (!(ts->wr->f_mode & FMODE_WRITE))
+ goto out_put_wr;
client->trans = ts;
client->status = Connected;
return 0;
+
+out_put_wr:
+ fput(ts->wr);
+out_put_rd:
+ fput(ts->rd);
+out_free_ts:
+ kfree(ts);
+ return -EIO;
}
static int p9_socket_open(struct p9_client *client, struct socket *csocket)
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index b21c3c209815..2885ff9c76f0 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -94,14 +94,15 @@ struct p9_trans_rdma {
struct completion cm_done;
};
+struct p9_rdma_req;
+
/**
- * p9_rdma_context - Keeps track of in-process WR
+ * struct p9_rdma_context - Keeps track of in-process WR
*
* @busa: Bus address to unmap when the WR completes
* @req: Keeps track of requests (send)
* @rc: Keepts track of replies (receive)
*/
-struct p9_rdma_req;
struct p9_rdma_context {
struct ib_cqe cqe;
dma_addr_t busa;
@@ -112,7 +113,7 @@ struct p9_rdma_context {
};
/**
- * p9_rdma_opts - Collection of mount options
+ * struct p9_rdma_opts - Collection of mount options
* @port: port of connection
* @sq_depth: The requested depth of the SQ. This really doesn't need
* to be any deeper than the number of threads used in the client
diff --git a/net/Kconfig b/net/Kconfig
index d1672280d6a4..3831206977a1 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -455,7 +455,6 @@ config FAILOVER
config ETHTOOL_NETLINK
bool "Netlink interface for ethtool"
default y
- depends on PHYLIB=y || PHYLIB=n
help
An alternative userspace interface for ethtool based on generic
netlink. It provides better extensibility and some new features,
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index 550c6ca007cc..9c1241292d1d 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -229,6 +229,8 @@ int __init atalk_proc_init(void)
sizeof(struct aarp_iter_state), NULL))
goto out;
+ return 0;
+
out:
remove_proc_subtree("atalk", init_net.proc_net);
return -ENOMEM;
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 15787e8c0629..1d48708c5a2e 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1917,8 +1917,6 @@ static const struct proto_ops atalk_dgram_ops = {
#endif
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.sendmsg = atalk_sendmsg,
.recvmsg = atalk_recvmsg,
.mmap = sock_no_mmap,
diff --git a/net/atm/common.c b/net/atm/common.c
index 8575f5d52087..84367b844b14 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -745,7 +745,7 @@ static int check_qos(const struct atm_qos *qos)
}
int vcc_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct atm_vcc *vcc;
unsigned long value;
@@ -760,7 +760,7 @@ int vcc_setsockopt(struct socket *sock, int level, int optname,
{
struct atm_qos qos;
- if (copy_from_user(&qos, optval, sizeof(qos)))
+ if (copy_from_sockptr(&qos, optval, sizeof(qos)))
return -EFAULT;
error = check_qos(&qos);
if (error)
@@ -774,7 +774,7 @@ int vcc_setsockopt(struct socket *sock, int level, int optname,
return 0;
}
case SO_SETCLP:
- if (get_user(value, (unsigned long __user *)optval))
+ if (copy_from_sockptr(&value, optval, sizeof(value)))
return -EFAULT;
if (value)
vcc->atm_options |= ATM_ATMOPT_CLP;
@@ -782,13 +782,8 @@ int vcc_setsockopt(struct socket *sock, int level, int optname,
vcc->atm_options &= ~ATM_ATMOPT_CLP;
return 0;
default:
- if (level == SOL_SOCKET)
- return -EINVAL;
- break;
- }
- if (!vcc->dev || !vcc->dev->ops->setsockopt)
return -EINVAL;
- return vcc->dev->ops->setsockopt(vcc, level, optname, optval, optlen);
+ }
}
int vcc_getsockopt(struct socket *sock, int level, int optname,
@@ -826,13 +821,8 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
return copy_to_user(optval, &pvc, sizeof(pvc)) ? -EFAULT : 0;
}
default:
- if (level == SOL_SOCKET)
- return -EINVAL;
- break;
- }
- if (!vcc->dev || !vcc->dev->ops->getsockopt)
return -EINVAL;
- return vcc->dev->ops->getsockopt(vcc, level, optname, optval, len);
+ }
}
int register_atmdevice_notifier(struct notifier_block *nb)
diff --git a/net/atm/common.h b/net/atm/common.h
index 5850649068bb..a1e56e8de698 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -21,7 +21,7 @@ __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int vcc_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen);
+ sockptr_t optval, unsigned int optlen);
int vcc_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen);
void vcc_process_recv_queue(struct atm_vcc *vcc);
diff --git a/net/atm/lec_arpc.h b/net/atm/lec_arpc.h
index 1205d8792d28..39115fe074c4 100644
--- a/net/atm/lec_arpc.h
+++ b/net/atm/lec_arpc.h
@@ -44,7 +44,7 @@ struct lec_arp_table {
u8 *tlvs;
u32 sizeoftlvs; /*
* LANE2: Each MAC address can have TLVs
- * associated with it. sizeoftlvs tells the
+ * associated with it. sizeoftlvs tells
* the length of the tlvs array
*/
struct sk_buff_head tx_wait; /* wait queue for outgoing packets */
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 02bd2a436bdf..53e7d3f39e26 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -63,7 +63,7 @@ static int pvc_connect(struct socket *sock, struct sockaddr *sockaddr,
}
static int pvc_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
int error;
diff --git a/net/atm/svc.c b/net/atm/svc.c
index ba144d035e3d..4a02bcaad279 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -451,7 +451,7 @@ int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
}
static int svc_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct atm_vcc *vcc = ATM_SD(sock);
@@ -464,7 +464,7 @@ static int svc_setsockopt(struct socket *sock, int level, int optname,
error = -EINVAL;
goto out;
}
- if (copy_from_user(&vcc->sap, optval, optlen)) {
+ if (copy_from_sockptr(&vcc->sap, optval, optlen)) {
error = -EFAULT;
goto out;
}
@@ -475,7 +475,7 @@ static int svc_setsockopt(struct socket *sock, int level, int optname,
error = -EINVAL;
goto out;
}
- if (get_user(value, (int __user *)optval)) {
+ if (copy_from_sockptr(&value, optval, sizeof(int))) {
error = -EFAULT;
goto out;
}
diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig
index 97d686d115c0..d3a9843a043d 100644
--- a/net/ax25/Kconfig
+++ b/net/ax25/Kconfig
@@ -8,7 +8,7 @@ menuconfig HAMRADIO
bool "Amateur Radio support"
help
If you want to connect your Linux box to an amateur radio, answer Y
- here. You want to read <http://www.tapr.org/>
+ here. You want to read <https://www.tapr.org/>
and more specifically about AX.25 on Linux
<http://www.linux-ax25.org/>.
@@ -39,11 +39,11 @@ config AX25
Information about where to get supporting software for Linux amateur
radio as well as information about how to configure an AX.25 port is
contained in the AX25-HOWTO, available from
- <http://www.tldp.org/docs.html#howto>. You might also want to
+ <https://www.tldp.org/docs.html#howto>. You might also want to
check out the file <file:Documentation/networking/ax25.rst> in the
kernel source. More information about digital amateur radio in
general is on the WWW at
- <http://www.tapr.org/>.
+ <https://www.tapr.org/>.
To compile this driver as a module, choose M here: the
module will be called ax25.
@@ -90,7 +90,7 @@ config NETROM
<http://www.linux-ax25.org>. You also might want to check out the
file <file:Documentation/networking/ax25.rst>. More information about
digital amateur radio in general is on the WWW at
- <http://www.tapr.org/>.
+ <https://www.tapr.org/>.
To compile this driver as a module, choose M here: the
module will be called netrom.
@@ -109,7 +109,7 @@ config ROSE
<http://www.linux-ax25.org>. You also might want to check out the
file <file:Documentation/networking/ax25.rst>. More information about
digital amateur radio in general is on the WWW at
- <http://www.tapr.org/>.
+ <https://www.tapr.org/>.
To compile this driver as a module, choose M here: the
module will be called rose.
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index dec3f35467c9..269ee89d2c2b 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -528,7 +528,7 @@ ax25_cb *ax25_create_cb(void)
*/
static int ax25_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
ax25_cb *ax25;
@@ -543,7 +543,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(unsigned int))
return -EINVAL;
- if (get_user(opt, (unsigned int __user *)optval))
+ if (copy_from_sockptr(&opt, optval, sizeof(unsigned int)))
return -EFAULT;
lock_sock(sk);
@@ -640,7 +640,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
memset(devname, 0, sizeof(devname));
- if (copy_from_user(devname, optval, optlen)) {
+ if (copy_from_sockptr(devname, optval, optlen)) {
res = -EFAULT;
break;
}
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index e87f19c82e8d..a4faf5f904d9 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -134,7 +134,7 @@ static u8 batadv_ring_buffer_avg(const u8 lq_recv[])
*
* Return: the originator object corresponding to the passed mac address or NULL
* on failure.
- * If the object does not exists it is created an initialised.
+ * If the object does not exist, it is created and initialised.
*/
static struct batadv_orig_node *
batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr)
@@ -871,7 +871,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
}
/**
- * batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over iterface
+ * batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over interface
* @orig_node: originator which reproadcasted the OGMs directly
* @if_outgoing: interface which transmitted the original OGM and received the
* direct rebroadcast
@@ -1075,10 +1075,10 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
struct batadv_neigh_ifinfo *neigh_ifinfo;
u8 total_count;
u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own;
+ unsigned int tq_iface_hop_penalty = BATADV_TQ_MAX_VALUE;
unsigned int neigh_rq_inv_cube, neigh_rq_max_cube;
unsigned int tq_asym_penalty, inv_asym_penalty;
unsigned int combined_tq;
- unsigned int tq_iface_penalty;
bool ret = false;
/* find corresponding one hop neighbor */
@@ -1157,31 +1157,32 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
inv_asym_penalty = BATADV_TQ_MAX_VALUE * neigh_rq_inv_cube;
inv_asym_penalty /= neigh_rq_max_cube;
tq_asym_penalty = BATADV_TQ_MAX_VALUE - inv_asym_penalty;
+ tq_iface_hop_penalty -= atomic_read(&if_incoming->hop_penalty);
/* penalize if the OGM is forwarded on the same interface. WiFi
* interfaces and other half duplex devices suffer from throughput
* drops as they can't send and receive at the same time.
*/
- tq_iface_penalty = BATADV_TQ_MAX_VALUE;
if (if_outgoing && if_incoming == if_outgoing &&
batadv_is_wifi_hardif(if_outgoing))
- tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE,
- bat_priv);
+ tq_iface_hop_penalty = batadv_hop_penalty(tq_iface_hop_penalty,
+ bat_priv);
combined_tq = batadv_ogm_packet->tq *
tq_own *
tq_asym_penalty *
- tq_iface_penalty;
+ tq_iface_hop_penalty;
combined_tq /= BATADV_TQ_MAX_VALUE *
BATADV_TQ_MAX_VALUE *
BATADV_TQ_MAX_VALUE;
batadv_ogm_packet->tq = combined_tq;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "bidirectional: orig = %pM neigh = %pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n",
+ "bidirectional: orig = %pM neigh = %pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_hop_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n",
orig_node->orig, orig_neigh_node->orig, total_count,
- neigh_rq_count, tq_own, tq_asym_penalty, tq_iface_penalty,
- batadv_ogm_packet->tq, if_incoming->net_dev->name,
+ neigh_rq_count, tq_own, tq_asym_penalty,
+ tq_iface_hop_penalty, batadv_ogm_packet->tq,
+ if_incoming->net_dev->name,
if_outgoing ? if_outgoing->net_dev->name : "DEFAULT");
/* if link has the minimum required transmission quality
@@ -1554,7 +1555,7 @@ static void batadv_iv_ogm_process_reply(struct batadv_ogm_packet *ogm_packet,
* batadv_iv_ogm_process() - process an incoming batman iv OGM
* @skb: the skb containing the OGM
* @ogm_offset: offset to the OGM which should be processed (for aggregates)
- * @if_incoming: the interface where this packet was receved
+ * @if_incoming: the interface where this packet was received
*/
static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
struct batadv_hard_iface *if_incoming)
@@ -2288,7 +2289,7 @@ batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq,
* @msg: Netlink message to dump into
* @cb: Control block containing additional options
* @bat_priv: The bat priv with all the soft interface information
- * @single_hardif: Limit dump to this hard interfaace
+ * @single_hardif: Limit dump to this hard interface
*/
static void
batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 0bdefa35da98..d35aca0e969a 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -60,7 +60,7 @@ static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface)
* @neigh: the neighbour for which the throughput has to be obtained
*
* Return: The throughput towards the given neighbour in multiples of 100kpbs
- * (a value of '1' equals to 0.1Mbps, '10' equals 1Mbps, etc).
+ * (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc).
*/
static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
{
@@ -183,8 +183,8 @@ void batadv_v_elp_throughput_metric_update(struct work_struct *work)
*
* Sends a predefined number of unicast wifi packets to a given neighbour in
* order to trigger the throughput estimation on this link by the RC algorithm.
- * Packets are sent only if there there is not enough payload unicast traffic
- * towards this neighbour..
+ * Packets are sent only if there is not enough payload unicast traffic towards
+ * this neighbour..
*
* Return: True on success and false in case of error during skb preparation.
*/
@@ -244,7 +244,7 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh)
* batadv_v_elp_periodic_work() - ELP periodic task per interface
* @work: work queue item
*
- * Emits broadcast ELP message in regular intervals.
+ * Emits broadcast ELP messages in regular intervals.
*/
static void batadv_v_elp_periodic_work(struct work_struct *work)
{
@@ -499,7 +499,7 @@ orig_free:
* @skb: the received packet
* @if_incoming: the interface this packet was received through
*
- * Return: NET_RX_SUCCESS and consumes the skb if the packet was peoperly
+ * Return: NET_RX_SUCCESS and consumes the skb if the packet was properly
* processed or NET_RX_DROP in case of failure.
*/
int batadv_v_elp_packet_recv(struct sk_buff *skb,
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 18028b9f95f0..0f8495b9eeb1 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -47,9 +47,9 @@
* @bat_priv: the bat priv with all the soft interface information
* @addr: the address of the originator
*
- * Return: the orig_node corresponding to the specified address. If such object
- * does not exist it is allocated here. In case of allocation failure returns
- * NULL.
+ * Return: the orig_node corresponding to the specified address. If such an
+ * object does not exist, it is allocated here. In case of allocation failure
+ * returns NULL.
*/
struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv,
const u8 *addr)
@@ -172,7 +172,7 @@ static bool batadv_v_ogm_queue_left(struct sk_buff *skb,
* batadv_v_ogm_aggr_list_free - free all elements in an aggregation queue
* @hard_iface: the interface holding the aggregation queue
*
- * Empties the OGMv2 aggregation queue and frees all the skbs it contained.
+ * Empties the OGMv2 aggregation queue and frees all the skbs it contains.
*
* Caller needs to hold the hard_iface->bat_v.aggr_list.lock.
*/
@@ -378,7 +378,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
* batadv_v_ogm_aggr_work() - OGM queue periodic task per interface
* @work: work queue item
*
- * Emits aggregated OGM message in regular intervals.
+ * Emits aggregated OGM messages in regular intervals.
*/
void batadv_v_ogm_aggr_work(struct work_struct *work)
{
@@ -399,7 +399,7 @@ void batadv_v_ogm_aggr_work(struct work_struct *work)
* batadv_v_ogm_iface_enable() - prepare an interface for B.A.T.M.A.N. V
* @hard_iface: the interface to prepare
*
- * Takes care of scheduling own OGM sending routine for this interface.
+ * Takes care of scheduling its own OGM sending routine for this interface.
*
* Return: 0 on success or a negative error code otherwise
*/
@@ -455,15 +455,17 @@ unlock:
* @throughput: the current throughput
*
* Apply a penalty on the current throughput metric value based on the
- * characteristic of the interface where the OGM has been received. The return
- * value is computed as follows:
+ * characteristic of the interface where the OGM has been received.
+ *
+ * Initially the per hardif hop penalty is applied to the throughput. After
+ * that the return value is then computed as follows:
* - throughput * 50% if the incoming and outgoing interface are the
* same WiFi interface and the throughput is above
* 1MBit/s
* - throughput if the outgoing interface is the default
* interface (i.e. this OGM is processed for the
* internal table and not forwarded)
- * - throughput * hop penalty otherwise
+ * - throughput * node hop penalty otherwise
*
* Return: the penalised throughput metric.
*/
@@ -472,9 +474,14 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
struct batadv_hard_iface *if_outgoing,
u32 throughput)
{
+ int if_hop_penalty = atomic_read(&if_incoming->hop_penalty);
int hop_penalty = atomic_read(&bat_priv->hop_penalty);
int hop_penalty_max = BATADV_TQ_MAX_VALUE;
+ /* Apply per hardif hop penalty */
+ throughput = throughput * (hop_penalty_max - if_hop_penalty) /
+ hop_penalty_max;
+
/* Don't apply hop penalty in default originator table. */
if (if_outgoing == BATADV_IF_DEFAULT)
return throughput;
@@ -847,7 +854,7 @@ batadv_v_ogm_aggr_packet(int buff_pos, int packet_len,
* batadv_v_ogm_process() - process an incoming batman v OGM
* @skb: the skb containing the OGM
* @ogm_offset: offset to the OGM which should be processed (for aggregates)
- * @if_incoming: the interface where this packet was receved
+ * @if_incoming: the interface where this packet was received
*/
static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
struct batadv_hard_iface *if_incoming)
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 41cc87f06b14..91a04ca373dc 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -992,7 +992,7 @@ static bool batadv_handle_claim(struct batadv_priv *bat_priv,
* @hw_dst: the Hardware destination in the ARP Header
* @ethhdr: pointer to the Ethernet header of the claim frame
*
- * checks if it is a claim packet and if its on the same group.
+ * checks if it is a claim packet and if it's on the same group.
* This function also applies the group ID of the sender
* if it is in the same mesh.
*
@@ -1757,7 +1757,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
* @vid: the VLAN ID of the frame
*
* Checks if this packet is a loop detect frame which has been sent by us,
- * throw an uevent and log the event if that is the case.
+ * throws an uevent and logs the event if that is the case.
*
* Return: true if it is a loop detect frame which is to be dropped, false
* otherwise.
@@ -1815,7 +1815,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb,
* * we have to race for a claim
* * if the frame is allowed on the LAN
*
- * in these cases, the skb is further handled by this function
+ * In these cases, the skb is further handled by this function
*
* Return: true if handled, otherwise it returns false and the caller shall
* further process the skb.
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index b85da4b7a77b..0e6e53e9b5f3 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -666,7 +666,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst,
* @vid: VLAN identifier
* @packet_subtype: unicast4addr packet subtype to use
*
- * This function copies the skb with pskb_copy() and is sent as unicast packet
+ * This function copies the skb with pskb_copy() and is sent as a unicast packet
* to each of the selected candidates.
*
* Return: true if the packet is sent to at least one candidate, false
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 7cad97644d05..9fdbe3068153 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -102,8 +102,8 @@ static int batadv_frag_size_limit(void)
*
* Caller must hold chain->lock.
*
- * Return: true if chain is empty and caller can just insert the new fragment
- * without searching for the right position.
+ * Return: true if chain is empty and the caller can just insert the new
+ * fragment without searching for the right position.
*/
static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
u16 seqno)
@@ -306,7 +306,7 @@ free:
* set *skb to merged packet; 2) Packet is buffered: Return true and set *skb
* to NULL; 3) Error: Return false and free skb.
*
- * Return: true when packet is merged or buffered, false when skb is not not
+ * Return: true when the packet is merged or buffered, false when skb is not not
* used.
*/
bool batadv_frag_skb_buffer(struct sk_buff **skb,
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 3a256af92784..fa06b51c0144 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -138,10 +138,10 @@ static bool batadv_mutual_parents(const struct net_device *dev1,
* @net_dev: the device to check
*
* If the user creates any virtual device on top of a batman-adv interface, it
- * is important to prevent this new interface to be used to create a new mesh
- * network (this behaviour would lead to a batman-over-batman configuration).
- * This function recursively checks all the fathers of the device passed as
- * argument looking for a batman-adv soft interface.
+ * is important to prevent this new interface from being used to create a new
+ * mesh network (this behaviour would lead to a batman-over-batman
+ * configuration). This function recursively checks all the fathers of the
+ * device passed as argument looking for a batman-adv soft interface.
*
* Return: true if the device is descendant of a batman-adv mesh interface (or
* if it is a batman-adv interface itself), false otherwise
@@ -680,8 +680,8 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
* @slave: the interface enslaved in another master
* @master: the master from which slave has to be removed
*
- * Invoke ndo_del_slave on master passing slave as argument. In this way slave
- * is free'd and master can correctly change its internal state.
+ * Invoke ndo_del_slave on master passing slave as argument. In this way the
+ * slave is free'd and the master can correctly change its internal state.
*
* Return: 0 on success, a negative value representing the error otherwise
*/
@@ -818,7 +818,7 @@ err:
* @soft_iface: soft interface to check
*
* This function is only using RCU for locking - the result can therefore be
- * off when another functions is modifying the list at the same time. The
+ * off when another function is modifying the list at the same time. The
* caller can use the rtnl_lock to make sure that the count is accurate.
*
* Return: number of connected/enslaved hard interfaces
@@ -939,6 +939,8 @@ batadv_hardif_add_interface(struct net_device *net_dev)
if (batadv_is_wifi_hardif(hard_iface))
hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
+ atomic_set(&hard_iface->hop_penalty, 0);
+
batadv_v_hardif_init(hard_iface);
batadv_check_known_mac_addr(hard_iface->net_dev);
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index f9884dc56cf3..979864c0fa6b 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -69,7 +69,7 @@ int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
__printf(2, 3);
/**
- * _batadv_dbg() - Store debug output with(out) ratelimiting
+ * _batadv_dbg() - Store debug output with(out) rate limiting
* @type: type of debug message
* @bat_priv: the bat priv with all the soft interface information
* @ratelimited: whether output should be rate limited
@@ -95,7 +95,7 @@ static inline void _batadv_dbg(int type __always_unused,
#endif
/**
- * batadv_dbg() - Store debug output without ratelimiting
+ * batadv_dbg() - Store debug output without rate limiting
* @type: type of debug message
* @bat_priv: the bat priv with all the soft interface information
* @arg: format string and variable arguments
@@ -104,7 +104,7 @@ static inline void _batadv_dbg(int type __always_unused,
_batadv_dbg(type, bat_priv, 0, ## arg)
/**
- * batadv_dbg_ratelimited() - Store debug output with ratelimiting
+ * batadv_dbg_ratelimited() - Store debug output with rate limiting
* @type: type of debug message
* @bat_priv: the bat priv with all the soft interface information
* @arg: format string and variable arguments
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index d8a255c85e77..519c08c2cfba 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -666,7 +666,7 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len)
* @vid: the VLAN identifier for which the AP isolation attributed as to be
* looked up
*
- * Return: true if AP isolation is on for the VLAN idenfied by vid, false
+ * Return: true if AP isolation is on for the VLAN identified by vid, false
* otherwise
*/
bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid)
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 61d8dbe8c954..0393bb9ed3d0 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2020.2"
+#define BATADV_SOURCE_VERSION "2020.3"
#endif
/* B.A.T.M.A.N. parameters */
@@ -308,7 +308,7 @@ static inline bool batadv_has_timed_out(unsigned long timestamp,
* @y: value to compare @x against
*
* It handles overflows/underflows and can correctly check for a predecessor
- * unless the variable sequence number has grown by more then
+ * unless the variable sequence number has grown by more than
* 2**(bitwidth(x)-1)-1.
*
* This means that for a u8 with the maximum value 255, it would think:
@@ -330,11 +330,11 @@ static inline bool batadv_has_timed_out(unsigned long timestamp,
/**
* batadv_seq_after() - Checks if a sequence number x is a successor of y
- * @x: potential sucessor of @y
+ * @x: potential successor of @y
* @y: value to compare @x against
*
* It handles overflows/underflows and can correctly check for a successor
- * unless the variable sequence number has grown by more then
+ * unless the variable sequence number has grown by more than
* 2**(bitwidth(x)-1)-1.
*
* This means that for a u8 with the maximum value 255, it would think:
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 9ebdc1e864b9..bdc4a1fba1c6 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -510,7 +510,7 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev,
* the given mcast_list. In general, multicast listeners provided by
* your multicast receiving applications run directly on this node.
*
- * If there is a bridge interface on top of dev, collects from that one
+ * If there is a bridge interface on top of dev, collect from that one
* instead. Just like with IP addresses and routes, multicast listeners
* will(/should) register to the bridge interface instead of an
* enslaved bat0.
@@ -832,8 +832,8 @@ batadv_mcast_bridge_log(struct batadv_priv *bat_priv,
* @bat_priv: the bat priv with all the soft interface information
* @flags: TVLV flags indicating the new multicast state
*
- * Whenever the multicast TVLV flags this nodes announces change this notifies
- * userspace via the 'mcast' log level.
+ * Whenever the multicast TVLV flags this node announces change, this function
+ * should be used to notify userspace about the change.
*/
static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags)
{
@@ -1244,7 +1244,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv)
* @ethhdr: an ethernet header to determine the protocol family from
*
* Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 or
- * BATADV_MCAST_WANT_ALL_IPV6 flag, depending on the provided ethhdr, set and
+ * BATADV_MCAST_WANT_ALL_IPV6 flag, depending on the provided ethhdr, sets and
* increases its refcount.
*/
static struct batadv_orig_node *
@@ -1693,7 +1693,7 @@ batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_forw_send() - send packet to any detected multicast recpient
+ * batadv_mcast_forw_send() - send packet to any detected multicast recipient
* @bat_priv: the bat priv with all the soft interface information
* @skb: the multicast packet to transmit
* @vid: the vlan identifier
@@ -1742,7 +1742,8 @@ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
* @mcast_flags: flags indicating the new multicast state
*
* If the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag of this originator,
- * orig, has toggled then this method updates counter and list accordingly.
+ * orig, has toggled then this method updates the counter and the list
+ * accordingly.
*
* Caller needs to hold orig->mcast_handler_lock.
*/
@@ -1787,7 +1788,7 @@ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv,
* @mcast_flags: flags indicating the new multicast state
*
* If the BATADV_MCAST_WANT_ALL_IPV4 flag of this originator, orig, has
- * toggled then this method updates counter and list accordingly.
+ * toggled then this method updates the counter and the list accordingly.
*
* Caller needs to hold orig->mcast_handler_lock.
*/
@@ -1832,7 +1833,7 @@ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv,
* @mcast_flags: flags indicating the new multicast state
*
* If the BATADV_MCAST_WANT_ALL_IPV6 flag of this originator, orig, has
- * toggled then this method updates counter and list accordingly.
+ * toggled then this method updates the counter and the list accordingly.
*
* Caller needs to hold orig->mcast_handler_lock.
*/
@@ -1877,7 +1878,7 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv,
* @mcast_flags: flags indicating the new multicast state
*
* If the BATADV_MCAST_WANT_NO_RTR4 flag of this originator, orig, has
- * toggled then this method updates counter and list accordingly.
+ * toggled then this method updates the counter and the list accordingly.
*
* Caller needs to hold orig->mcast_handler_lock.
*/
@@ -1922,7 +1923,7 @@ static void batadv_mcast_want_rtr4_update(struct batadv_priv *bat_priv,
* @mcast_flags: flags indicating the new multicast state
*
* If the BATADV_MCAST_WANT_NO_RTR6 flag of this originator, orig, has
- * toggled then this method updates counter and list accordingly.
+ * toggled then this method updates the counter and the list accordingly.
*
* Caller needs to hold orig->mcast_handler_lock.
*/
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 02ed073f95a9..dc193618a761 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -640,7 +640,7 @@ batadv_netlink_tp_meter_put(struct sk_buff *msg, u32 cookie)
* @bat_priv: the bat priv with all the soft interface information
* @dst: destination of tp_meter session
* @result: reason for tp meter session stop
- * @test_time: total time ot the tp_meter session
+ * @test_time: total time of the tp_meter session
* @total_bytes: bytes acked to the receiver
* @cookie: cookie of tp_meter session
*
@@ -826,6 +826,10 @@ static int batadv_netlink_hardif_fill(struct sk_buff *msg,
goto nla_put_failure;
}
+ if (nla_put_u8(msg, BATADV_ATTR_HOP_PENALTY,
+ atomic_read(&hard_iface->hop_penalty)))
+ goto nla_put_failure;
+
#ifdef CONFIG_BATMAN_ADV_BATMAN_V
if (nla_put_u32(msg, BATADV_ATTR_ELP_INTERVAL,
atomic_read(&hard_iface->bat_v.elp_interval)))
@@ -920,9 +924,15 @@ static int batadv_netlink_set_hardif(struct sk_buff *skb,
{
struct batadv_hard_iface *hard_iface = info->user_ptr[1];
struct batadv_priv *bat_priv = info->user_ptr[0];
+ struct nlattr *attr;
+
+ if (info->attrs[BATADV_ATTR_HOP_PENALTY]) {
+ attr = info->attrs[BATADV_ATTR_HOP_PENALTY];
+
+ atomic_set(&hard_iface->hop_penalty, nla_get_u8(attr));
+ }
#ifdef CONFIG_BATMAN_ADV_BATMAN_V
- struct nlattr *attr;
if (info->attrs[BATADV_ATTR_ELP_INTERVAL]) {
attr = info->attrs[BATADV_ATTR_ELP_INTERVAL];
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index b0469d15da0e..48d707850f3e 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -134,7 +134,7 @@ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_mesh_init() - initialise coding hash table and start house keeping
+ * batadv_nc_mesh_init() - initialise coding hash table and start housekeeping
* @bat_priv: the bat priv with all the soft interface information
*
* Return: 0 on success or negative error number in case of failure
@@ -700,7 +700,7 @@ batadv_nc_process_nc_paths(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_worker() - periodic task for house keeping related to network
+ * batadv_nc_worker() - periodic task for housekeeping related to network
* coding
* @work: kernel work struct
*/
@@ -1316,7 +1316,7 @@ batadv_nc_path_search(struct batadv_priv *bat_priv,
}
/**
- * batadv_nc_skb_src_search() - Loops through the list of neighoring nodes of
+ * batadv_nc_skb_src_search() - Loops through the list of neighboring nodes of
* the skb's sender (may be equal to the originator).
* @bat_priv: the bat priv with all the soft interface information
* @skb: data skb to forward
@@ -1402,10 +1402,10 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
* @neigh_node: next hop to forward packet to
* @ethhdr: pointer to the ethernet header inside the skb
*
- * Loops through list of neighboring nodes the next hop has a good connection to
- * (receives OGMs with a sufficient quality). We need to find a neighbor of our
- * next hop that potentially sent a packet which our next hop also received
- * (overheard) and has stored for later decoding.
+ * Loops through the list of neighboring nodes the next hop has a good
+ * connection to (receives OGMs with a sufficient quality). We need to find a
+ * neighbor of our next hop that potentially sent a packet which our next hop
+ * also received (overheard) and has stored for later decoding.
*
* Return: true if the skb was consumed (encoded packet sent) or false otherwise
*/
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 5b0c2fffc214..805d8969bdfb 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -325,7 +325,7 @@ void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
* @if_outgoing: the interface where the payload packet has been received or
* the OGM should be sent to
*
- * Return: the neighbor which should be router for this orig_node/iface.
+ * Return: the neighbor which should be the router for this orig_node/iface.
*
* The object is returned with refcounter increased by 1.
*/
@@ -515,7 +515,7 @@ out:
* Looks for and possibly returns a neighbour belonging to this originator list
* which is connected through the provided hard interface.
*
- * Return: neighbor when found. Othwerwise NULL
+ * Return: neighbor when found. Otherwise NULL
*/
static struct batadv_neigh_node *
batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
@@ -620,7 +620,7 @@ batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface,
*
* Looks for and possibly returns a neighbour belonging to this hard interface.
*
- * Return: neighbor when found. Othwerwise NULL
+ * Return: neighbor when found. Otherwise NULL
*/
struct batadv_hardif_neigh_node *
batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
@@ -999,7 +999,7 @@ void batadv_originator_free(struct batadv_priv *bat_priv)
* @bat_priv: the bat priv with all the soft interface information
* @addr: the mac address of the originator
*
- * Creates a new originator object and initialise all the generic fields.
+ * Creates a new originator object and initialises all the generic fields.
* The new object is not added to the originator list.
*
* Return: the newly created object or NULL on failure.
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index d343382e9664..27cdf5e4349a 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -449,7 +449,7 @@ free_skb:
* @skb: packet to check
* @hdr_size: size of header to pull
*
- * Check for short header and bad addresses in given packet.
+ * Checks for short header and bad addresses in the given packet.
*
* Return: negative value when check fails and 0 otherwise. The negative value
* depends on the reason: -ENODATA for bad header, -EBADR for broadcast
@@ -1113,7 +1113,7 @@ free_skb:
* @recv_if: interface that the skb is received on
*
* This function does one of the three following things: 1) Forward fragment, if
- * the assembled packet will exceed our MTU; 2) Buffer fragment, if we till
+ * the assembled packet will exceed our MTU; 2) Buffer fragment, if we still
* lack further fragments; 3) Merge fragments, if we have all needed parts.
*
* Return: NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise.
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 7f8ade04e08e..d267b94800d6 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -605,8 +605,8 @@ bool batadv_forw_packet_steal(struct batadv_forw_packet *forw_packet,
* given hard_iface. If hard_iface is NULL forwarding packets on all hard
* interfaces will be claimed.
*
- * The packets are being moved from the forw_list to the cleanup_list and
- * by that allows already running threads to notice the claiming.
+ * The packets are being moved from the forw_list to the cleanup_list. This
+ * makes it possible for already running threads to notice the claim.
*/
static void
batadv_forw_packet_list_steal(struct hlist_head *forw_list,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index f1f1c86f3419..23833a0ba5e6 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -406,7 +406,7 @@ end:
* @hdr_size: size of already parsed batman-adv header
* @orig_node: originator from which the batman-adv packet was sent
*
- * Sends a ethernet frame to the receive path of the local @soft_iface.
+ * Sends an ethernet frame to the receive path of the local @soft_iface.
* skb->data has still point to the batman-adv header with the size @hdr_size.
* The caller has to have parsed this header already and made sure that at least
* @hdr_size bytes are still available for pull in @skb.
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index bd2ac570c42c..db7e3774825b 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -66,7 +66,7 @@
/**
* BATADV_TP_MAX_RTO - Maximum sender timeout. If the sender RTO gets beyond
- * such amound of milliseconds, the receiver is considered unreachable and the
+ * such amount of milliseconds, the receiver is considered unreachable and the
* connection is killed
*/
#define BATADV_TP_MAX_RTO 30000
@@ -108,10 +108,10 @@ static u32 batadv_tp_session_cookie(const u8 session[2], u8 icmp_uid)
* batadv_tp_cwnd() - compute the new cwnd size
* @base: base cwnd size value
* @increment: the value to add to base to get the new size
- * @min: minumim cwnd value (usually MSS)
+ * @min: minimum cwnd value (usually MSS)
*
- * Return the new cwnd size and ensures it does not exceed the Advertised
- * Receiver Window size. It is wrap around safe.
+ * Return the new cwnd size and ensure it does not exceed the Advertised
+ * Receiver Window size. It is wrapped around safely.
* For details refer to Section 3.1 of RFC5681
*
* Return: new congestion window size in bytes
@@ -254,7 +254,7 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason,
* @dst: the other endpoint MAC address to look for
*
* Look for a tp_vars object matching dst as end_point and return it after
- * having incremented the refcounter. Return NULL is not found
+ * having increment the refcounter. Return NULL is not found
*
* Return: matching tp_vars or NULL when no tp_vars with @dst was found
*/
@@ -291,7 +291,7 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv,
* @session: session identifier
*
* Look for a tp_vars object matching dst as end_point, session as tp meter
- * session and return it after having incremented the refcounter. Return NULL
+ * session and return it after having increment the refcounter. Return NULL
* is not found
*
* Return: matching tp_vars or NULL when no tp_vars was found
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index a9635c882fe0..98a0aaaf0d50 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -301,7 +301,7 @@ void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
* @vid: VLAN identifier
*
* Return: the number of originators advertising the given address/data
- * (excluding ourself).
+ * (excluding our self).
*/
int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
const u8 *addr, unsigned short vid)
@@ -842,7 +842,7 @@ out:
* table. In case of success the value is updated with the real amount of
* reserved bytes
* Allocate the needed amount of memory for the entire TT TVLV and write its
- * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data
+ * header made up of one tvlv_tt_data object and a series of tvlv_tt_vlan_data
* objects, one per active VLAN served by the originator node.
*
* Return: the size of the allocated buffer or 0 in case of failure.
@@ -1674,7 +1674,7 @@ out:
* the function argument.
* If a TT local entry exists for this non-mesh client remove it.
*
- * The caller must hold orig_node refcount.
+ * The caller must hold the orig_node refcount.
*
* Return: true if the new entry has been added, false otherwise
*/
@@ -1839,7 +1839,7 @@ out:
* @bat_priv: the bat priv with all the soft interface information
* @tt_global_entry: global translation table entry to be analyzed
*
- * This functon assumes the caller holds rcu_read_lock().
+ * This function assumes the caller holds rcu_read_lock().
* Return: best originator list entry or NULL on errors.
*/
static struct batadv_tt_orig_list_entry *
@@ -1887,7 +1887,7 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
* @tt_global_entry: global translation table entry to be printed
* @seq: debugfs table seq_file struct
*
- * This functon assumes the caller holds rcu_read_lock().
+ * This function assumes the caller holds rcu_read_lock().
*/
static void
batadv_tt_global_print_entry(struct batadv_priv *bat_priv,
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 0963a43ad996..6a23a566cde1 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -353,8 +353,8 @@ end:
* @tvlv_value: tvlv content
* @tvlv_value_len: tvlv content length
*
- * Return: success if handler was not found or the return value of the handler
- * callback.
+ * Return: success if the handler was not found or the return value of the
+ * handler callback.
*/
static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
struct batadv_tvlv_handler *tvlv_handler,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index d152b8e81f61..ed519efa3c36 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -208,6 +208,12 @@ struct batadv_hard_iface {
/** @rcu: struct used for freeing in an RCU-safe manner */
struct rcu_head rcu;
+ /**
+ * @hop_penalty: penalty which will be applied to the tq-field
+ * of an OGM received via this interface
+ */
+ atomic_t hop_penalty;
+
/** @bat_iv: per hard-interface B.A.T.M.A.N. IV data */
struct batadv_hard_iface_bat_iv bat_iv;
@@ -455,8 +461,8 @@ struct batadv_orig_node {
spinlock_t tt_buff_lock;
/**
- * @tt_lock: prevents from updating the table while reading it. Table
- * update is made up by two operations (data structure update and
+ * @tt_lock: avoids concurrent read from and write to the table. Table
+ * update is made up of two operations (data structure update and
* metadata -CRC/TTVN-recalculation) and they have to be executed
* atomically in order to avoid another thread to read the
* table/metadata between those.
@@ -748,7 +754,7 @@ struct batadv_neigh_ifinfo {
* struct batadv_bcast_duplist_entry - structure for LAN broadcast suppression
*/
struct batadv_bcast_duplist_entry {
- /** @orig: mac address of orig node orginating the broadcast */
+ /** @orig: mac address of orig node originating the broadcast */
u8 orig[ETH_ALEN];
/** @crc: crc32 checksum of broadcast payload */
@@ -1010,7 +1016,7 @@ struct batadv_priv_tt {
/**
* @commit_lock: prevents from executing a local TT commit while reading
- * the local table. The local TT commit is made up by two operations
+ * the local table. The local TT commit is made up of two operations
* (data structure update and metadata -CRC/TTVN- recalculation) and
* they have to be executed atomically in order to avoid another thread
* to read the table/metadata between those.
@@ -1024,7 +1030,7 @@ struct batadv_priv_tt {
#ifdef CONFIG_BATMAN_ADV_BLA
/**
- * struct batadv_priv_bla - per mesh interface bridge loope avoidance data
+ * struct batadv_priv_bla - per mesh interface bridge loop avoidance data
*/
struct batadv_priv_bla {
/** @num_requests: number of bla requests in flight */
@@ -1718,7 +1724,7 @@ struct batadv_priv {
spinlock_t softif_vlan_list_lock;
#ifdef CONFIG_BATMAN_ADV_BLA
- /** @bla: bridge loope avoidance data */
+ /** @bla: bridge loop avoidance data */
struct batadv_priv_bla bla;
#endif
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index bb55d92691b0..cff4944d5b66 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -50,6 +50,7 @@ static bool enable_6lowpan;
/* We are listening incoming connections via this channel
*/
static struct l2cap_chan *listen_chan;
+static DEFINE_MUTEX(set_lock);
struct lowpan_peer {
struct list_head list;
@@ -1078,12 +1079,14 @@ static void do_enable_set(struct work_struct *work)
enable_6lowpan = set_enable->flag;
+ mutex_lock(&set_lock);
if (listen_chan) {
l2cap_chan_close(listen_chan, 0);
l2cap_chan_put(listen_chan);
}
listen_chan = bt_6lowpan_listen();
+ mutex_unlock(&set_lock);
kfree(set_enable);
}
@@ -1135,11 +1138,13 @@ static ssize_t lowpan_control_write(struct file *fp,
if (ret == -EINVAL)
return ret;
+ mutex_lock(&set_lock);
if (listen_chan) {
l2cap_chan_close(listen_chan, 0);
l2cap_chan_put(listen_chan);
listen_chan = NULL;
}
+ mutex_unlock(&set_lock);
if (conn) {
struct lowpan_peer *peer;
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 1d6d243cdde9..e2497d764e97 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -21,7 +21,7 @@ menuconfig BT
It was designed as a replacement for cables and other short-range
technologies like IrDA. Bluetooth operates in personal area range
that typically extends up to 10 meters. More information about
- Bluetooth can be found at <http://www.bluetooth.com/>.
+ Bluetooth can be found at <https://www.bluetooth.com/>.
Linux Bluetooth subsystem consist of several layers:
Bluetooth Core
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 41dd541a44a5..1c645fba8c49 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -14,7 +14,7 @@ bluetooth_6lowpan-y := 6lowpan.o
bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \
hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o lib.o \
- ecdh_helper.o hci_request.o mgmt_util.o
+ ecdh_helper.o hci_request.o mgmt_util.o mgmt_config.o
bluetooth-$(CONFIG_BT_BREDR) += sco.o
bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 3fd124927d4d..4ef6a54403aa 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -286,6 +286,9 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (msg->msg_name && bt_sk(sk)->skb_msg_name)
bt_sk(sk)->skb_msg_name(skb, msg->msg_name,
&msg->msg_namelen);
+
+ if (bt_sk(sk)->skb_put_cmsg)
+ bt_sk(sk)->skb_put_cmsg(skb, msg, sk);
}
skb_free_datagram(sk, skb);
@@ -453,8 +456,6 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock,
struct sock *sk = sock->sk;
__poll_t mask = 0;
- BT_DBG("sock %p, sk %p", sock, sk);
-
poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_state == BT_LISTEN)
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index cfd83c5521ae..d515571b2afb 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -182,8 +182,6 @@ static const struct proto_ops bnep_sock_ops = {
.recvmsg = sock_no_recvmsg,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.connect = sock_no_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index defdd4871919..96d49d9fae96 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -185,8 +185,6 @@ static const struct proto_ops cmtp_sock_ops = {
.recvmsg = sock_no_recvmsg,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.connect = sock_no_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 307800fd18e6..9832f8445d43 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -789,11 +789,8 @@ static void set_ext_conn_params(struct hci_conn *conn,
memset(p, 0, sizeof(*p));
- /* Set window to be the same value as the interval to
- * enable continuous scanning.
- */
- p->scan_interval = cpu_to_le16(hdev->le_scan_interval);
- p->scan_window = p->scan_interval;
+ p->scan_interval = cpu_to_le16(hdev->le_scan_int_connect);
+ p->scan_window = cpu_to_le16(hdev->le_scan_window_connect);
p->conn_interval_min = cpu_to_le16(conn->le_conn_min_interval);
p->conn_interval_max = cpu_to_le16(conn->le_conn_max_interval);
p->conn_latency = cpu_to_le16(conn->le_conn_latency);
@@ -875,11 +872,8 @@ static void hci_req_add_le_create_conn(struct hci_request *req,
memset(&cp, 0, sizeof(cp));
- /* Set window to be the same value as the interval to enable
- * continuous scanning.
- */
- cp.scan_interval = cpu_to_le16(hdev->le_scan_interval);
- cp.scan_window = cp.scan_interval;
+ cp.scan_interval = cpu_to_le16(hdev->le_scan_int_connect);
+ cp.scan_window = cpu_to_le16(hdev->le_scan_window_connect);
bacpy(&cp.peer_addr, &conn->dst);
cp.peer_addr_type = conn->dst_type;
@@ -937,7 +931,7 @@ static void hci_req_directed_advertising(struct hci_request *req,
* So it is required to remove adv set for handle 0x00. since we use
* instance 0 for directed adv.
*/
- hci_req_add(req, HCI_OP_LE_REMOVE_ADV_SET, sizeof(cp.handle), &cp.handle);
+ __hci_req_remove_ext_adv_instance(req, cp.handle);
hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp);
@@ -1009,6 +1003,11 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
struct hci_request req;
int err;
+ /* This ensures that during disable le_scan address resolution
+ * will not be disabled if it is followed by le_create_conn
+ */
+ bool rpa_le_conn = true;
+
/* Let's make sure that le is enabled.*/
if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) {
if (lmp_le_capable(hdev))
@@ -1109,7 +1108,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
* state.
*/
if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
- hci_req_add_le_scan_disable(&req);
+ hci_req_add_le_scan_disable(&req, rpa_le_conn);
hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED);
}
@@ -1180,7 +1179,8 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev,
/* This function requires the caller holds hdev->lock */
struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
u8 dst_type, u8 sec_level,
- u16 conn_timeout)
+ u16 conn_timeout,
+ enum conn_reasons conn_reason)
{
struct hci_conn *conn;
@@ -1225,6 +1225,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
conn->sec_level = BT_SECURITY_LOW;
conn->pending_sec_level = sec_level;
conn->conn_timeout = conn_timeout;
+ conn->conn_reason = conn_reason;
hci_update_background_scan(hdev);
@@ -1234,7 +1235,8 @@ done:
}
struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
- u8 sec_level, u8 auth_type)
+ u8 sec_level, u8 auth_type,
+ enum conn_reasons conn_reason)
{
struct hci_conn *acl;
@@ -1254,6 +1256,7 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
hci_conn_hold(acl);
+ acl->conn_reason = conn_reason;
if (acl->state == BT_OPEN || acl->state == BT_CLOSED) {
acl->sec_level = BT_SECURITY_LOW;
acl->pending_sec_level = sec_level;
@@ -1270,7 +1273,8 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
struct hci_conn *acl;
struct hci_conn *sco;
- acl = hci_connect_acl(hdev, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING);
+ acl = hci_connect_acl(hdev, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING,
+ CONN_REASON_SCO_CONNECT);
if (IS_ERR(acl))
return acl;
@@ -1323,6 +1327,23 @@ int hci_conn_check_link_mode(struct hci_conn *conn)
return 0;
}
+ /* AES encryption is required for Level 4:
+ *
+ * BLUETOOTH CORE SPECIFICATION Version 5.2 | Vol 3, Part C
+ * page 1319:
+ *
+ * 128-bit equivalent strength for link and encryption keys
+ * required using FIPS approved algorithms (E0 not allowed,
+ * SAFER+ not allowed, and P-192 not allowed; encryption key
+ * not shortened)
+ */
+ if (conn->sec_level == BT_SECURITY_FIPS &&
+ !test_bit(HCI_CONN_AES_CCM, &conn->flags)) {
+ bt_dev_err(conn->hdev,
+ "Invalid security: Missing AES-CCM usage");
+ return 0;
+ }
+
if (hci_conn_ssp_enabled(conn) &&
!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
return 0;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index dbe2d79f233f..68bfe57b6625 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -26,7 +26,6 @@
/* Bluetooth HCI core. */
#include <linux/export.h>
-#include <linux/idr.h>
#include <linux/rfkill.h>
#include <linux/debugfs.h>
#include <linux/crypto.h>
@@ -606,7 +605,8 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
if (hdev->commands[8] & 0x01)
hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL);
- if (hdev->commands[18] & 0x04)
+ if (hdev->commands[18] & 0x04 &&
+ !test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks))
hci_req_add(req, HCI_OP_READ_DEF_ERR_DATA_REPORTING, 0, NULL);
/* Some older Broadcom based Bluetooth 1.2 controllers do not
@@ -763,6 +763,14 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
hci_req_add(req, HCI_OP_LE_CLEAR_RESOLV_LIST, 0, NULL);
}
+ if (hdev->commands[35] & 0x40) {
+ __le16 rpa_timeout = cpu_to_le16(hdev->rpa_timeout);
+
+ /* Set RPA timeout */
+ hci_req_add(req, HCI_OP_LE_SET_RPA_TIMEOUT, 2,
+ &rpa_timeout);
+ }
+
if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) {
/* Read LE Maximum Data Length */
hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL);
@@ -851,7 +859,8 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt)
/* Set erroneous data reporting if supported to the wideband speech
* setting value
*/
- if (hdev->commands[18] & 0x08) {
+ if (hdev->commands[18] & 0x08 &&
+ !test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) {
bool enabled = hci_dev_test_flag(hdev,
HCI_WIDEBAND_SPEECH_ENABLED);
@@ -2982,7 +2991,7 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags,
adv_instance->remaining_time = timeout;
if (duration == 0)
- adv_instance->duration = HCI_DEFAULT_ADV_DURATION;
+ adv_instance->duration = hdev->def_multi_adv_rotation_duration;
else
adv_instance->duration = duration;
@@ -2996,6 +3005,94 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags,
return 0;
}
+/* This function requires the caller holds hdev->lock */
+void hci_adv_monitors_clear(struct hci_dev *hdev)
+{
+ struct adv_monitor *monitor;
+ int handle;
+
+ idr_for_each_entry(&hdev->adv_monitors_idr, monitor, handle)
+ hci_free_adv_monitor(monitor);
+
+ idr_destroy(&hdev->adv_monitors_idr);
+}
+
+void hci_free_adv_monitor(struct adv_monitor *monitor)
+{
+ struct adv_pattern *pattern;
+ struct adv_pattern *tmp;
+
+ if (!monitor)
+ return;
+
+ list_for_each_entry_safe(pattern, tmp, &monitor->patterns, list)
+ kfree(pattern);
+
+ kfree(monitor);
+}
+
+/* This function requires the caller holds hdev->lock */
+int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor)
+{
+ int min, max, handle;
+
+ if (!monitor)
+ return -EINVAL;
+
+ min = HCI_MIN_ADV_MONITOR_HANDLE;
+ max = HCI_MIN_ADV_MONITOR_HANDLE + HCI_MAX_ADV_MONITOR_NUM_HANDLES;
+ handle = idr_alloc(&hdev->adv_monitors_idr, monitor, min, max,
+ GFP_KERNEL);
+ if (handle < 0)
+ return handle;
+
+ hdev->adv_monitors_cnt++;
+ monitor->handle = handle;
+
+ hci_update_background_scan(hdev);
+
+ return 0;
+}
+
+static int free_adv_monitor(int id, void *ptr, void *data)
+{
+ struct hci_dev *hdev = data;
+ struct adv_monitor *monitor = ptr;
+
+ idr_remove(&hdev->adv_monitors_idr, monitor->handle);
+ hci_free_adv_monitor(monitor);
+
+ return 0;
+}
+
+/* This function requires the caller holds hdev->lock */
+int hci_remove_adv_monitor(struct hci_dev *hdev, u16 handle)
+{
+ struct adv_monitor *monitor;
+
+ if (handle) {
+ monitor = idr_find(&hdev->adv_monitors_idr, handle);
+ if (!monitor)
+ return -ENOENT;
+
+ idr_remove(&hdev->adv_monitors_idr, monitor->handle);
+ hci_free_adv_monitor(monitor);
+ } else {
+ /* Remove all monitors if handle is 0. */
+ idr_for_each(&hdev->adv_monitors_idr, &free_adv_monitor, hdev);
+ }
+
+ hci_update_background_scan(hdev);
+
+ return 0;
+}
+
+/* This function requires the caller holds hdev->lock */
+bool hci_is_adv_monitoring(struct hci_dev *hdev)
+{
+ return !idr_is_empty(&hdev->adv_monitors_idr);
+}
+
struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list,
bdaddr_t *bdaddr, u8 type)
{
@@ -3023,6 +3120,20 @@ struct bdaddr_list_with_irk *hci_bdaddr_list_lookup_with_irk(
return NULL;
}
+struct bdaddr_list_with_flags *
+hci_bdaddr_list_lookup_with_flags(struct list_head *bdaddr_list,
+ bdaddr_t *bdaddr, u8 type)
+{
+ struct bdaddr_list_with_flags *b;
+
+ list_for_each_entry(b, bdaddr_list, list) {
+ if (!bacmp(&b->bdaddr, bdaddr) && b->bdaddr_type == type)
+ return b;
+ }
+
+ return NULL;
+}
+
void hci_bdaddr_list_clear(struct list_head *bdaddr_list)
{
struct bdaddr_list *b, *n;
@@ -3084,6 +3195,30 @@ int hci_bdaddr_list_add_with_irk(struct list_head *list, bdaddr_t *bdaddr,
return 0;
}
+int hci_bdaddr_list_add_with_flags(struct list_head *list, bdaddr_t *bdaddr,
+ u8 type, u32 flags)
+{
+ struct bdaddr_list_with_flags *entry;
+
+ if (!bacmp(bdaddr, BDADDR_ANY))
+ return -EBADF;
+
+ if (hci_bdaddr_list_lookup(list, bdaddr, type))
+ return -EEXIST;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ bacpy(&entry->bdaddr, bdaddr);
+ entry->bdaddr_type = type;
+ entry->current_flags = flags;
+
+ list_add(&entry->list, list);
+
+ return 0;
+}
+
int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type)
{
struct bdaddr_list *entry;
@@ -3123,6 +3258,26 @@ int hci_bdaddr_list_del_with_irk(struct list_head *list, bdaddr_t *bdaddr,
return 0;
}
+int hci_bdaddr_list_del_with_flags(struct list_head *list, bdaddr_t *bdaddr,
+ u8 type)
+{
+ struct bdaddr_list_with_flags *entry;
+
+ if (!bacmp(bdaddr, BDADDR_ANY)) {
+ hci_bdaddr_list_clear(list);
+ return 0;
+ }
+
+ entry = hci_bdaddr_list_lookup_with_flags(list, bdaddr, type);
+ if (!entry)
+ return -ENOENT;
+
+ list_del(&entry->list);
+ kfree(entry);
+
+ return 0;
+}
+
/* This function requires the caller holds hdev->lock */
struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
bdaddr_t *addr, u8 addr_type)
@@ -3145,6 +3300,15 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
{
struct hci_conn_params *param;
+ switch (addr_type) {
+ case ADDR_LE_DEV_PUBLIC_RESOLVED:
+ addr_type = ADDR_LE_DEV_PUBLIC;
+ break;
+ case ADDR_LE_DEV_RANDOM_RESOLVED:
+ addr_type = ADDR_LE_DEV_RANDOM;
+ break;
+ }
+
list_for_each_entry(param, list, action) {
if (bacmp(&param->addr, addr) == 0 &&
param->addr_type == addr_type)
@@ -3289,10 +3453,10 @@ static int hci_suspend_wait_event(struct hci_dev *hdev)
WAKE_COND, SUSPEND_NOTIFIER_TIMEOUT);
if (ret == 0) {
- bt_dev_dbg(hdev, "Timed out waiting for suspend");
+ bt_dev_err(hdev, "Timed out waiting for suspend events");
for (i = 0; i < __SUSPEND_NUM_TASKS; ++i) {
if (test_bit(i, hdev->suspend_tasks))
- bt_dev_dbg(hdev, "Bit %d is set", i);
+ bt_dev_err(hdev, "Suspend timeout bit: %d", i);
clear_bit(i, hdev->suspend_tasks);
}
@@ -3360,12 +3524,15 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
ret = hci_change_suspend_state(hdev, BT_RUNNING);
}
- /* If suspend failed, restore it to running */
- if (ret && action == PM_SUSPEND_PREPARE)
- hci_change_suspend_state(hdev, BT_RUNNING);
-
done:
- return ret ? notifier_from_errno(-EBUSY) : NOTIFY_STOP;
+ /* We always allow suspend even if suspend preparation failed and
+ * attempt to recover in resume.
+ */
+ if (ret)
+ bt_dev_err(hdev, "Suspend notifier action (%lu) failed: %d",
+ action, ret);
+
+ return NOTIFY_DONE;
}
/* Alloc HCI device */
@@ -3397,6 +3564,12 @@ struct hci_dev *hci_alloc_dev(void)
hdev->le_adv_max_interval = 0x0800;
hdev->le_scan_interval = 0x0060;
hdev->le_scan_window = 0x0030;
+ hdev->le_scan_int_suspend = 0x0400;
+ hdev->le_scan_window_suspend = 0x0012;
+ hdev->le_scan_int_discovery = DISCOV_LE_SCAN_INT;
+ hdev->le_scan_window_discovery = DISCOV_LE_SCAN_WIN;
+ hdev->le_scan_int_connect = 0x0060;
+ hdev->le_scan_window_connect = 0x0060;
hdev->le_conn_min_interval = 0x0018;
hdev->le_conn_max_interval = 0x0028;
hdev->le_conn_latency = 0x0000;
@@ -3412,6 +3585,8 @@ struct hci_dev *hci_alloc_dev(void)
hdev->le_tx_def_phys = HCI_LE_SET_PHY_1M;
hdev->le_rx_def_phys = HCI_LE_SET_PHY_1M;
hdev->le_num_of_adv_sets = HCI_MAX_ADV_INSTANCES;
+ hdev->def_multi_adv_rotation_duration = HCI_DEFAULT_ADV_DURATION;
+ hdev->def_le_autoconnect_timeout = HCI_LE_AUTOCONN_TIMEOUT;
hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT;
hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT;
@@ -3420,13 +3595,17 @@ struct hci_dev *hci_alloc_dev(void)
hdev->auth_payload_timeout = DEFAULT_AUTH_PAYLOAD_TIMEOUT;
hdev->min_enc_key_size = HCI_MIN_ENC_KEY_SIZE;
+ /* default 1.28 sec page scan */
+ hdev->def_page_scan_type = PAGE_SCAN_TYPE_STANDARD;
+ hdev->def_page_scan_int = 0x0800;
+ hdev->def_page_scan_window = 0x0012;
+
mutex_init(&hdev->lock);
mutex_init(&hdev->req_lock);
INIT_LIST_HEAD(&hdev->mgmt_pending);
INIT_LIST_HEAD(&hdev->blacklist);
INIT_LIST_HEAD(&hdev->whitelist);
- INIT_LIST_HEAD(&hdev->wakeable);
INIT_LIST_HEAD(&hdev->uuids);
INIT_LIST_HEAD(&hdev->link_keys);
INIT_LIST_HEAD(&hdev->long_term_keys);
@@ -3574,6 +3753,8 @@ int hci_register_dev(struct hci_dev *hdev)
queue_work(hdev->req_workqueue, &hdev->power_on);
+ idr_init(&hdev->adv_monitors_idr);
+
return id;
err_wqueue:
@@ -3603,9 +3784,10 @@ void hci_unregister_dev(struct hci_dev *hdev)
cancel_work_sync(&hdev->power_on);
- hci_dev_do_close(hdev);
-
unregister_pm_notifier(&hdev->suspend_notifier);
+ cancel_work_sync(&hdev->suspend_prepare);
+
+ hci_dev_do_close(hdev);
if (!test_bit(HCI_INIT, &hdev->flags) &&
!hci_dev_test_flag(hdev, HCI_SETUP) &&
@@ -3644,6 +3826,7 @@ void hci_unregister_dev(struct hci_dev *hdev)
hci_smp_irks_clear(hdev);
hci_remote_oob_data_clear(hdev);
hci_adv_instances_clear(hdev);
+ hci_adv_monitors_clear(hdev);
hci_bdaddr_list_clear(&hdev->le_white_list);
hci_bdaddr_list_clear(&hdev->le_resolv_list);
hci_conn_params_clear_all(hdev);
@@ -4551,6 +4734,7 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
if (conn) {
/* Send to upper protocol */
+ bt_cb(skb)->sco.pkt_status = flags & 0x03;
sco_recv_scodata(conn, skb);
return;
} else {
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index cfeaee347db3..4b7fc430793c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1338,6 +1338,9 @@ static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr,
{
struct discovery_state *d = &hdev->discovery;
+ if (len > HCI_MAX_AD_LENGTH)
+ return;
+
bacpy(&d->last_adv_addr, bdaddr);
d->last_adv_addr_type = bdaddr_type;
d->last_adv_rssi = rssi;
@@ -2296,6 +2299,22 @@ static void cs_le_create_conn(struct hci_dev *hdev, bdaddr_t *peer_addr,
if (!conn)
return;
+ /* When using controller based address resolution, then the new
+ * address types 0x02 and 0x03 are used. These types need to be
+ * converted back into either public address or random address type
+ */
+ if (use_ll_privacy(hdev) &&
+ hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) {
+ switch (own_address_type) {
+ case ADDR_LE_DEV_PUBLIC_RESOLVED:
+ own_address_type = ADDR_LE_DEV_PUBLIC;
+ break;
+ case ADDR_LE_DEV_RANDOM_RESOLVED:
+ own_address_type = ADDR_LE_DEV_RANDOM;
+ break;
+ }
+ }
+
/* Store the initiator and responder address information which
* is needed for SMP. These values will not change during the
* lifetime of the connection.
@@ -2517,7 +2536,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s num_rsp %d", hdev->name, num_rsp);
- if (!num_rsp)
+ if (!num_rsp || skb->len < num_rsp * sizeof(*info) + 1)
return;
if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ))
@@ -2697,10 +2716,10 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
*/
if (hci_dev_test_flag(hdev, HCI_MGMT) &&
!hci_dev_test_flag(hdev, HCI_CONNECTABLE) &&
- !hci_bdaddr_list_lookup(&hdev->whitelist, &ev->bdaddr,
- BDADDR_BREDR)) {
- hci_reject_conn(hdev, &ev->bdaddr);
- return;
+ !hci_bdaddr_list_lookup_with_flags(&hdev->whitelist, &ev->bdaddr,
+ BDADDR_BREDR)) {
+ hci_reject_conn(hdev, &ev->bdaddr);
+ return;
}
/* Connection accepted */
@@ -2825,7 +2844,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
case HCI_AUTO_CONN_LINK_LOSS:
if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT)
break;
- /* Fall through */
+ fallthrough;
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
@@ -3065,27 +3084,23 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
+ /* Check link security requirements are met */
+ if (!hci_conn_check_link_mode(conn))
+ ev->status = HCI_ERROR_AUTH_FAILURE;
+
if (ev->status && conn->state == BT_CONNECTED) {
if (ev->status == HCI_ERROR_PIN_OR_KEY_MISSING)
set_bit(HCI_CONN_AUTH_FAILURE, &conn->flags);
+ /* Notify upper layers so they can cleanup before
+ * disconnecting.
+ */
+ hci_encrypt_cfm(conn, ev->status);
hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
hci_conn_drop(conn);
goto unlock;
}
- /* In Secure Connections Only mode, do not allow any connections
- * that are not encrypted with AES-CCM using a P-256 authenticated
- * combination key.
- */
- if (hci_dev_test_flag(hdev, HCI_SC_ONLY) &&
- (!test_bit(HCI_CONN_AES_CCM, &conn->flags) ||
- conn->key_type != HCI_LK_AUTH_COMBINATION_P256)) {
- hci_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE);
- hci_conn_drop(conn);
- goto unlock;
- }
-
/* Try reading the encryption key size for encrypted ACL links */
if (!ev->status && ev->encrypt && conn->type == ACL_LINK) {
struct hci_cp_read_enc_key_size cp;
@@ -4163,6 +4178,9 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev,
struct inquiry_info_with_rssi_and_pscan_mode *info;
info = (void *) (skb->data + 1);
+ if (skb->len < num_rsp * sizeof(*info) + 1)
+ goto unlock;
+
for (; num_rsp; num_rsp--, info++) {
u32 flags;
@@ -4184,6 +4202,9 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev,
} else {
struct inquiry_info_with_rssi *info = (void *) (skb->data + 1);
+ if (skb->len < num_rsp * sizeof(*info) + 1)
+ goto unlock;
+
for (; num_rsp; num_rsp--, info++) {
u32 flags;
@@ -4204,6 +4225,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev,
}
}
+unlock:
hci_dev_unlock(hdev);
}
@@ -4324,7 +4346,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
if (hci_setup_sync(conn, conn->link->handle))
goto unlock;
}
- /* fall through */
+ fallthrough;
default:
conn->state = BT_CLOSED;
@@ -4379,7 +4401,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev,
BT_DBG("%s num_rsp %d", hdev->name, num_rsp);
- if (!num_rsp)
+ if (!num_rsp || skb->len < num_rsp * sizeof(*info) + 1)
return;
if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ))
@@ -5209,6 +5231,11 @@ static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev,
le16_to_cpu(ev->interval),
le16_to_cpu(ev->latency),
le16_to_cpu(ev->supervision_timeout));
+
+ if (use_ll_privacy(hdev) &&
+ hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) &&
+ hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION))
+ hci_req_disable_address_resolution(hdev);
}
static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb)
@@ -5319,7 +5346,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
}
conn = hci_connect_le(hdev, addr, addr_type, BT_SECURITY_LOW,
- HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER,
+ hdev->def_le_autoconnect_timeout, HCI_ROLE_MASTER,
direct_rpa);
if (!IS_ERR(conn)) {
/* If HCI_AUTO_CONN_EXPLICIT is set, conn is already owned
@@ -5355,7 +5382,8 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
u8 bdaddr_type, bdaddr_t *direct_addr,
- u8 direct_addr_type, s8 rssi, u8 *data, u8 len)
+ u8 direct_addr_type, s8 rssi, u8 *data, u8 len,
+ bool ext_adv)
{
struct discovery_state *d = &hdev->discovery;
struct smp_irk *irk;
@@ -5377,6 +5405,11 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
return;
}
+ if (!ext_adv && len > HCI_MAX_AD_LENGTH) {
+ bt_dev_err_ratelimited(hdev, "legacy adv larger than 31 bytes");
+ return;
+ }
+
/* Find the end of the data in case the report contains padded zero
* bytes at the end causing an invalid length value.
*
@@ -5437,7 +5470,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
*/
conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type,
direct_addr);
- if (conn && type == LE_ADV_IND) {
+ if (!ext_adv && conn && type == LE_ADV_IND && len <= HCI_MAX_AD_LENGTH) {
/* Store report for later inclusion by
* mgmt_device_connected
*/
@@ -5447,14 +5480,15 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
/* Passive scanning shouldn't trigger any device found events,
* except for devices marked as CONN_REPORT for which we do send
- * device found events.
+ * device found events, or advertisement monitoring requested.
*/
if (hdev->le_scan_type == LE_SCAN_PASSIVE) {
if (type == LE_ADV_DIRECT_IND)
return;
if (!hci_pend_le_action_lookup(&hdev->pend_le_reports,
- bdaddr, bdaddr_type))
+ bdaddr, bdaddr_type) &&
+ idr_is_empty(&hdev->adv_monitors_idr))
return;
if (type == LE_ADV_NONCONN_IND || type == LE_ADV_SCAN_IND)
@@ -5491,7 +5525,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
* event or send an immediate device found event if the data
* should not be stored for later.
*/
- if (!has_pending_adv_report(hdev)) {
+ if (!ext_adv && !has_pending_adv_report(hdev)) {
/* If the report will trigger a SCAN_REQ store it for
* later merging.
*/
@@ -5526,7 +5560,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
/* If the new report will trigger a SCAN_REQ store it for
* later merging.
*/
- if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) {
+ if (!ext_adv && (type == LE_ADV_IND ||
+ type == LE_ADV_SCAN_IND)) {
store_pending_adv_report(hdev, bdaddr, bdaddr_type,
rssi, flags, data, len);
return;
@@ -5566,7 +5601,7 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
rssi = ev->data[ev->length];
process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
ev->bdaddr_type, NULL, 0, rssi,
- ev->data, ev->length);
+ ev->data, ev->length, false);
} else {
bt_dev_err(hdev, "Dropping invalid advertising data");
}
@@ -5638,7 +5673,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (legacy_evt_type != LE_ADV_INVALID) {
process_adv_report(hdev, legacy_evt_type, &ev->bdaddr,
ev->bdaddr_type, NULL, 0, ev->rssi,
- ev->data, ev->length);
+ ev->data, ev->length,
+ !(evt_type & LE_EXT_ADV_LEGACY_PDU));
}
ptr += sizeof(*ev) + ev->length;
@@ -5836,7 +5872,8 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev,
process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
ev->bdaddr_type, &ev->direct_addr,
- ev->direct_addr_type, ev->rssi, NULL, 0);
+ ev->direct_addr_type, ev->rssi, NULL, 0,
+ false);
ptr += sizeof(*ev);
}
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 1fc55685da62..e0269192f2e5 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -34,9 +34,6 @@
#define HCI_REQ_PEND 1
#define HCI_REQ_CANCELED 2
-#define LE_SUSPEND_SCAN_WINDOW 0x0012
-#define LE_SUSPEND_SCAN_INTERVAL 0x0400
-
void hci_req_init(struct hci_request *req, struct hci_dev *hdev)
{
skb_queue_head_init(&req->cmd_q);
@@ -366,13 +363,11 @@ void __hci_req_write_fast_connectable(struct hci_request *req, bool enable)
/* 160 msec page scan interval */
acp.interval = cpu_to_le16(0x0100);
} else {
- type = PAGE_SCAN_TYPE_STANDARD; /* default */
-
- /* default 1.28 sec page scan */
- acp.interval = cpu_to_le16(0x0800);
+ type = hdev->def_page_scan_type;
+ acp.interval = cpu_to_le16(hdev->def_page_scan_int);
}
- acp.window = cpu_to_le16(0x0012);
+ acp.window = cpu_to_le16(hdev->def_page_scan_window);
if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval ||
__cpu_to_le16(hdev->page_scan_window) != acp.window)
@@ -418,18 +413,22 @@ static void __hci_update_background_scan(struct hci_request *req)
*/
hci_discovery_filter_clear(hdev);
+ BT_DBG("%s ADV monitoring is %s", hdev->name,
+ hci_is_adv_monitoring(hdev) ? "on" : "off");
+
if (list_empty(&hdev->pend_le_conns) &&
- list_empty(&hdev->pend_le_reports)) {
+ list_empty(&hdev->pend_le_reports) &&
+ !hci_is_adv_monitoring(hdev)) {
/* If there is no pending LE connections or devices
- * to be scanned for, we should stop the background
- * scanning.
+ * to be scanned for or no ADV monitors, we should stop the
+ * background scanning.
*/
/* If controller is not scanning we are done. */
if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
return;
- hci_req_add_le_scan_disable(req);
+ hci_req_add_le_scan_disable(req, false);
BT_DBG("%s stopping background scanning", hdev->name);
} else {
@@ -448,7 +447,7 @@ static void __hci_update_background_scan(struct hci_request *req)
* don't miss any advertising (due to duplicates filter).
*/
if (hci_dev_test_flag(hdev, HCI_LE_SCAN))
- hci_req_add_le_scan_disable(req);
+ hci_req_add_le_scan_disable(req, false);
hci_req_add_le_passive_scan(req);
@@ -653,7 +652,7 @@ void __hci_req_update_eir(struct hci_request *req)
hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
}
-void hci_req_add_le_scan_disable(struct hci_request *req)
+void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn)
{
struct hci_dev *hdev = req->hdev;
@@ -676,6 +675,15 @@ void hci_req_add_le_scan_disable(struct hci_request *req)
cp.enable = LE_SCAN_DISABLE;
hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
}
+
+ /* Disable address resolution */
+ if (use_ll_privacy(hdev) &&
+ hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) &&
+ hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION) && !rpa_le_conn) {
+ __u8 enable = 0x00;
+
+ hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable);
+ }
}
static void del_from_white_list(struct hci_request *req, bdaddr_t *bdaddr,
@@ -689,6 +697,21 @@ static void del_from_white_list(struct hci_request *req, bdaddr_t *bdaddr,
bt_dev_dbg(req->hdev, "Remove %pMR (0x%x) from whitelist", &cp.bdaddr,
cp.bdaddr_type);
hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, sizeof(cp), &cp);
+
+ if (use_ll_privacy(req->hdev)) {
+ struct smp_irk *irk;
+
+ irk = hci_find_irk_by_addr(req->hdev, bdaddr, bdaddr_type);
+ if (irk) {
+ struct hci_cp_le_del_from_resolv_list cp;
+
+ cp.bdaddr_type = bdaddr_type;
+ bacpy(&cp.bdaddr, bdaddr);
+
+ hci_req_add(req, HCI_OP_LE_DEL_FROM_RESOLV_LIST,
+ sizeof(cp), &cp);
+ }
+ }
}
/* Adds connection to white list if needed. On error, returns -1. */
@@ -709,13 +732,14 @@ static int add_to_white_list(struct hci_request *req,
return -1;
/* White list can not be used with RPAs */
- if (!allow_rpa &&
+ if (!allow_rpa && !use_ll_privacy(hdev) &&
hci_find_irk_by_addr(hdev, &params->addr, params->addr_type)) {
return -1;
}
/* During suspend, only wakeable devices can be in whitelist */
- if (hdev->suspended && !params->wakeable)
+ if (hdev->suspended && !hci_conn_test_flag(HCI_CONN_FLAG_REMOTE_WAKEUP,
+ params->current_flags))
return 0;
*num_entries += 1;
@@ -726,6 +750,28 @@ static int add_to_white_list(struct hci_request *req,
cp.bdaddr_type);
hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp);
+ if (use_ll_privacy(hdev)) {
+ struct smp_irk *irk;
+
+ irk = hci_find_irk_by_addr(hdev, &params->addr,
+ params->addr_type);
+ if (irk) {
+ struct hci_cp_le_add_to_resolv_list cp;
+
+ cp.bdaddr_type = params->addr_type;
+ bacpy(&cp.bdaddr, &params->addr);
+ memcpy(cp.peer_irk, irk->val, 16);
+
+ if (hci_dev_test_flag(hdev, HCI_PRIVACY))
+ memcpy(cp.local_irk, hdev->irk, 16);
+ else
+ memset(cp.local_irk, 0, 16);
+
+ hci_req_add(req, HCI_OP_LE_ADD_TO_RESOLV_LIST,
+ sizeof(cp), &cp);
+ }
+ }
+
return 0;
}
@@ -766,7 +812,7 @@ static u8 update_white_list(struct hci_request *req)
}
/* White list can not be used with RPAs */
- if (!allow_rpa &&
+ if (!allow_rpa && !use_ll_privacy(hdev) &&
hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) {
return 0x00;
}
@@ -798,6 +844,14 @@ static u8 update_white_list(struct hci_request *req)
return 0x00;
}
+ /* Once the controller offloading of advertisement monitor is in place,
+ * the if condition should include the support of MSFT extension
+ * support. If suspend is ongoing, whitelist should be the default to
+ * prevent waking by random advertisements.
+ */
+ if (!idr_is_empty(&hdev->adv_monitors_idr) && !hdev->suspended)
+ return 0x00;
+
/* Select filter policy to use white list */
return 0x01;
}
@@ -808,10 +862,24 @@ static bool scan_use_rpa(struct hci_dev *hdev)
}
static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval,
- u16 window, u8 own_addr_type, u8 filter_policy)
+ u16 window, u8 own_addr_type, u8 filter_policy,
+ bool addr_resolv)
{
struct hci_dev *hdev = req->hdev;
+ if (hdev->scanning_paused) {
+ bt_dev_dbg(hdev, "Scanning is paused for suspend");
+ return;
+ }
+
+ if (use_ll_privacy(hdev) &&
+ hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) &&
+ addr_resolv) {
+ u8 enable = 0x01;
+
+ hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable);
+ }
+
/* Use ext scanning if set ext scan param and ext scan enable is
* supported
*/
@@ -885,12 +953,39 @@ static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval,
}
}
+/* Returns true if an le connection is in the scanning state */
+static inline bool hci_is_le_conn_scanning(struct hci_dev *hdev)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (c->type == LE_LINK && c->state == BT_CONNECT &&
+ test_bit(HCI_CONN_SCANNING, &c->flags)) {
+ rcu_read_unlock();
+ return true;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return false;
+}
+
+/* Ensure to call hci_req_add_le_scan_disable() first to disable the
+ * controller based address resolution to be able to reconfigure
+ * resolving list.
+ */
void hci_req_add_le_passive_scan(struct hci_request *req)
{
struct hci_dev *hdev = req->hdev;
u8 own_addr_type;
u8 filter_policy;
u16 window, interval;
+ /* Background scanning should run with address resolution */
+ bool addr_resolv = true;
if (hdev->scanning_paused) {
bt_dev_dbg(hdev, "Scanning is paused for suspend");
@@ -927,8 +1022,11 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
filter_policy |= 0x02;
if (hdev->suspended) {
- window = LE_SUSPEND_SCAN_WINDOW;
- interval = LE_SUSPEND_SCAN_INTERVAL;
+ window = hdev->le_scan_window_suspend;
+ interval = hdev->le_scan_int_suspend;
+ } else if (hci_is_le_conn_scanning(hdev)) {
+ window = hdev->le_scan_window_connect;
+ interval = hdev->le_scan_int_connect;
} else {
window = hdev->le_scan_window;
interval = hdev->le_scan_interval;
@@ -936,7 +1034,7 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
bt_dev_dbg(hdev, "LE passive scan with whitelist = %d", filter_policy);
hci_req_start_scan(req, LE_SCAN_PASSIVE, interval, window,
- own_addr_type, filter_policy);
+ own_addr_type, filter_policy, addr_resolv);
}
static u8 get_adv_instance_scan_rsp_len(struct hci_dev *hdev, u8 instance)
@@ -973,15 +1071,19 @@ static void hci_req_clear_event_filter(struct hci_request *req)
static void hci_req_set_event_filter(struct hci_request *req)
{
- struct bdaddr_list *b;
+ struct bdaddr_list_with_flags *b;
struct hci_cp_set_event_filter f;
struct hci_dev *hdev = req->hdev;
- u8 scan;
+ u8 scan = SCAN_DISABLED;
/* Always clear event filter when starting */
hci_req_clear_event_filter(req);
- list_for_each_entry(b, &hdev->wakeable, list) {
+ list_for_each_entry(b, &hdev->whitelist, list) {
+ if (!hci_conn_test_flag(HCI_CONN_FLAG_REMOTE_WAKEUP,
+ b->current_flags))
+ continue;
+
memset(&f, 0, sizeof(f));
bacpy(&f.addr_conn_flt.bdaddr, &b->bdaddr);
f.flt_type = HCI_FLT_CONN_SETUP;
@@ -990,16 +1092,17 @@ static void hci_req_set_event_filter(struct hci_request *req)
bt_dev_dbg(hdev, "Adding event filters for %pMR", &b->bdaddr);
hci_req_add(req, HCI_OP_SET_EVENT_FLT, sizeof(f), &f);
+ scan = SCAN_PAGE;
}
- scan = !list_empty(&hdev->wakeable) ? SCAN_PAGE : SCAN_DISABLED;
hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
}
static void hci_req_config_le_suspend_scan(struct hci_request *req)
{
- /* Can't change params without disabling first */
- hci_req_add_le_scan_disable(req);
+ /* Before changing params disable scan if enabled */
+ if (hci_dev_test_flag(req->hdev, HCI_LE_SCAN))
+ hci_req_add_le_scan_disable(req, false);
/* Configure params and enable scanning */
hci_req_add_le_passive_scan(req);
@@ -1065,8 +1168,9 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next)
page_scan = SCAN_DISABLED;
hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &page_scan);
- /* Disable LE passive scan */
- hci_req_add_le_scan_disable(&req);
+ /* Disable LE passive scan if enabled */
+ if (hci_dev_test_flag(hdev, HCI_LE_SCAN))
+ hci_req_add_le_scan_disable(&req, false);
/* Mark task needing completion */
set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks);
@@ -1160,13 +1264,8 @@ static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev)
void __hci_req_disable_advertising(struct hci_request *req)
{
if (ext_adv_capable(req->hdev)) {
- struct hci_cp_le_set_ext_adv_enable cp;
-
- cp.enable = 0x00;
- /* Disable all sets since we only support one set at the moment */
- cp.num_of_sets = 0x00;
+ __hci_req_disable_ext_adv_instance(req, 0x00);
- hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_ENABLE, sizeof(cp), &cp);
} else {
u8 enable = 0x00;
@@ -1627,6 +1726,28 @@ int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance)
return hci_req_run(&req, NULL);
}
+static void enable_addr_resolution_complete(struct hci_dev *hdev, u8 status,
+ u16 opcode)
+{
+ BT_DBG("%s status %u", hdev->name, status);
+}
+
+void hci_req_disable_address_resolution(struct hci_dev *hdev)
+{
+ struct hci_request req;
+ __u8 enable = 0x00;
+
+ if (!use_ll_privacy(hdev) &&
+ !hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION))
+ return;
+
+ hci_req_init(&req, hdev);
+
+ hci_req_add(&req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable);
+
+ hci_req_run(&req, enable_addr_resolution_complete);
+}
+
static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode)
{
BT_DBG("%s status %u", hdev->name, status);
@@ -1786,8 +1907,6 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
int err;
struct adv_info *adv_instance;
bool secondary_adv;
- /* In ext adv set param interval is 3 octets */
- const u8 adv_interval[3] = { 0x00, 0x08, 0x00 };
if (instance > 0) {
adv_instance = hci_find_adv_instance(hdev, instance);
@@ -1820,8 +1939,9 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
memset(&cp, 0, sizeof(cp));
- memcpy(cp.min_interval, adv_interval, sizeof(cp.min_interval));
- memcpy(cp.max_interval, adv_interval, sizeof(cp.max_interval));
+ /* In ext adv set param interval is 3 octets */
+ hci_cpu_to_le24(hdev->le_adv_min_interval, cp.min_interval);
+ hci_cpu_to_le24(hdev->le_adv_max_interval, cp.max_interval);
secondary_adv = (flags & MGMT_ADV_FLAG_SEC_MASK);
@@ -1932,13 +2052,59 @@ int __hci_req_enable_ext_advertising(struct hci_request *req, u8 instance)
return 0;
}
+int __hci_req_disable_ext_adv_instance(struct hci_request *req, u8 instance)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_le_set_ext_adv_enable *cp;
+ struct hci_cp_ext_adv_set *adv_set;
+ u8 data[sizeof(*cp) + sizeof(*adv_set) * 1];
+ u8 req_size;
+
+ /* If request specifies an instance that doesn't exist, fail */
+ if (instance > 0 && !hci_find_adv_instance(hdev, instance))
+ return -EINVAL;
+
+ memset(data, 0, sizeof(data));
+
+ cp = (void *)data;
+ adv_set = (void *)cp->data;
+
+ /* Instance 0x00 indicates all advertising instances will be disabled */
+ cp->num_of_sets = !!instance;
+ cp->enable = 0x00;
+
+ adv_set->handle = instance;
+
+ req_size = sizeof(*cp) + sizeof(*adv_set) * cp->num_of_sets;
+ hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_ENABLE, req_size, data);
+
+ return 0;
+}
+
+int __hci_req_remove_ext_adv_instance(struct hci_request *req, u8 instance)
+{
+ struct hci_dev *hdev = req->hdev;
+
+ /* If request specifies an instance that doesn't exist, fail */
+ if (instance > 0 && !hci_find_adv_instance(hdev, instance))
+ return -EINVAL;
+
+ hci_req_add(req, HCI_OP_LE_REMOVE_ADV_SET, sizeof(instance), &instance);
+
+ return 0;
+}
+
int __hci_req_start_ext_adv(struct hci_request *req, u8 instance)
{
struct hci_dev *hdev = req->hdev;
+ struct adv_info *adv_instance = hci_find_adv_instance(hdev, instance);
int err;
- if (hci_dev_test_flag(hdev, HCI_LE_ADV))
- __hci_req_disable_advertising(req);
+ /* If instance isn't pending, the chip knows about it, and it's safe to
+ * disable
+ */
+ if (adv_instance && !adv_instance->pending)
+ __hci_req_disable_ext_adv_instance(req, instance);
err = __hci_req_setup_ext_adv_instance(req, instance);
if (err < 0)
@@ -2086,7 +2252,7 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk,
hci_dev_test_flag(hdev, HCI_ADVERTISING))
return;
- if (next_instance)
+ if (next_instance && !ext_adv_capable(hdev))
__hci_req_schedule_adv_instance(req, next_instance->instance,
false);
}
@@ -2128,7 +2294,13 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
if (use_rpa) {
int to;
- *own_addr_type = ADDR_LE_DEV_RANDOM;
+ /* If Controller supports LL Privacy use own address type is
+ * 0x03
+ */
+ if (use_ll_privacy(hdev))
+ *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED;
+ else
+ *own_addr_type = ADDR_LE_DEV_RANDOM;
if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) &&
!bacmp(&hdev->random_addr, &hdev->rpa))
@@ -2547,7 +2719,7 @@ static void bg_scan_update(struct work_struct *work)
static int le_scan_disable(struct hci_request *req, unsigned long opt)
{
- hci_req_add_le_scan_disable(req);
+ hci_req_add_le_scan_disable(req, false);
return 0;
}
@@ -2645,7 +2817,12 @@ static int le_scan_restart(struct hci_request *req, unsigned long opt)
if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
return 0;
- hci_req_add_le_scan_disable(req);
+ if (hdev->scanning_paused) {
+ bt_dev_dbg(hdev, "Scanning is paused for suspend");
+ return 0;
+ }
+
+ hci_req_add_le_scan_disable(req, false);
if (use_ext_scan(hdev)) {
struct hci_cp_le_set_ext_scan_enable ext_enable_cp;
@@ -2725,6 +2902,8 @@ static int active_scan(struct hci_request *req, unsigned long opt)
u8 own_addr_type;
/* White list is not used for discovery */
u8 filter_policy = 0x00;
+ /* Discovery doesn't require controller address resolution */
+ bool addr_resolv = false;
int err;
BT_DBG("%s", hdev->name);
@@ -2734,7 +2913,7 @@ static int active_scan(struct hci_request *req, unsigned long opt)
* discovery scanning parameters.
*/
if (hci_dev_test_flag(hdev, HCI_LE_SCAN))
- hci_req_add_le_scan_disable(req);
+ hci_req_add_le_scan_disable(req, false);
/* All active scans will be done with either a resolvable private
* address (when privacy feature has been enabled) or non-resolvable
@@ -2745,8 +2924,9 @@ static int active_scan(struct hci_request *req, unsigned long opt)
if (err < 0)
own_addr_type = ADDR_LE_DEV_PUBLIC;
- hci_req_start_scan(req, LE_SCAN_ACTIVE, interval, DISCOV_LE_SCAN_WIN,
- own_addr_type, filter_policy);
+ hci_req_start_scan(req, LE_SCAN_ACTIVE, interval,
+ hdev->le_scan_window_discovery, own_addr_type,
+ filter_policy, addr_resolv);
return 0;
}
@@ -2793,18 +2973,18 @@ static void start_discovery(struct hci_dev *hdev, u8 *status)
* to do BR/EDR inquiry.
*/
hci_req_sync(hdev, interleaved_discov,
- DISCOV_LE_SCAN_INT * 2, HCI_CMD_TIMEOUT,
+ hdev->le_scan_int_discovery * 2, HCI_CMD_TIMEOUT,
status);
break;
}
timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout);
- hci_req_sync(hdev, active_scan, DISCOV_LE_SCAN_INT,
+ hci_req_sync(hdev, active_scan, hdev->le_scan_int_discovery,
HCI_CMD_TIMEOUT, status);
break;
case DISCOV_TYPE_LE:
timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT);
- hci_req_sync(hdev, active_scan, DISCOV_LE_SCAN_INT,
+ hci_req_sync(hdev, active_scan, hdev->le_scan_int_discovery,
HCI_CMD_TIMEOUT, status);
break;
default:
@@ -2848,14 +3028,14 @@ bool hci_req_stop_discovery(struct hci_request *req)
if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
cancel_delayed_work(&hdev->le_scan_disable);
- hci_req_add_le_scan_disable(req);
+ hci_req_add_le_scan_disable(req, false);
}
ret = true;
} else {
/* Passive scanning */
if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
- hci_req_add_le_scan_disable(req);
+ hci_req_add_le_scan_disable(req, false);
ret = true;
}
}
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index 0e81614d235e..6a12e84c66c4 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -65,11 +65,12 @@ void __hci_req_write_fast_connectable(struct hci_request *req, bool enable);
void __hci_req_update_name(struct hci_request *req);
void __hci_req_update_eir(struct hci_request *req);
-void hci_req_add_le_scan_disable(struct hci_request *req);
+void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn);
void hci_req_add_le_passive_scan(struct hci_request *req);
void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next);
+void hci_req_disable_address_resolution(struct hci_dev *hdev);
void hci_req_reenable_advertising(struct hci_dev *hdev);
void __hci_req_enable_advertising(struct hci_request *req);
void __hci_req_disable_advertising(struct hci_request *req);
@@ -86,6 +87,8 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk,
int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance);
int __hci_req_start_ext_adv(struct hci_request *req, u8 instance);
int __hci_req_enable_ext_advertising(struct hci_request *req, u8 instance);
+int __hci_req_disable_ext_adv_instance(struct hci_request *req, u8 instance);
+int __hci_req_remove_ext_adv_instance(struct hci_request *req, u8 instance);
void __hci_req_clear_ext_adv_sets(struct hci_request *req);
int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
bool use_rpa, struct adv_info *adv_instance,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index caf38a8ea6a8..251b9128f530 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -52,7 +52,7 @@ struct hci_pinfo {
struct bt_sock bt;
struct hci_dev *hdev;
struct hci_filter filter;
- __u32 cmsg_mask;
+ __u8 cmsg_mask;
unsigned short channel;
unsigned long flags;
__u32 cookie;
@@ -443,8 +443,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
case HCI_DEV_SETUP:
if (hdev->manufacturer == 0xffff)
return NULL;
-
- /* fall through */
+ fallthrough;
case HCI_DEV_UP:
skb = bt_skb_alloc(HCI_MON_INDEX_INFO_SIZE, GFP_ATOMIC);
@@ -1399,7 +1398,7 @@ done:
static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg,
struct sk_buff *skb)
{
- __u32 mask = hci_pi(sk)->cmsg_mask;
+ __u8 mask = hci_pi(sk)->cmsg_mask;
if (mask & HCI_CMSG_DIR) {
int incoming = bt_cb(skb)->incoming;
@@ -1842,7 +1841,7 @@ drop:
}
static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int len)
+ sockptr_t optval, unsigned int len)
{
struct hci_ufilter uf = { .opcode = 0 };
struct sock *sk = sock->sk;
@@ -1862,7 +1861,7 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case HCI_DATA_DIR:
- if (get_user(opt, (int __user *)optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
err = -EFAULT;
break;
}
@@ -1874,7 +1873,7 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case HCI_TIME_STAMP:
- if (get_user(opt, (int __user *)optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
err = -EFAULT;
break;
}
@@ -1896,7 +1895,7 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
}
len = min_t(unsigned int, len, sizeof(uf));
- if (copy_from_user(&uf, optval, len)) {
+ if (copy_from_sockptr(&uf, optval, len)) {
err = -EFAULT;
break;
}
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 03be6a4baef3..595fb3c9d6c3 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -233,8 +233,6 @@ static const struct proto_ops hidp_sock_ops = {
.recvmsg = sock_no_recvmsg,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.connect = sock_no_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index fe913a5c754a..ade83e224567 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -666,8 +666,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err)
l2cap_seq_list_free(&chan->srej_list);
l2cap_seq_list_free(&chan->retrans_list);
-
- /* fall through */
+ fallthrough;
case L2CAP_MODE_STREAMING:
skb_queue_purge(&chan->tx_q);
@@ -872,7 +871,8 @@ static inline u8 l2cap_get_auth_type(struct l2cap_chan *chan)
else
return HCI_AT_NO_BONDING;
}
- /* fall through */
+ fallthrough;
+
default:
switch (chan->sec_level) {
case BT_SECURITY_HIGH:
@@ -2983,8 +2983,7 @@ static void l2cap_tx_state_wait_f(struct l2cap_chan *chan,
break;
case L2CAP_EV_RECV_REQSEQ_AND_FBIT:
l2cap_process_reqseq(chan, control->reqseq);
-
- /* Fall through */
+ fallthrough;
case L2CAP_EV_RECV_FBIT:
if (control && control->final) {
@@ -3311,7 +3310,7 @@ static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask)
case L2CAP_MODE_ERTM:
if (l2cap_mode_supported(mode, remote_feat_mask))
return mode;
- /* fall through */
+ fallthrough;
default:
return L2CAP_MODE_BASIC;
}
@@ -3447,7 +3446,7 @@ static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data
if (__l2cap_efs_supported(chan->conn))
set_bit(FLAG_EFS_ENABLE, &chan->flags);
- /* fall through */
+ fallthrough;
default:
chan->mode = l2cap_select_mode(rfc.mode, chan->conn->feat_mask);
break;
@@ -4539,7 +4538,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
goto done;
break;
}
- /* fall through */
+ fallthrough;
default:
l2cap_chan_set_err(chan, ECONNRESET);
@@ -7719,7 +7718,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
conn->mtu = hcon->hdev->le_mtu;
break;
}
- /* fall through */
+ fallthrough;
default:
conn->mtu = hcon->hdev->acl_mtu;
break;
@@ -7841,7 +7840,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
case L2CAP_MODE_STREAMING:
if (!disable_ertm)
break;
- /* fall through */
+ fallthrough;
default:
err = -EOPNOTSUPP;
goto done;
@@ -7893,11 +7892,13 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
else
hcon = hci_connect_le_scan(hdev, dst, dst_type,
chan->sec_level,
- HCI_LE_CONN_TIMEOUT);
+ HCI_LE_CONN_TIMEOUT,
+ CONN_REASON_L2CAP_CHAN);
} else {
u8 auth_type = l2cap_get_auth_type(chan);
- hcon = hci_connect_acl(hdev, dst, chan->sec_level, auth_type);
+ hcon = hci_connect_acl(hdev, dst, chan->sec_level, auth_type,
+ CONN_REASON_L2CAP_CHAN);
}
if (IS_ERR(hcon)) {
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index a995d2c51fa7..e1a3e66b1754 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -284,7 +284,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog)
case L2CAP_MODE_STREAMING:
if (!disable_ertm)
break;
- /* fall through */
+ fallthrough;
default:
err = -EOPNOTSUPP;
goto done;
@@ -703,7 +703,7 @@ static bool l2cap_valid_mtu(struct l2cap_chan *chan, u16 mtu)
}
static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct l2cap_chan *chan = l2cap_pi(sk)->chan;
@@ -736,7 +736,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
opts.txwin_size = chan->tx_win;
len = min_t(unsigned int, sizeof(opts), optlen);
- if (copy_from_user((char *) &opts, optval, len)) {
+ if (copy_from_sockptr(&opts, optval, len)) {
err = -EFAULT;
break;
}
@@ -760,7 +760,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
case L2CAP_MODE_STREAMING:
if (!disable_ertm)
break;
- /* fall through */
+ fallthrough;
default:
err = -EINVAL;
break;
@@ -782,7 +782,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
break;
case L2CAP_LM:
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
@@ -859,7 +859,7 @@ static int l2cap_set_mode(struct l2cap_chan *chan, u8 mode)
}
static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct l2cap_chan *chan = l2cap_pi(sk)->chan;
@@ -891,7 +891,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
sec.level = BT_SECURITY_LOW;
len = min_t(unsigned int, sizeof(sec), optlen);
- if (copy_from_user((char *) &sec, optval, len)) {
+ if (copy_from_sockptr(&sec, optval, len)) {
err = -EFAULT;
break;
}
@@ -939,7 +939,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
@@ -954,7 +954,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_FLUSHABLE:
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
@@ -990,7 +990,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
pwr.force_active = BT_POWER_FORCE_ACTIVE_ON;
len = min_t(unsigned int, sizeof(pwr), optlen);
- if (copy_from_user((char *) &pwr, optval, len)) {
+ if (copy_from_sockptr(&pwr, optval, len)) {
err = -EFAULT;
break;
}
@@ -1002,7 +1002,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_CHANNEL_POLICY:
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
@@ -1050,7 +1050,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (get_user(opt, (u16 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u16))) {
err = -EFAULT;
break;
}
@@ -1081,7 +1081,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (get_user(opt, (u8 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u8))) {
err = -EFAULT;
break;
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 9e8a3cccc6ca..5bbe71002fb9 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -36,9 +36,11 @@
#include "hci_request.h"
#include "smp.h"
#include "mgmt_util.h"
+#include "mgmt_config.h"
+#include "msft.h"
#define MGMT_VERSION 1
-#define MGMT_REVISION 17
+#define MGMT_REVISION 18
static const u16 mgmt_commands[] = {
MGMT_OP_READ_INDEX_LIST,
@@ -111,6 +113,15 @@ static const u16 mgmt_commands[] = {
MGMT_OP_READ_SECURITY_INFO,
MGMT_OP_READ_EXP_FEATURES_INFO,
MGMT_OP_SET_EXP_FEATURE,
+ MGMT_OP_READ_DEF_SYSTEM_CONFIG,
+ MGMT_OP_SET_DEF_SYSTEM_CONFIG,
+ MGMT_OP_READ_DEF_RUNTIME_CONFIG,
+ MGMT_OP_SET_DEF_RUNTIME_CONFIG,
+ MGMT_OP_GET_DEVICE_FLAGS,
+ MGMT_OP_SET_DEVICE_FLAGS,
+ MGMT_OP_READ_ADV_MONITOR_FEATURES,
+ MGMT_OP_ADD_ADV_PATTERNS_MONITOR,
+ MGMT_OP_REMOVE_ADV_MONITOR,
};
static const u16 mgmt_events[] = {
@@ -151,6 +162,7 @@ static const u16 mgmt_events[] = {
MGMT_EV_EXT_INFO_CHANGED,
MGMT_EV_PHY_CONFIGURATION_CHANGED,
MGMT_EV_EXP_FEATURE_CHANGED,
+ MGMT_EV_DEVICE_FLAGS_CHANGED,
};
static const u16 mgmt_untrusted_commands[] = {
@@ -162,6 +174,8 @@ static const u16 mgmt_untrusted_commands[] = {
MGMT_OP_READ_EXT_INFO,
MGMT_OP_READ_SECURITY_INFO,
MGMT_OP_READ_EXP_FEATURES_INFO,
+ MGMT_OP_READ_DEF_SYSTEM_CONFIG,
+ MGMT_OP_READ_DEF_RUNTIME_CONFIG,
};
static const u16 mgmt_untrusted_events[] = {
@@ -177,6 +191,8 @@ static const u16 mgmt_untrusted_events[] = {
MGMT_EV_EXT_INDEX_REMOVED,
MGMT_EV_EXT_INFO_CHANGED,
MGMT_EV_EXP_FEATURE_CHANGED,
+ MGMT_EV_ADV_MONITOR_ADDED,
+ MGMT_EV_ADV_MONITOR_REMOVED,
};
#define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000)
@@ -779,10 +795,15 @@ static u32 get_supported_settings(struct hci_dev *hdev)
if (lmp_le_capable(hdev)) {
settings |= MGMT_SETTING_LE;
- settings |= MGMT_SETTING_ADVERTISING;
settings |= MGMT_SETTING_SECURE_CONN;
settings |= MGMT_SETTING_PRIVACY;
settings |= MGMT_SETTING_STATIC_ADDRESS;
+
+ /* When the experimental feature for LL Privacy support is
+ * enabled, then advertising is no longer supported.
+ */
+ if (!hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
+ settings |= MGMT_SETTING_ADVERTISING;
}
if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) ||
@@ -2915,7 +2936,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
if (cp->addr.type == BDADDR_BREDR) {
conn = hci_connect_acl(hdev, &cp->addr.bdaddr, sec_level,
- auth_type);
+ auth_type, CONN_REASON_PAIR_DEVICE);
} else {
u8 addr_type = le_addr_type(cp->addr.type);
struct hci_conn_params *p;
@@ -2934,9 +2955,9 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
if (p->auto_connect == HCI_AUTO_CONN_EXPLICIT)
p->auto_connect = HCI_AUTO_CONN_DISABLED;
- conn = hci_connect_le_scan(hdev, &cp->addr.bdaddr,
- addr_type, sec_level,
- HCI_LE_CONN_TIMEOUT);
+ conn = hci_connect_le_scan(hdev, &cp->addr.bdaddr, addr_type,
+ sec_level, HCI_LE_CONN_TIMEOUT,
+ CONN_REASON_PAIR_DEVICE);
}
if (IS_ERR(conn)) {
@@ -3037,6 +3058,20 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, 0,
addr, sizeof(*addr));
+
+ /* Since user doesn't want to proceed with the connection, abort any
+ * ongoing pairing and then terminate the link if it was created
+ * because of the pair device action.
+ */
+ if (addr->type == BDADDR_BREDR)
+ hci_remove_link_key(hdev, &addr->bdaddr);
+ else
+ smp_cancel_and_remove_pairing(hdev, &addr->bdaddr,
+ le_addr_type(addr->type));
+
+ if (conn->conn_reason == CONN_REASON_PAIR_DEVICE)
+ hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
+
unlock:
hci_dev_unlock(hdev);
return err;
@@ -3723,12 +3758,25 @@ static const u8 debug_uuid[16] = {
};
#endif
+/* 671b10b5-42c0-4696-9227-eb28d1b049d6 */
+static const u8 simult_central_periph_uuid[16] = {
+ 0xd6, 0x49, 0xb0, 0xd1, 0x28, 0xeb, 0x27, 0x92,
+ 0x96, 0x46, 0xc0, 0x42, 0xb5, 0x10, 0x1b, 0x67,
+};
+
+/* 15c0a148-c273-11ea-b3de-0242ac130004 */
+static const u8 rpa_resolution_uuid[16] = {
+ 0x04, 0x00, 0x13, 0xac, 0x42, 0x02, 0xde, 0xb3,
+ 0xea, 0x11, 0x73, 0xc2, 0x48, 0xa1, 0xc0, 0x15,
+};
+
static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev,
void *data, u16 data_len)
{
- char buf[42];
+ char buf[62]; /* Enough space for 3 features */
struct mgmt_rp_read_exp_features_info *rp = (void *)buf;
u16 idx = 0;
+ u32 flags;
bt_dev_dbg(hdev, "sock %p", sk);
@@ -3736,7 +3784,7 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev,
#ifdef CONFIG_BT_FEATURE_DEBUG
if (!hdev) {
- u32 flags = bt_dbg_get() ? BIT(0) : 0;
+ flags = bt_dbg_get() ? BIT(0) : 0;
memcpy(rp->features[idx].uuid, debug_uuid, 16);
rp->features[idx].flags = cpu_to_le32(flags);
@@ -3744,6 +3792,31 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev,
}
#endif
+ if (hdev) {
+ if (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) &&
+ (hdev->le_states[4] & 0x08) && /* Central */
+ (hdev->le_states[4] & 0x40) && /* Peripheral */
+ (hdev->le_states[3] & 0x10)) /* Simultaneous */
+ flags = BIT(0);
+ else
+ flags = 0;
+
+ memcpy(rp->features[idx].uuid, simult_central_periph_uuid, 16);
+ rp->features[idx].flags = cpu_to_le32(flags);
+ idx++;
+ }
+
+ if (hdev && use_ll_privacy(hdev)) {
+ if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
+ flags = BIT(0) | BIT(1);
+ else
+ flags = BIT(1);
+
+ memcpy(rp->features[idx].uuid, rpa_resolution_uuid, 16);
+ rp->features[idx].flags = cpu_to_le32(flags);
+ idx++;
+ }
+
rp->feature_count = cpu_to_le16(idx);
/* After reading the experimental features information, enable
@@ -3756,6 +3829,21 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev,
0, rp, sizeof(*rp) + (20 * idx));
}
+static int exp_ll_privacy_feature_changed(bool enabled, struct hci_dev *hdev,
+ struct sock *skip)
+{
+ struct mgmt_ev_exp_feature_changed ev;
+
+ memset(&ev, 0, sizeof(ev));
+ memcpy(ev.uuid, rpa_resolution_uuid, 16);
+ ev.flags = cpu_to_le32((enabled ? BIT(0) : 0) | BIT(1));
+
+ return mgmt_limited_event(MGMT_EV_EXP_FEATURE_CHANGED, hdev,
+ &ev, sizeof(ev),
+ HCI_MGMT_EXP_FEATURE_EVENTS, skip);
+
+}
+
#ifdef CONFIG_BT_FEATURE_DEBUG
static int exp_debug_feature_changed(bool enabled, struct sock *skip)
{
@@ -3794,6 +3882,16 @@ static int set_exp_feature(struct sock *sk, struct hci_dev *hdev,
}
#endif
+ if (hdev && use_ll_privacy(hdev) && !hdev_is_powered(hdev)) {
+ bool changed = hci_dev_test_flag(hdev,
+ HCI_ENABLE_LL_PRIVACY);
+
+ hci_dev_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY);
+
+ if (changed)
+ exp_ll_privacy_feature_changed(false, hdev, sk);
+ }
+
hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
return mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE,
@@ -3844,11 +3942,401 @@ static int set_exp_feature(struct sock *sk, struct hci_dev *hdev,
}
#endif
+ if (!memcmp(cp->uuid, rpa_resolution_uuid, 16)) {
+ bool val, changed;
+ int err;
+ u32 flags;
+
+ /* Command requires to use the controller index */
+ if (!hdev)
+ return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_INDEX);
+
+ /* Changes can only be made when controller is powered down */
+ if (hdev_is_powered(hdev))
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_NOT_POWERED);
+
+ /* Parameters are limited to a single octet */
+ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1)
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ /* Only boolean on/off is supported */
+ if (cp->param[0] != 0x00 && cp->param[0] != 0x01)
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ val = !!cp->param[0];
+
+ if (val) {
+ changed = !hci_dev_test_flag(hdev,
+ HCI_ENABLE_LL_PRIVACY);
+ hci_dev_set_flag(hdev, HCI_ENABLE_LL_PRIVACY);
+ hci_dev_clear_flag(hdev, HCI_ADVERTISING);
+
+ /* Enable LL privacy + supported settings changed */
+ flags = BIT(0) | BIT(1);
+ } else {
+ changed = hci_dev_test_flag(hdev,
+ HCI_ENABLE_LL_PRIVACY);
+ hci_dev_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY);
+
+ /* Disable LL privacy + supported settings changed */
+ flags = BIT(1);
+ }
+
+ memcpy(rp.uuid, rpa_resolution_uuid, 16);
+ rp.flags = cpu_to_le32(flags);
+
+ hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
+
+ err = mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_SET_EXP_FEATURE, 0,
+ &rp, sizeof(rp));
+
+ if (changed)
+ exp_ll_privacy_feature_changed(val, hdev, sk);
+
+ return err;
+ }
+
return mgmt_cmd_status(sk, hdev ? hdev->id : MGMT_INDEX_NONE,
MGMT_OP_SET_EXP_FEATURE,
MGMT_STATUS_NOT_SUPPORTED);
}
+#define SUPPORTED_DEVICE_FLAGS() ((1U << HCI_CONN_FLAG_MAX) - 1)
+
+static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
+ u16 data_len)
+{
+ struct mgmt_cp_get_device_flags *cp = data;
+ struct mgmt_rp_get_device_flags rp;
+ struct bdaddr_list_with_flags *br_params;
+ struct hci_conn_params *params;
+ u32 supported_flags = SUPPORTED_DEVICE_FLAGS();
+ u32 current_flags = 0;
+ u8 status = MGMT_STATUS_INVALID_PARAMS;
+
+ bt_dev_dbg(hdev, "Get device flags %pMR (type 0x%x)\n",
+ &cp->addr.bdaddr, cp->addr.type);
+
+ hci_dev_lock(hdev);
+
+ if (cp->addr.type == BDADDR_BREDR) {
+ br_params = hci_bdaddr_list_lookup_with_flags(&hdev->whitelist,
+ &cp->addr.bdaddr,
+ cp->addr.type);
+ if (!br_params)
+ goto done;
+
+ current_flags = br_params->current_flags;
+ } else {
+ params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
+ le_addr_type(cp->addr.type));
+
+ if (!params)
+ goto done;
+
+ current_flags = params->current_flags;
+ }
+
+ bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
+ rp.addr.type = cp->addr.type;
+ rp.supported_flags = cpu_to_le32(supported_flags);
+ rp.current_flags = cpu_to_le32(current_flags);
+
+ status = MGMT_STATUS_SUCCESS;
+
+done:
+ hci_dev_unlock(hdev);
+
+ return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_DEVICE_FLAGS, status,
+ &rp, sizeof(rp));
+}
+
+static void device_flags_changed(struct sock *sk, struct hci_dev *hdev,
+ bdaddr_t *bdaddr, u8 bdaddr_type,
+ u32 supported_flags, u32 current_flags)
+{
+ struct mgmt_ev_device_flags_changed ev;
+
+ bacpy(&ev.addr.bdaddr, bdaddr);
+ ev.addr.type = bdaddr_type;
+ ev.supported_flags = cpu_to_le32(supported_flags);
+ ev.current_flags = cpu_to_le32(current_flags);
+
+ mgmt_event(MGMT_EV_DEVICE_FLAGS_CHANGED, hdev, &ev, sizeof(ev), sk);
+}
+
+static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
+ u16 len)
+{
+ struct mgmt_cp_set_device_flags *cp = data;
+ struct bdaddr_list_with_flags *br_params;
+ struct hci_conn_params *params;
+ u8 status = MGMT_STATUS_INVALID_PARAMS;
+ u32 supported_flags = SUPPORTED_DEVICE_FLAGS();
+ u32 current_flags = __le32_to_cpu(cp->current_flags);
+
+ bt_dev_dbg(hdev, "Set device flags %pMR (type 0x%x) = 0x%x",
+ &cp->addr.bdaddr, cp->addr.type,
+ __le32_to_cpu(current_flags));
+
+ if ((supported_flags | current_flags) != supported_flags) {
+ bt_dev_warn(hdev, "Bad flag given (0x%x) vs supported (0x%0x)",
+ current_flags, supported_flags);
+ goto done;
+ }
+
+ hci_dev_lock(hdev);
+
+ if (cp->addr.type == BDADDR_BREDR) {
+ br_params = hci_bdaddr_list_lookup_with_flags(&hdev->whitelist,
+ &cp->addr.bdaddr,
+ cp->addr.type);
+
+ if (br_params) {
+ br_params->current_flags = current_flags;
+ status = MGMT_STATUS_SUCCESS;
+ } else {
+ bt_dev_warn(hdev, "No such BR/EDR device %pMR (0x%x)",
+ &cp->addr.bdaddr, cp->addr.type);
+ }
+ } else {
+ params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
+ le_addr_type(cp->addr.type));
+ if (params) {
+ params->current_flags = current_flags;
+ status = MGMT_STATUS_SUCCESS;
+ } else {
+ bt_dev_warn(hdev, "No such LE device %pMR (0x%x)",
+ &cp->addr.bdaddr,
+ le_addr_type(cp->addr.type));
+ }
+ }
+
+done:
+ hci_dev_unlock(hdev);
+
+ if (status == MGMT_STATUS_SUCCESS)
+ device_flags_changed(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+ supported_flags, current_flags);
+
+ return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_FLAGS, status,
+ &cp->addr, sizeof(cp->addr));
+}
+
+static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev,
+ u16 handle)
+{
+ struct mgmt_ev_adv_monitor_added ev;
+
+ ev.monitor_handle = cpu_to_le16(handle);
+
+ mgmt_event(MGMT_EV_ADV_MONITOR_ADDED, hdev, &ev, sizeof(ev), sk);
+}
+
+static void mgmt_adv_monitor_removed(struct sock *sk, struct hci_dev *hdev,
+ u16 handle)
+{
+ struct mgmt_ev_adv_monitor_added ev;
+
+ ev.monitor_handle = cpu_to_le16(handle);
+
+ mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk);
+}
+
+static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len)
+{
+ struct adv_monitor *monitor = NULL;
+ struct mgmt_rp_read_adv_monitor_features *rp = NULL;
+ int handle;
+ size_t rp_size = 0;
+ __u32 supported = 0;
+ __u16 num_handles = 0;
+ __u16 handles[HCI_MAX_ADV_MONITOR_NUM_HANDLES];
+
+ BT_DBG("request for %s", hdev->name);
+
+ hci_dev_lock(hdev);
+
+ if (msft_get_features(hdev) & MSFT_FEATURE_MASK_LE_ADV_MONITOR)
+ supported |= MGMT_ADV_MONITOR_FEATURE_MASK_OR_PATTERNS;
+
+ idr_for_each_entry(&hdev->adv_monitors_idr, monitor, handle) {
+ handles[num_handles++] = monitor->handle;
+ }
+
+ hci_dev_unlock(hdev);
+
+ rp_size = sizeof(*rp) + (num_handles * sizeof(u16));
+ rp = kmalloc(rp_size, GFP_KERNEL);
+ if (!rp)
+ return -ENOMEM;
+
+ /* Once controller-based monitoring is in place, the enabled_features
+ * should reflect the use.
+ */
+ rp->supported_features = cpu_to_le32(supported);
+ rp->enabled_features = 0;
+ rp->max_num_handles = cpu_to_le16(HCI_MAX_ADV_MONITOR_NUM_HANDLES);
+ rp->max_num_patterns = HCI_MAX_ADV_MONITOR_NUM_PATTERNS;
+ rp->num_handles = cpu_to_le16(num_handles);
+ if (num_handles)
+ memcpy(&rp->handles, &handles, (num_handles * sizeof(u16)));
+
+ return mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_READ_ADV_MONITOR_FEATURES,
+ MGMT_STATUS_SUCCESS, rp, rp_size);
+}
+
+static int add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len)
+{
+ struct mgmt_cp_add_adv_patterns_monitor *cp = data;
+ struct mgmt_rp_add_adv_patterns_monitor rp;
+ struct adv_monitor *m = NULL;
+ struct adv_pattern *p = NULL;
+ unsigned int mp_cnt = 0, prev_adv_monitors_cnt;
+ __u8 cp_ofst = 0, cp_len = 0;
+ int err, i;
+
+ BT_DBG("request for %s", hdev->name);
+
+ if (len <= sizeof(*cp) || cp->pattern_count == 0) {
+ err = mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_ADD_ADV_PATTERNS_MONITOR,
+ MGMT_STATUS_INVALID_PARAMS);
+ goto failed;
+ }
+
+ m = kmalloc(sizeof(*m), GFP_KERNEL);
+ if (!m) {
+ err = -ENOMEM;
+ goto failed;
+ }
+
+ INIT_LIST_HEAD(&m->patterns);
+ m->active = false;
+
+ for (i = 0; i < cp->pattern_count; i++) {
+ if (++mp_cnt > HCI_MAX_ADV_MONITOR_NUM_PATTERNS) {
+ err = mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_ADD_ADV_PATTERNS_MONITOR,
+ MGMT_STATUS_INVALID_PARAMS);
+ goto failed;
+ }
+
+ cp_ofst = cp->patterns[i].offset;
+ cp_len = cp->patterns[i].length;
+ if (cp_ofst >= HCI_MAX_AD_LENGTH ||
+ cp_len > HCI_MAX_AD_LENGTH ||
+ (cp_ofst + cp_len) > HCI_MAX_AD_LENGTH) {
+ err = mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_ADD_ADV_PATTERNS_MONITOR,
+ MGMT_STATUS_INVALID_PARAMS);
+ goto failed;
+ }
+
+ p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ err = -ENOMEM;
+ goto failed;
+ }
+
+ p->ad_type = cp->patterns[i].ad_type;
+ p->offset = cp->patterns[i].offset;
+ p->length = cp->patterns[i].length;
+ memcpy(p->value, cp->patterns[i].value, p->length);
+
+ INIT_LIST_HEAD(&p->list);
+ list_add(&p->list, &m->patterns);
+ }
+
+ if (mp_cnt != cp->pattern_count) {
+ err = mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_ADD_ADV_PATTERNS_MONITOR,
+ MGMT_STATUS_INVALID_PARAMS);
+ goto failed;
+ }
+
+ hci_dev_lock(hdev);
+
+ prev_adv_monitors_cnt = hdev->adv_monitors_cnt;
+
+ err = hci_add_adv_monitor(hdev, m);
+ if (err) {
+ if (err == -ENOSPC) {
+ mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_ADD_ADV_PATTERNS_MONITOR,
+ MGMT_STATUS_NO_RESOURCES);
+ }
+ goto unlock;
+ }
+
+ if (hdev->adv_monitors_cnt > prev_adv_monitors_cnt)
+ mgmt_adv_monitor_added(sk, hdev, m->handle);
+
+ hci_dev_unlock(hdev);
+
+ rp.monitor_handle = cpu_to_le16(m->handle);
+
+ return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_ADV_PATTERNS_MONITOR,
+ MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
+
+unlock:
+ hci_dev_unlock(hdev);
+
+failed:
+ hci_free_adv_monitor(m);
+ return err;
+}
+
+static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len)
+{
+ struct mgmt_cp_remove_adv_monitor *cp = data;
+ struct mgmt_rp_remove_adv_monitor rp;
+ unsigned int prev_adv_monitors_cnt;
+ u16 handle;
+ int err;
+
+ BT_DBG("request for %s", hdev->name);
+
+ hci_dev_lock(hdev);
+
+ handle = __le16_to_cpu(cp->monitor_handle);
+ prev_adv_monitors_cnt = hdev->adv_monitors_cnt;
+
+ err = hci_remove_adv_monitor(hdev, handle);
+ if (err == -ENOENT) {
+ err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADV_MONITOR,
+ MGMT_STATUS_INVALID_INDEX);
+ goto unlock;
+ }
+
+ if (hdev->adv_monitors_cnt < prev_adv_monitors_cnt)
+ mgmt_adv_monitor_removed(sk, hdev, handle);
+
+ hci_dev_unlock(hdev);
+
+ rp.monitor_handle = cp->monitor_handle;
+
+ return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_ADV_MONITOR,
+ MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
+
+unlock:
+ hci_dev_unlock(hdev);
+ return err;
+}
+
static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status,
u16 opcode, struct sk_buff *skb)
{
@@ -4147,7 +4635,7 @@ static bool discovery_type_is_valid(struct hci_dev *hdev, uint8_t type,
*mgmt_status = mgmt_le_support(hdev);
if (*mgmt_status)
return false;
- /* Intentional fall-through */
+ fallthrough;
case DISCOV_TYPE_BREDR:
*mgmt_status = mgmt_bredr_support(hdev);
if (*mgmt_status)
@@ -4662,6 +5150,13 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING,
status);
+ /* Enabling the experimental LL Privay support disables support for
+ * advertising.
+ */
+ if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING,
+ MGMT_STATUS_NOT_SUPPORTED);
+
if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02)
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING,
MGMT_STATUS_INVALID_PARAMS);
@@ -4848,7 +5343,7 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev,
hci_req_init(&req, hdev);
- hci_req_add_le_scan_disable(&req);
+ hci_req_add_le_scan_disable(&req, false);
hci_req_add_le_passive_scan(&req);
hci_req_run(&req, NULL);
@@ -5523,7 +6018,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
case MGMT_LTK_P256_DEBUG:
authenticated = 0x00;
type = SMP_LTK_P256_DEBUG;
- /* fall through */
+ fallthrough;
default:
continue;
}
@@ -5966,7 +6461,9 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
{
struct mgmt_cp_add_device *cp = data;
u8 auto_conn, addr_type;
+ struct hci_conn_params *params;
int err;
+ u32 current_flags = 0;
bt_dev_dbg(hdev, "sock %p", sk);
@@ -5993,8 +6490,9 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- err = hci_bdaddr_list_add(&hdev->whitelist, &cp->addr.bdaddr,
- cp->addr.type);
+ err = hci_bdaddr_list_add_with_flags(&hdev->whitelist,
+ &cp->addr.bdaddr,
+ cp->addr.type, 0);
if (err)
goto unlock;
@@ -6033,12 +6531,19 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
MGMT_STATUS_FAILED, &cp->addr,
sizeof(cp->addr));
goto unlock;
+ } else {
+ params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
+ addr_type);
+ if (params)
+ current_flags = params->current_flags;
}
hci_update_background_scan(hdev);
added:
device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action);
+ device_flags_changed(NULL, hdev, &cp->addr.bdaddr, cp->addr.type,
+ SUPPORTED_DEVICE_FLAGS(), current_flags);
err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE,
MGMT_STATUS_SUCCESS, &cp->addr,
@@ -6724,6 +7229,13 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_ADV_FEATURES,
MGMT_STATUS_REJECTED);
+ /* Enabling the experimental LL Privay support disables support for
+ * advertising.
+ */
+ if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING,
+ MGMT_STATUS_NOT_SUPPORTED);
+
hci_dev_lock(hdev);
rp_len = sizeof(*rp) + hdev->adv_instance_cnt;
@@ -6927,6 +7439,13 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
status);
+ /* Enabling the experimental LL Privay support disables support for
+ * advertising.
+ */
+ if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING,
+ MGMT_STATUS_NOT_SUPPORTED);
+
if (cp->instance < 1 || cp->instance > HCI_MAX_ADV_INSTANCES)
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
MGMT_STATUS_INVALID_PARAMS);
@@ -7091,6 +7610,13 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
bt_dev_dbg(hdev, "sock %p", sk);
+ /* Enabling the experimental LL Privay support disables support for
+ * advertising.
+ */
+ if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING,
+ MGMT_STATUS_NOT_SUPPORTED);
+
hci_dev_lock(hdev);
if (cp->instance && !hci_find_adv_instance(hdev, cp->instance)) {
@@ -7116,6 +7642,12 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
hci_req_init(&req, hdev);
+ /* If we use extended advertising, instance is disabled and removed */
+ if (ext_adv_capable(hdev)) {
+ __hci_req_disable_ext_adv_instance(&req, cp->instance);
+ __hci_req_remove_ext_adv_instance(&req, cp->instance);
+ }
+
hci_req_clear_adv_instance(hdev, sk, &req, cp->instance, true);
if (list_empty(&hdev->adv_instances))
@@ -7297,6 +7829,20 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
{ set_exp_feature, MGMT_SET_EXP_FEATURE_SIZE,
HCI_MGMT_VAR_LEN |
HCI_MGMT_HDEV_OPTIONAL },
+ { read_def_system_config, MGMT_READ_DEF_SYSTEM_CONFIG_SIZE,
+ HCI_MGMT_UNTRUSTED },
+ { set_def_system_config, MGMT_SET_DEF_SYSTEM_CONFIG_SIZE,
+ HCI_MGMT_VAR_LEN },
+ { read_def_runtime_config, MGMT_READ_DEF_RUNTIME_CONFIG_SIZE,
+ HCI_MGMT_UNTRUSTED },
+ { set_def_runtime_config, MGMT_SET_DEF_RUNTIME_CONFIG_SIZE,
+ HCI_MGMT_VAR_LEN },
+ { get_device_flags, MGMT_GET_DEVICE_FLAGS_SIZE },
+ { set_device_flags, MGMT_SET_DEVICE_FLAGS_SIZE },
+ { read_adv_mon_features, MGMT_READ_ADV_MONITOR_FEATURES_SIZE },
+ { add_adv_patterns_monitor,MGMT_ADD_ADV_PATTERNS_MONITOR_SIZE,
+ HCI_MGMT_VAR_LEN },
+ { remove_adv_monitor, MGMT_REMOVE_ADV_MONITOR_SIZE },
};
void mgmt_index_added(struct hci_dev *hdev)
@@ -8216,8 +8762,11 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
if (!hci_discovery_active(hdev)) {
if (link_type == ACL_LINK)
return;
- if (link_type == LE_LINK && list_empty(&hdev->pend_le_reports))
+ if (link_type == LE_LINK &&
+ list_empty(&hdev->pend_le_reports) &&
+ !hci_is_adv_monitoring(hdev)) {
return;
+ }
}
if (hdev->discovery.result_filtering) {
diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c
new file mode 100644
index 000000000000..b30b571f8caf
--- /dev/null
+++ b/net/bluetooth/mgmt_config.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (C) 2020 Google Corporation
+ */
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/mgmt.h>
+
+#include "mgmt_util.h"
+#include "mgmt_config.h"
+
+#define HDEV_PARAM_U16(_param_code_, _param_name_) \
+{ \
+ { cpu_to_le16(_param_code_), sizeof(__u16) }, \
+ { cpu_to_le16(hdev->_param_name_) } \
+}
+
+#define HDEV_PARAM_U16_JIFFIES_TO_MSECS(_param_code_, _param_name_) \
+{ \
+ { cpu_to_le16(_param_code_), sizeof(__u16) }, \
+ { cpu_to_le16(jiffies_to_msecs(hdev->_param_name_)) } \
+}
+
+int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
+ u16 data_len)
+{
+ struct {
+ struct mgmt_tlv entry;
+ union {
+ /* This is a simplification for now since all values
+ * are 16 bits. In the future, this code may need
+ * refactoring to account for variable length values
+ * and properly calculate the required buffer size.
+ */
+ __le16 value;
+ };
+ } __packed params[] = {
+ /* Please see mgmt-api.txt for documentation of these values */
+ HDEV_PARAM_U16(0x0000, def_page_scan_type),
+ HDEV_PARAM_U16(0x0001, def_page_scan_int),
+ HDEV_PARAM_U16(0x0002, def_page_scan_window),
+ HDEV_PARAM_U16(0x0003, def_inq_scan_type),
+ HDEV_PARAM_U16(0x0004, def_inq_scan_int),
+ HDEV_PARAM_U16(0x0005, def_inq_scan_window),
+ HDEV_PARAM_U16(0x0006, def_br_lsto),
+ HDEV_PARAM_U16(0x0007, def_page_timeout),
+ HDEV_PARAM_U16(0x0008, sniff_min_interval),
+ HDEV_PARAM_U16(0x0009, sniff_max_interval),
+ HDEV_PARAM_U16(0x000a, le_adv_min_interval),
+ HDEV_PARAM_U16(0x000b, le_adv_max_interval),
+ HDEV_PARAM_U16(0x000c, def_multi_adv_rotation_duration),
+ HDEV_PARAM_U16(0x000d, le_scan_interval),
+ HDEV_PARAM_U16(0x000e, le_scan_window),
+ HDEV_PARAM_U16(0x000f, le_scan_int_suspend),
+ HDEV_PARAM_U16(0x0010, le_scan_window_suspend),
+ HDEV_PARAM_U16(0x0011, le_scan_int_discovery),
+ HDEV_PARAM_U16(0x0012, le_scan_window_discovery),
+ HDEV_PARAM_U16(0x0013, le_scan_int_adv_monitor),
+ HDEV_PARAM_U16(0x0014, le_scan_window_adv_monitor),
+ HDEV_PARAM_U16(0x0015, le_scan_int_connect),
+ HDEV_PARAM_U16(0x0016, le_scan_window_connect),
+ HDEV_PARAM_U16(0x0017, le_conn_min_interval),
+ HDEV_PARAM_U16(0x0018, le_conn_max_interval),
+ HDEV_PARAM_U16(0x0019, le_conn_latency),
+ HDEV_PARAM_U16(0x001a, le_supv_timeout),
+ HDEV_PARAM_U16_JIFFIES_TO_MSECS(0x001b,
+ def_le_autoconnect_timeout),
+ };
+ struct mgmt_rp_read_def_system_config *rp = (void *)params;
+
+ bt_dev_dbg(hdev, "sock %p", sk);
+
+ return mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_READ_DEF_SYSTEM_CONFIG,
+ 0, rp, sizeof(params));
+}
+
+#define TO_TLV(x) ((struct mgmt_tlv *)(x))
+#define TLV_GET_LE16(tlv) le16_to_cpu(*((__le16 *)(TO_TLV(tlv)->value)))
+
+int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
+ u16 data_len)
+{
+ u16 buffer_left = data_len;
+ u8 *buffer = data;
+
+ if (buffer_left < sizeof(struct mgmt_tlv)) {
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_DEF_SYSTEM_CONFIG,
+ MGMT_STATUS_INVALID_PARAMS);
+ }
+
+ /* First pass to validate the tlv */
+ while (buffer_left >= sizeof(struct mgmt_tlv)) {
+ const u8 len = TO_TLV(buffer)->length;
+ const u16 exp_len = sizeof(struct mgmt_tlv) +
+ len;
+ const u16 type = le16_to_cpu(TO_TLV(buffer)->type);
+
+ if (buffer_left < exp_len) {
+ bt_dev_warn(hdev, "invalid len left %d, exp >= %d",
+ buffer_left, exp_len);
+
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_DEF_SYSTEM_CONFIG,
+ MGMT_STATUS_INVALID_PARAMS);
+ }
+
+ /* Please see mgmt-api.txt for documentation of these values */
+ switch (type) {
+ case 0x0000:
+ case 0x0001:
+ case 0x0002:
+ case 0x0003:
+ case 0x0004:
+ case 0x0005:
+ case 0x0006:
+ case 0x0007:
+ case 0x0008:
+ case 0x0009:
+ case 0x000a:
+ case 0x000b:
+ case 0x000c:
+ case 0x000d:
+ case 0x000e:
+ case 0x000f:
+ case 0x0010:
+ case 0x0011:
+ case 0x0012:
+ case 0x0013:
+ case 0x0014:
+ case 0x0015:
+ case 0x0016:
+ case 0x0017:
+ case 0x0018:
+ case 0x0019:
+ case 0x001a:
+ case 0x001b:
+ if (len != sizeof(u16)) {
+ bt_dev_warn(hdev, "invalid length %d, exp %zu for type %d",
+ len, sizeof(u16), type);
+
+ return mgmt_cmd_status(sk, hdev->id,
+ MGMT_OP_SET_DEF_SYSTEM_CONFIG,
+ MGMT_STATUS_INVALID_PARAMS);
+ }
+ break;
+ default:
+ bt_dev_warn(hdev, "unsupported parameter %u", type);
+ break;
+ }
+
+ buffer_left -= exp_len;
+ buffer += exp_len;
+ }
+
+ buffer_left = data_len;
+ buffer = data;
+ while (buffer_left >= sizeof(struct mgmt_tlv)) {
+ const u8 len = TO_TLV(buffer)->length;
+ const u16 exp_len = sizeof(struct mgmt_tlv) +
+ len;
+ const u16 type = le16_to_cpu(TO_TLV(buffer)->type);
+
+ switch (type) {
+ case 0x0000:
+ hdev->def_page_scan_type = TLV_GET_LE16(buffer);
+ break;
+ case 0x0001:
+ hdev->def_page_scan_int = TLV_GET_LE16(buffer);
+ break;
+ case 0x0002:
+ hdev->def_page_scan_window = TLV_GET_LE16(buffer);
+ break;
+ case 0x0003:
+ hdev->def_inq_scan_type = TLV_GET_LE16(buffer);
+ break;
+ case 0x0004:
+ hdev->def_inq_scan_int = TLV_GET_LE16(buffer);
+ break;
+ case 0x0005:
+ hdev->def_inq_scan_window = TLV_GET_LE16(buffer);
+ break;
+ case 0x0006:
+ hdev->def_br_lsto = TLV_GET_LE16(buffer);
+ break;
+ case 0x0007:
+ hdev->def_page_timeout = TLV_GET_LE16(buffer);
+ break;
+ case 0x0008:
+ hdev->sniff_min_interval = TLV_GET_LE16(buffer);
+ break;
+ case 0x0009:
+ hdev->sniff_max_interval = TLV_GET_LE16(buffer);
+ break;
+ case 0x000a:
+ hdev->le_adv_min_interval = TLV_GET_LE16(buffer);
+ break;
+ case 0x000b:
+ hdev->le_adv_max_interval = TLV_GET_LE16(buffer);
+ break;
+ case 0x000c:
+ hdev->def_multi_adv_rotation_duration =
+ TLV_GET_LE16(buffer);
+ break;
+ case 0x000d:
+ hdev->le_scan_interval = TLV_GET_LE16(buffer);
+ break;
+ case 0x000e:
+ hdev->le_scan_window = TLV_GET_LE16(buffer);
+ break;
+ case 0x000f:
+ hdev->le_scan_int_suspend = TLV_GET_LE16(buffer);
+ break;
+ case 0x0010:
+ hdev->le_scan_window_suspend = TLV_GET_LE16(buffer);
+ break;
+ case 0x0011:
+ hdev->le_scan_int_discovery = TLV_GET_LE16(buffer);
+ break;
+ case 0x00012:
+ hdev->le_scan_window_discovery = TLV_GET_LE16(buffer);
+ break;
+ case 0x00013:
+ hdev->le_scan_int_adv_monitor = TLV_GET_LE16(buffer);
+ break;
+ case 0x00014:
+ hdev->le_scan_window_adv_monitor = TLV_GET_LE16(buffer);
+ break;
+ case 0x00015:
+ hdev->le_scan_int_connect = TLV_GET_LE16(buffer);
+ break;
+ case 0x00016:
+ hdev->le_scan_window_connect = TLV_GET_LE16(buffer);
+ break;
+ case 0x00017:
+ hdev->le_conn_min_interval = TLV_GET_LE16(buffer);
+ break;
+ case 0x00018:
+ hdev->le_conn_max_interval = TLV_GET_LE16(buffer);
+ break;
+ case 0x00019:
+ hdev->le_conn_latency = TLV_GET_LE16(buffer);
+ break;
+ case 0x0001a:
+ hdev->le_supv_timeout = TLV_GET_LE16(buffer);
+ break;
+ case 0x0001b:
+ hdev->def_le_autoconnect_timeout =
+ msecs_to_jiffies(TLV_GET_LE16(buffer));
+ break;
+ default:
+ bt_dev_warn(hdev, "unsupported parameter %u", type);
+ break;
+ }
+
+ buffer_left -= exp_len;
+ buffer += exp_len;
+ }
+
+ return mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_SET_DEF_SYSTEM_CONFIG, 0, NULL, 0);
+}
+
+int read_def_runtime_config(struct sock *sk, struct hci_dev *hdev, void *data,
+ u16 data_len)
+{
+ bt_dev_dbg(hdev, "sock %p", sk);
+
+ return mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_READ_DEF_RUNTIME_CONFIG, 0, NULL, 0);
+}
+
+int set_def_runtime_config(struct sock *sk, struct hci_dev *hdev, void *data,
+ u16 data_len)
+{
+ bt_dev_dbg(hdev, "sock %p", sk);
+
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEF_SYSTEM_CONFIG,
+ MGMT_STATUS_INVALID_PARAMS);
+}
diff --git a/net/bluetooth/mgmt_config.h b/net/bluetooth/mgmt_config.h
new file mode 100644
index 000000000000..a4965f107891
--- /dev/null
+++ b/net/bluetooth/mgmt_config.h
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (C) 2020 Google Corporation
+ */
+
+int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
+ u16 data_len);
+
+int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
+ u16 data_len);
+
+int read_def_runtime_config(struct sock *sk, struct hci_dev *hdev, void *data,
+ u16 data_len);
+
+int set_def_runtime_config(struct sock *sk, struct hci_dev *hdev, void *data,
+ u16 data_len);
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index d6c4e6b5ae77..8579bfeb2836 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -139,3 +139,10 @@ void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb)
bt_dev_dbg(hdev, "MSFT vendor event %u", event);
}
+
+__u64 msft_get_features(struct hci_dev *hdev)
+{
+ struct msft_data *msft = hdev->msft_data;
+
+ return msft ? msft->features : 0;
+}
diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h
index 5aa9130e1f8a..e9c478e890b8 100644
--- a/net/bluetooth/msft.h
+++ b/net/bluetooth/msft.h
@@ -3,16 +3,25 @@
* Copyright (C) 2020 Google Corporation
*/
+#define MSFT_FEATURE_MASK_BREDR_RSSI_MONITOR BIT(0)
+#define MSFT_FEATURE_MASK_LE_CONN_RSSI_MONITOR BIT(1)
+#define MSFT_FEATURE_MASK_LE_ADV_RSSI_MONITOR BIT(2)
+#define MSFT_FEATURE_MASK_LE_ADV_MONITOR BIT(3)
+#define MSFT_FEATURE_MASK_CURVE_VALIDITY BIT(4)
+#define MSFT_FEATURE_MASK_CONCURRENT_ADV_MONITOR BIT(5)
+
#if IS_ENABLED(CONFIG_BT_MSFTEXT)
void msft_do_open(struct hci_dev *hdev);
void msft_do_close(struct hci_dev *hdev);
void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb);
+__u64 msft_get_features(struct hci_dev *hdev);
#else
static inline void msft_do_open(struct hci_dev *hdev) {}
static inline void msft_do_close(struct hci_dev *hdev) {}
static inline void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb) {}
+static inline __u64 msft_get_features(struct hci_dev *hdev) { return 0; }
#endif
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 2e20af317cea..f2bacb464ccf 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -479,7 +479,7 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
/* if closing a dlc in a session that hasn't been started,
* just close and unlink the dlc
*/
- /* fall through */
+ fallthrough;
default:
rfcomm_dlc_clear_timer(d);
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index df14eebe80da..ae6f80730561 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -218,7 +218,7 @@ static void __rfcomm_sock_close(struct sock *sk)
case BT_CONFIG:
case BT_CONNECTED:
rfcomm_dlc_close(d, 0);
- /* fall through */
+ fallthrough;
default:
sock_set_flag(sk, SOCK_ZAPPED);
@@ -644,7 +644,8 @@ static int rfcomm_sock_recvmsg(struct socket *sock, struct msghdr *msg,
return len;
}
-static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen)
+static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
int err = 0;
@@ -656,7 +657,7 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __u
switch (optname) {
case RFCOMM_LM:
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
@@ -685,7 +686,8 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __u
return err;
}
-static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
+static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct bt_security sec;
@@ -713,7 +715,7 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c
sec.level = BT_SECURITY_LOW;
len = min_t(unsigned int, sizeof(sec), optlen);
- if (copy_from_user((char *) &sec, optval, len)) {
+ if (copy_from_sockptr(&sec, optval, len)) {
err = -EFAULT;
break;
}
@@ -732,7 +734,7 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c
break;
}
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index c8c3d38cdc7b..dcf7f96ff417 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -66,6 +66,7 @@ struct sco_pinfo {
bdaddr_t dst;
__u32 flags;
__u16 setting;
+ __u8 cmsg_mask;
struct sco_conn *conn;
};
@@ -449,6 +450,15 @@ static void sco_sock_close(struct sock *sk)
sco_sock_kill(sk);
}
+static void sco_skb_put_cmsg(struct sk_buff *skb, struct msghdr *msg,
+ struct sock *sk)
+{
+ if (sco_pi(sk)->cmsg_mask & SCO_CMSG_PKT_STATUS)
+ put_cmsg(msg, SOL_BLUETOOTH, BT_SCM_PKT_STATUS,
+ sizeof(bt_cb(skb)->sco.pkt_status),
+ &bt_cb(skb)->sco.pkt_status);
+}
+
static void sco_sock_init(struct sock *sk, struct sock *parent)
{
BT_DBG("sk %p", sk);
@@ -457,6 +467,8 @@ static void sco_sock_init(struct sock *sk, struct sock *parent)
sk->sk_type = parent->sk_type;
bt_sk(sk)->flags = bt_sk(parent)->flags;
security_sk_clone(parent, sk);
+ } else {
+ bt_sk(sk)->skb_put_cmsg = sco_skb_put_cmsg;
}
}
@@ -791,7 +803,7 @@ static int sco_sock_recvmsg(struct socket *sock, struct msghdr *msg,
}
static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
int len, err = 0;
@@ -810,7 +822,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
@@ -831,7 +843,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
voice.setting = sco_pi(sk)->setting;
len = min_t(unsigned int, sizeof(voice), optlen);
- if (copy_from_user((char *)&voice, optval, len)) {
+ if (copy_from_sockptr(&voice, optval, len)) {
err = -EFAULT;
break;
}
@@ -846,6 +858,18 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
sco_pi(sk)->setting = voice.setting;
break;
+ case BT_PKT_STATUS:
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+ err = -EFAULT;
+ break;
+ }
+
+ if (opt)
+ sco_pi(sk)->cmsg_mask |= SCO_CMSG_PKT_STATUS;
+ else
+ sco_pi(sk)->cmsg_mask &= SCO_CMSG_PKT_STATUS;
+ break;
+
default:
err = -ENOPROTOOPT;
break;
@@ -923,6 +947,7 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
int len, err = 0;
struct bt_voice voice;
u32 phys;
+ int pkt_status;
BT_DBG("sk %p", sk);
@@ -969,6 +994,13 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
err = -EFAULT;
break;
+ case BT_PKT_STATUS:
+ pkt_status = (sco_pi(sk)->cmsg_mask & SCO_CMSG_PKT_STATUS);
+
+ if (put_user(pkt_status, (int __user *)optval))
+ err = -EFAULT;
+ break;
+
default:
err = -ENOPROTOOPT;
break;
diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c
index 03e3c89c3046..f71c6fa65fb3 100644
--- a/net/bluetooth/selftest.c
+++ b/net/bluetooth/selftest.c
@@ -205,7 +205,7 @@ static int __init test_ecdh(void)
calltime = ktime_get();
- tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ tfm = crypto_alloc_kpp("ecdh", 0, 0);
if (IS_ERR(tfm)) {
BT_ERR("Unable to create ECDH crypto context");
err = PTR_ERR(tfm);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index c2c5ab05fa7e..433227f96c73 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -1387,7 +1387,7 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
goto zfree_smp;
}
- smp->tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ smp->tfm_ecdh = crypto_alloc_kpp("ecdh", 0, 0);
if (IS_ERR(smp->tfm_ecdh)) {
BT_ERR("Unable to create ECDH crypto context");
goto free_shash;
@@ -1654,7 +1654,7 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey)
memset(smp->tk, 0, sizeof(smp->tk));
BT_DBG("PassKey: %d", value);
put_unaligned_le32(value, smp->tk);
- /* Fall Through */
+ fallthrough;
case MGMT_OP_USER_CONFIRM_REPLY:
set_bit(SMP_FLAG_TK_VALID, &smp->flags);
break;
@@ -3282,7 +3282,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
return ERR_CAST(tfm_cmac);
}
- tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ tfm_ecdh = crypto_alloc_kpp("ecdh", 0, 0);
if (IS_ERR(tfm_ecdh)) {
BT_ERR("Unable to create ECDH crypto context");
crypto_free_shash(tfm_cmac);
@@ -3847,7 +3847,7 @@ int __init bt_selftest_smp(void)
return PTR_ERR(tfm_cmac);
}
- tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ tfm_ecdh = crypto_alloc_kpp("ecdh", 0, 0);
if (IS_ERR(tfm_ecdh)) {
BT_ERR("Unable to create ECDH crypto context");
crypto_free_shash(tfm_cmac);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index b03c469cd01f..99eb8c6c0fbc 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -327,6 +327,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
/* priority is allowed */
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority),
+ offsetof(struct __sk_buff, ifindex)))
+ return -EINVAL;
+
+ /* ifindex is allowed */
+
+ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, ifindex),
offsetof(struct __sk_buff, cb)))
return -EINVAL;
@@ -381,6 +387,7 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
__skb->mark = skb->mark;
__skb->priority = skb->priority;
+ __skb->ifindex = skb->dev->ifindex;
__skb->tstamp = skb->tstamp;
memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
__skb->wire_len = cb->pkt_len;
@@ -391,6 +398,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
bool is_l2 = false, is_direct_pkt_access = false;
+ struct net *net = current->nsproxy->net_ns;
+ struct net_device *dev = net->loopback_dev;
u32 size = kattr->test.data_size_in;
u32 repeat = kattr->test.repeat;
struct __sk_buff *ctx = NULL;
@@ -432,7 +441,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
kfree(ctx);
return -ENOMEM;
}
- sock_net_set(sk, current->nsproxy->net_ns);
+ sock_net_set(sk, net);
sock_init_data(NULL, sk);
skb = build_skb(data, 0);
@@ -446,9 +455,37 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
__skb_put(skb, size);
- skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev);
+ if (ctx && ctx->ifindex > 1) {
+ dev = dev_get_by_index(net, ctx->ifindex);
+ if (!dev) {
+ ret = -ENODEV;
+ goto out;
+ }
+ }
+ skb->protocol = eth_type_trans(skb, dev);
skb_reset_network_header(skb);
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ sk->sk_family = AF_INET;
+ if (sizeof(struct iphdr) <= skb_headlen(skb)) {
+ sk->sk_rcv_saddr = ip_hdr(skb)->saddr;
+ sk->sk_daddr = ip_hdr(skb)->daddr;
+ }
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ sk->sk_family = AF_INET6;
+ if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) {
+ sk->sk_v6_rcv_saddr = ipv6_hdr(skb)->saddr;
+ sk->sk_v6_daddr = ipv6_hdr(skb)->daddr;
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+
if (is_l2)
__skb_push(skb, hh_len);
if (is_direct_pkt_access)
@@ -481,6 +518,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
ret = bpf_ctx_finish(kattr, uattr, ctx,
sizeof(struct __sk_buff));
out:
+ if (dev && dev != net->loopback_dev)
+ dev_put(dev);
kfree_skb(skb);
bpf_sk_storage_free(sk);
kfree(sk);
diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig
index 84015ef3ee27..73d0b12789f1 100644
--- a/net/bpfilter/Kconfig
+++ b/net/bpfilter/Kconfig
@@ -9,12 +9,14 @@ menuconfig BPFILTER
if BPFILTER
config BPFILTER_UMH
tristate "bpfilter kernel module with user mode helper"
- depends on CC_CAN_LINK_STATIC
+ depends on CC_CAN_LINK
+ depends on m || CC_CAN_LINK_STATIC
default m
help
This builds bpfilter kernel module with embedded user mode helper
- Note: your toolchain must support building static binaries, since
- rootfs isn't mounted at the time when __init functions are called
- and do_execv won't be able to find the elf interpreter.
+ Note: To compile this as built-in, your toolchain must support
+ building static binaries, since rootfs isn't mounted at the time
+ when __init functions are called and do_execv won't be able to find
+ the elf interpreter.
endif
diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
index f23b53294fba..cdac82b8c53a 100644
--- a/net/bpfilter/Makefile
+++ b/net/bpfilter/Makefile
@@ -7,10 +7,12 @@ userprogs := bpfilter_umh
bpfilter_umh-objs := main.o
userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi
+ifeq ($(CONFIG_BPFILTER_UMH), y)
# builtin bpfilter_umh should be linked with -static
# since rootfs isn't mounted at the time of __init
# function is called and do_execv won't find elf interpreter
userldflags += -static
+endif
$(obj)/bpfilter_umh_blob.o: $(obj)/bpfilter_umh
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index 1905e01c3aa9..51a941b56ec3 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -15,15 +15,13 @@ extern char bpfilter_umh_end;
static void shutdown_umh(void)
{
- struct task_struct *tsk;
+ struct umd_info *info = &bpfilter_ops.info;
+ struct pid *tgid = info->tgid;
- if (bpfilter_ops.stop)
- return;
-
- tsk = get_pid_task(find_vpid(bpfilter_ops.info.pid), PIDTYPE_PID);
- if (tsk) {
- send_sig(SIGKILL, tsk, 1);
- put_task_struct(tsk);
+ if (tgid) {
+ kill_pid(tgid, SIGKILL, 1);
+ wait_event(tgid->wait_pidfd, thread_group_exited(tgid));
+ bpfilter_umh_cleanup(info);
}
}
@@ -33,60 +31,65 @@ static void __stop_umh(void)
shutdown_umh();
}
-static int __bpfilter_process_sockopt(struct sock *sk, int optname,
- char __user *optval,
- unsigned int optlen, bool is_set)
+static int bpfilter_send_req(struct mbox_request *req)
{
- struct mbox_request req;
struct mbox_reply reply;
- loff_t pos;
+ loff_t pos = 0;
ssize_t n;
- int ret = -EFAULT;
-
- req.is_set = is_set;
- req.pid = current->pid;
- req.cmd = optname;
- req.addr = (long __force __user)optval;
- req.len = optlen;
- if (!bpfilter_ops.info.pid)
- goto out;
- n = kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req),
+
+ if (!bpfilter_ops.info.tgid)
+ return -EFAULT;
+ pos = 0;
+ n = kernel_write(bpfilter_ops.info.pipe_to_umh, req, sizeof(*req),
&pos);
- if (n != sizeof(req)) {
+ if (n != sizeof(*req)) {
pr_err("write fail %zd\n", n);
- __stop_umh();
- ret = -EFAULT;
- goto out;
+ goto stop;
}
pos = 0;
n = kernel_read(bpfilter_ops.info.pipe_from_umh, &reply, sizeof(reply),
&pos);
if (n != sizeof(reply)) {
pr_err("read fail %zd\n", n);
- __stop_umh();
- ret = -EFAULT;
- goto out;
+ goto stop;
}
- ret = reply.status;
-out:
- return ret;
+ return reply.status;
+stop:
+ __stop_umh();
+ return -EFAULT;
+}
+
+static int bpfilter_process_sockopt(struct sock *sk, int optname,
+ sockptr_t optval, unsigned int optlen,
+ bool is_set)
+{
+ struct mbox_request req = {
+ .is_set = is_set,
+ .pid = current->pid,
+ .cmd = optname,
+ .addr = (uintptr_t)optval.user,
+ .len = optlen,
+ };
+ if (uaccess_kernel() || sockptr_is_kernel(optval)) {
+ pr_err("kernel access not supported\n");
+ return -EFAULT;
+ }
+ return bpfilter_send_req(&req);
}
static int start_umh(void)
{
+ struct mbox_request req = { .pid = current->pid };
int err;
/* fork usermode process */
- err = fork_usermode_blob(&bpfilter_umh_start,
- &bpfilter_umh_end - &bpfilter_umh_start,
- &bpfilter_ops.info);
+ err = fork_usermode_driver(&bpfilter_ops.info);
if (err)
return err;
- bpfilter_ops.stop = false;
- pr_info("Loaded bpfilter_umh pid %d\n", bpfilter_ops.info.pid);
+ pr_info("Loaded bpfilter_umh pid %d\n", pid_nr(bpfilter_ops.info.tgid));
/* health check that usermode process started correctly */
- if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) {
+ if (bpfilter_send_req(&req) != 0) {
shutdown_umh();
return -EFAULT;
}
@@ -98,18 +101,21 @@ static int __init load_umh(void)
{
int err;
+ err = umd_load_blob(&bpfilter_ops.info,
+ &bpfilter_umh_start,
+ &bpfilter_umh_end - &bpfilter_umh_start);
+ if (err)
+ return err;
+
mutex_lock(&bpfilter_ops.lock);
- if (!bpfilter_ops.stop) {
- err = -EFAULT;
- goto out;
- }
err = start_umh();
if (!err && IS_ENABLED(CONFIG_INET)) {
- bpfilter_ops.sockopt = &__bpfilter_process_sockopt;
+ bpfilter_ops.sockopt = &bpfilter_process_sockopt;
bpfilter_ops.start = &start_umh;
}
-out:
mutex_unlock(&bpfilter_ops.lock);
+ if (err)
+ umd_unload_blob(&bpfilter_ops.info);
return err;
}
@@ -122,6 +128,8 @@ static void __exit fini_umh(void)
bpfilter_ops.sockopt = NULL;
}
mutex_unlock(&bpfilter_ops.lock);
+
+ umd_unload_blob(&bpfilter_ops.info);
}
module_init(load_umh);
module_exit(fini_umh);
diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S
index 9ea6100dca87..40311d10d2f2 100644
--- a/net/bpfilter/bpfilter_umh_blob.S
+++ b/net/bpfilter/bpfilter_umh_blob.S
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
- .section .rodata, "a"
+ .section .init.rodata, "a"
.global bpfilter_umh_start
bpfilter_umh_start:
.incbin "net/bpfilter/bpfilter_umh"
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 8c7b78f8bc23..9a2fb4aa1a10 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -36,6 +36,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
const unsigned char *dest;
u16 vid = 0;
+ memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
+
rcu_read_lock();
nf_ops = rcu_dereference(nf_br_ops);
if (nf_ops && nf_ops->br_dev_xmit_hook(skb)) {
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 4877a0db16c6..9db504baa094 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -349,12 +349,21 @@ void br_fdb_cleanup(struct work_struct *work)
*/
rcu_read_lock();
hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
- unsigned long this_timer;
+ unsigned long this_timer = f->updated + delay;
if (test_bit(BR_FDB_STATIC, &f->flags) ||
- test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags))
+ test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags)) {
+ if (test_bit(BR_FDB_NOTIFY, &f->flags)) {
+ if (time_after(this_timer, now))
+ work_delay = min(work_delay,
+ this_timer - now);
+ else if (!test_and_set_bit(BR_FDB_NOTIFY_INACTIVE,
+ &f->flags))
+ fdb_notify(br, f, RTM_NEWNEIGH, false);
+ }
continue;
- this_timer = f->updated + delay;
+ }
+
if (time_after(this_timer, now)) {
work_delay = min(work_delay, this_timer - now);
} else {
@@ -556,11 +565,17 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
return ret;
}
+/* returns true if the fdb was modified */
+static bool __fdb_mark_active(struct net_bridge_fdb_entry *fdb)
+{
+ return !!(test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags) &&
+ test_and_clear_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags));
+}
+
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid, unsigned long flags)
{
struct net_bridge_fdb_entry *fdb;
- bool fdb_modified = false;
/* some users want to always flood. */
if (hold_time(br) == 0)
@@ -575,6 +590,12 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
source->dev->name, addr, vid);
} else {
unsigned long now = jiffies;
+ bool fdb_modified = false;
+
+ if (now != fdb->updated) {
+ fdb->updated = now;
+ fdb_modified = __fdb_mark_active(fdb);
+ }
/* fastpath: update of existing entry */
if (unlikely(source != fdb->dst &&
@@ -587,8 +608,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
clear_bit(BR_FDB_ADDED_BY_EXT_LEARN,
&fdb->flags);
}
- if (now != fdb->updated)
- fdb->updated = now;
+
if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags)))
set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
if (unlikely(fdb_modified)) {
@@ -667,6 +687,23 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
&fdb->key.vlan_id))
goto nla_put_failure;
+ if (test_bit(BR_FDB_NOTIFY, &fdb->flags)) {
+ struct nlattr *nest = nla_nest_start(skb, NDA_FDB_EXT_ATTRS);
+ u8 notify_bits = FDB_NOTIFY_BIT;
+
+ if (!nest)
+ goto nla_put_failure;
+ if (test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags))
+ notify_bits |= FDB_NOTIFY_INACTIVE_BIT;
+
+ if (nla_put_u8(skb, NFEA_ACTIVITY_NOTIFY, notify_bits)) {
+ nla_nest_cancel(skb, nest);
+ goto nla_put_failure;
+ }
+
+ nla_nest_end(skb, nest);
+ }
+
nlmsg_end(skb, nlh);
return 0;
@@ -681,7 +718,9 @@ static inline size_t fdb_nlmsg_size(void)
+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+ nla_total_size(sizeof(u32)) /* NDA_MASTER */
+ nla_total_size(sizeof(u16)) /* NDA_VLAN */
- + nla_total_size(sizeof(struct nda_cacheinfo));
+ + nla_total_size(sizeof(struct nda_cacheinfo))
+ + nla_total_size(0) /* NDA_FDB_EXT_ATTRS */
+ + nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
}
static void fdb_notify(struct net_bridge *br,
@@ -791,14 +830,41 @@ errout:
return err;
}
+/* returns true if the fdb is modified */
+static bool fdb_handle_notify(struct net_bridge_fdb_entry *fdb, u8 notify)
+{
+ bool modified = false;
+
+ /* allow to mark an entry as inactive, usually done on creation */
+ if ((notify & FDB_NOTIFY_INACTIVE_BIT) &&
+ !test_and_set_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags))
+ modified = true;
+
+ if ((notify & FDB_NOTIFY_BIT) &&
+ !test_and_set_bit(BR_FDB_NOTIFY, &fdb->flags)) {
+ /* enabled activity tracking */
+ modified = true;
+ } else if (!(notify & FDB_NOTIFY_BIT) &&
+ test_and_clear_bit(BR_FDB_NOTIFY, &fdb->flags)) {
+ /* disabled activity tracking, clear notify state */
+ clear_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags);
+ modified = true;
+ }
+
+ return modified;
+}
+
/* Update (create or replace) forwarding database entry */
static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
- const u8 *addr, u16 state, u16 flags, u16 vid,
- u8 ndm_flags)
+ const u8 *addr, struct ndmsg *ndm, u16 flags, u16 vid,
+ struct nlattr *nfea_tb[])
{
- bool is_sticky = !!(ndm_flags & NTF_STICKY);
+ bool is_sticky = !!(ndm->ndm_flags & NTF_STICKY);
+ bool refresh = !nfea_tb[NFEA_DONT_REFRESH];
struct net_bridge_fdb_entry *fdb;
+ u16 state = ndm->ndm_state;
bool modified = false;
+ u8 notify = 0;
/* If the port cannot learn allow only local and static entries */
if (source && !(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
@@ -815,6 +881,13 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
if (is_sticky && (state & NUD_PERMANENT))
return -EINVAL;
+ if (nfea_tb[NFEA_ACTIVITY_NOTIFY]) {
+ notify = nla_get_u8(nfea_tb[NFEA_ACTIVITY_NOTIFY]);
+ if ((notify & ~BR_FDB_NOTIFY_SETTABLE_BITS) ||
+ (notify & BR_FDB_NOTIFY_SETTABLE_BITS) == FDB_NOTIFY_INACTIVE_BIT)
+ return -EINVAL;
+ }
+
fdb = br_fdb_find(br, addr, vid);
if (fdb == NULL) {
if (!(flags & NLM_F_CREATE))
@@ -858,11 +931,15 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
modified = true;
}
+ if (fdb_handle_notify(fdb, notify))
+ modified = true;
+
set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
fdb->used = jiffies;
if (modified) {
- fdb->updated = jiffies;
+ if (refresh)
+ fdb->updated = jiffies;
fdb_notify(br, fdb, RTM_NEWNEIGH, true);
}
@@ -871,7 +948,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
struct net_bridge_port *p, const unsigned char *addr,
- u16 nlh_flags, u16 vid)
+ u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[])
{
int err = 0;
@@ -893,20 +970,25 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
err = br_fdb_external_learn_add(br, p, addr, vid, true);
} else {
spin_lock_bh(&br->hash_lock);
- err = fdb_add_entry(br, p, addr, ndm->ndm_state,
- nlh_flags, vid, ndm->ndm_flags);
+ err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb);
spin_unlock_bh(&br->hash_lock);
}
return err;
}
+static const struct nla_policy br_nda_fdb_pol[NFEA_MAX + 1] = {
+ [NFEA_ACTIVITY_NOTIFY] = { .type = NLA_U8 },
+ [NFEA_DONT_REFRESH] = { .type = NLA_FLAG },
+};
+
/* Add new permanent fdb entry with RTM_NEWNEIGH */
int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid, u16 nlh_flags,
struct netlink_ext_ack *extack)
{
+ struct nlattr *nfea_tb[NFEA_MAX + 1], *attr;
struct net_bridge_vlan_group *vg;
struct net_bridge_port *p = NULL;
struct net_bridge_vlan *v;
@@ -939,6 +1021,16 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
vg = nbp_vlan_group(p);
}
+ if (tb[NDA_FDB_EXT_ATTRS]) {
+ attr = tb[NDA_FDB_EXT_ATTRS];
+ err = nla_parse_nested(nfea_tb, NFEA_MAX, attr,
+ br_nda_fdb_pol, extack);
+ if (err)
+ return err;
+ } else {
+ memset(nfea_tb, 0, sizeof(struct nlattr *) * (NFEA_MAX + 1));
+ }
+
if (vid) {
v = br_vlan_find(vg, vid);
if (!v || !br_vlan_should_use(v)) {
@@ -947,9 +1039,9 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
}
/* VID was specified, so use it. */
- err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid);
+ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb);
} else {
- err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0);
+ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb);
if (err || !vg || !vg->num_vlans)
goto out;
@@ -960,7 +1052,8 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
list_for_each_entry(v, &vg->vlan_list, vlist) {
if (!br_vlan_should_use(v))
continue;
- err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid);
+ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid,
+ nfea_tb);
if (err)
goto out;
}
diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
index 90592af9db61..b36689e6e7cb 100644
--- a/net/bridge/br_mrp.c
+++ b/net/bridge/br_mrp.c
@@ -4,6 +4,27 @@
#include "br_private_mrp.h"
static const u8 mrp_test_dmac[ETH_ALEN] = { 0x1, 0x15, 0x4e, 0x0, 0x0, 0x1 };
+static const u8 mrp_in_test_dmac[ETH_ALEN] = { 0x1, 0x15, 0x4e, 0x0, 0x0, 0x3 };
+
+static bool br_mrp_is_ring_port(struct net_bridge_port *p_port,
+ struct net_bridge_port *s_port,
+ struct net_bridge_port *port)
+{
+ if (port == p_port ||
+ port == s_port)
+ return true;
+
+ return false;
+}
+
+static bool br_mrp_is_in_port(struct net_bridge_port *i_port,
+ struct net_bridge_port *port)
+{
+ if (port == i_port)
+ return true;
+
+ return false;
+}
static struct net_bridge_port *br_mrp_get_port(struct net_bridge *br,
u32 ifindex)
@@ -37,6 +58,22 @@ static struct br_mrp *br_mrp_find_id(struct net_bridge *br, u32 ring_id)
return res;
}
+static struct br_mrp *br_mrp_find_in_id(struct net_bridge *br, u32 in_id)
+{
+ struct br_mrp *res = NULL;
+ struct br_mrp *mrp;
+
+ list_for_each_entry_rcu(mrp, &br->mrp_list, list,
+ lockdep_rtnl_is_held()) {
+ if (mrp->in_id == in_id) {
+ res = mrp;
+ break;
+ }
+ }
+
+ return res;
+}
+
static bool br_mrp_unique_ifindex(struct net_bridge *br, u32 ifindex)
{
struct br_mrp *mrp;
@@ -52,6 +89,10 @@ static bool br_mrp_unique_ifindex(struct net_bridge *br, u32 ifindex)
p = rtnl_dereference(mrp->s_port);
if (p && p->dev->ifindex == ifindex)
return false;
+
+ p = rtnl_dereference(mrp->i_port);
+ if (p && p->dev->ifindex == ifindex)
+ return false;
}
return true;
@@ -66,7 +107,8 @@ static struct br_mrp *br_mrp_find_port(struct net_bridge *br,
list_for_each_entry_rcu(mrp, &br->mrp_list, list,
lockdep_rtnl_is_held()) {
if (rcu_access_pointer(mrp->p_port) == p ||
- rcu_access_pointer(mrp->s_port) == p) {
+ rcu_access_pointer(mrp->s_port) == p ||
+ rcu_access_pointer(mrp->i_port) == p) {
res = mrp;
break;
}
@@ -160,6 +202,36 @@ static struct sk_buff *br_mrp_alloc_test_skb(struct br_mrp *mrp,
return skb;
}
+static struct sk_buff *br_mrp_alloc_in_test_skb(struct br_mrp *mrp,
+ struct net_bridge_port *p,
+ enum br_mrp_port_role_type port_role)
+{
+ struct br_mrp_in_test_hdr *hdr = NULL;
+ struct sk_buff *skb = NULL;
+
+ if (!p)
+ return NULL;
+
+ skb = br_mrp_skb_alloc(p, p->dev->dev_addr, mrp_in_test_dmac);
+ if (!skb)
+ return NULL;
+
+ br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_IN_TEST, sizeof(*hdr));
+ hdr = skb_put(skb, sizeof(*hdr));
+
+ hdr->id = cpu_to_be16(mrp->in_id);
+ ether_addr_copy(hdr->sa, p->br->dev->dev_addr);
+ hdr->port_role = cpu_to_be16(port_role);
+ hdr->state = cpu_to_be16(mrp->in_state);
+ hdr->transitions = cpu_to_be16(mrp->in_transitions);
+ hdr->timestamp = cpu_to_be32(jiffies_to_msecs(jiffies));
+
+ br_mrp_skb_common(skb, mrp);
+ br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_END, 0x0);
+
+ return skb;
+}
+
/* This function is continuously called in the following cases:
* - when node role is MRM, in this case test_monitor is always set to false
* because it needs to notify the userspace that the ring is open and needs to
@@ -213,7 +285,7 @@ static void br_mrp_test_work_expired(struct work_struct *work)
}
if (notify_open && !mrp->ring_role_offloaded)
- br_mrp_port_open(p->dev, true);
+ br_mrp_ring_port_open(p->dev, true);
}
p = rcu_dereference(mrp->s_port);
@@ -229,7 +301,7 @@ static void br_mrp_test_work_expired(struct work_struct *work)
}
if (notify_open && !mrp->ring_role_offloaded)
- br_mrp_port_open(p->dev, true);
+ br_mrp_ring_port_open(p->dev, true);
}
out:
@@ -239,6 +311,83 @@ out:
usecs_to_jiffies(mrp->test_interval));
}
+/* This function is continuously called when the node has the interconnect role
+ * MIM. It would generate interconnect test frames and will send them on all 3
+ * ports. But will also check if it stop receiving interconnect test frames.
+ */
+static void br_mrp_in_test_work_expired(struct work_struct *work)
+{
+ struct delayed_work *del_work = to_delayed_work(work);
+ struct br_mrp *mrp = container_of(del_work, struct br_mrp, in_test_work);
+ struct net_bridge_port *p;
+ bool notify_open = false;
+ struct sk_buff *skb;
+
+ if (time_before_eq(mrp->in_test_end, jiffies))
+ return;
+
+ if (mrp->in_test_count_miss < mrp->in_test_max_miss) {
+ mrp->in_test_count_miss++;
+ } else {
+ /* Notify that the interconnect ring is open only if the
+ * interconnect ring state is closed, otherwise it would
+ * continue to notify at every interval.
+ */
+ if (mrp->in_state == BR_MRP_IN_STATE_CLOSED)
+ notify_open = true;
+ }
+
+ rcu_read_lock();
+
+ p = rcu_dereference(mrp->p_port);
+ if (p) {
+ skb = br_mrp_alloc_in_test_skb(mrp, p,
+ BR_MRP_PORT_ROLE_PRIMARY);
+ if (!skb)
+ goto out;
+
+ skb_reset_network_header(skb);
+ dev_queue_xmit(skb);
+
+ if (notify_open && !mrp->in_role_offloaded)
+ br_mrp_in_port_open(p->dev, true);
+ }
+
+ p = rcu_dereference(mrp->s_port);
+ if (p) {
+ skb = br_mrp_alloc_in_test_skb(mrp, p,
+ BR_MRP_PORT_ROLE_SECONDARY);
+ if (!skb)
+ goto out;
+
+ skb_reset_network_header(skb);
+ dev_queue_xmit(skb);
+
+ if (notify_open && !mrp->in_role_offloaded)
+ br_mrp_in_port_open(p->dev, true);
+ }
+
+ p = rcu_dereference(mrp->i_port);
+ if (p) {
+ skb = br_mrp_alloc_in_test_skb(mrp, p,
+ BR_MRP_PORT_ROLE_INTER);
+ if (!skb)
+ goto out;
+
+ skb_reset_network_header(skb);
+ dev_queue_xmit(skb);
+
+ if (notify_open && !mrp->in_role_offloaded)
+ br_mrp_in_port_open(p->dev, true);
+ }
+
+out:
+ rcu_read_unlock();
+
+ queue_delayed_work(system_wq, &mrp->in_test_work,
+ usecs_to_jiffies(mrp->in_test_interval));
+}
+
/* Deletes the MRP instance.
* note: called under rtnl_lock
*/
@@ -251,6 +400,10 @@ static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp)
cancel_delayed_work_sync(&mrp->test_work);
br_mrp_switchdev_send_ring_test(br, mrp, 0, 0, 0, 0);
+ /* Stop sending MRP_InTest frames if has an interconnect role */
+ cancel_delayed_work_sync(&mrp->in_test_work);
+ br_mrp_switchdev_send_in_test(br, mrp, 0, 0, 0);
+
br_mrp_switchdev_del(br, mrp);
/* Reset the ports */
@@ -278,6 +431,18 @@ static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp)
rcu_assign_pointer(mrp->s_port, NULL);
}
+ p = rtnl_dereference(mrp->i_port);
+ if (p) {
+ spin_lock_bh(&br->lock);
+ state = netif_running(br->dev) ?
+ BR_STATE_FORWARDING : BR_STATE_DISABLED;
+ p->state = state;
+ p->flags &= ~BR_MRP_AWARE;
+ spin_unlock_bh(&br->lock);
+ br_mrp_port_switchdev_set_state(p, state);
+ rcu_assign_pointer(mrp->i_port, NULL);
+ }
+
list_del_rcu(&mrp->list);
kfree_rcu(mrp, rcu);
}
@@ -329,6 +494,7 @@ int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance)
rcu_assign_pointer(mrp->s_port, p);
INIT_DELAYED_WORK(&mrp->test_work, br_mrp_test_work_expired);
+ INIT_DELAYED_WORK(&mrp->in_test_work, br_mrp_in_test_work_expired);
list_add_tail_rcu(&mrp->list, &br->mrp_list);
err = br_mrp_switchdev_add(br, mrp);
@@ -511,6 +677,180 @@ int br_mrp_start_test(struct net_bridge *br,
return 0;
}
+/* Set in state, int state can be only Open or Closed
+ * note: already called with rtnl_lock
+ */
+int br_mrp_set_in_state(struct net_bridge *br, struct br_mrp_in_state *state)
+{
+ struct br_mrp *mrp = br_mrp_find_in_id(br, state->in_id);
+
+ if (!mrp)
+ return -EINVAL;
+
+ if (mrp->in_state == BR_MRP_IN_STATE_CLOSED &&
+ state->in_state != BR_MRP_IN_STATE_CLOSED)
+ mrp->in_transitions++;
+
+ mrp->in_state = state->in_state;
+
+ br_mrp_switchdev_set_in_state(br, mrp, state->in_state);
+
+ return 0;
+}
+
+/* Set in role, in role can be only MIM(Media Interconnection Manager) or
+ * MIC(Media Interconnection Client).
+ * note: already called with rtnl_lock
+ */
+int br_mrp_set_in_role(struct net_bridge *br, struct br_mrp_in_role *role)
+{
+ struct br_mrp *mrp = br_mrp_find_id(br, role->ring_id);
+ struct net_bridge_port *p;
+ int err;
+
+ if (!mrp)
+ return -EINVAL;
+
+ if (!br_mrp_get_port(br, role->i_ifindex))
+ return -EINVAL;
+
+ if (role->in_role == BR_MRP_IN_ROLE_DISABLED) {
+ u8 state;
+
+ /* It is not allowed to disable a port that doesn't exist */
+ p = rtnl_dereference(mrp->i_port);
+ if (!p)
+ return -EINVAL;
+
+ /* Stop the generating MRP_InTest frames */
+ cancel_delayed_work_sync(&mrp->in_test_work);
+ br_mrp_switchdev_send_in_test(br, mrp, 0, 0, 0);
+
+ /* Remove the port */
+ spin_lock_bh(&br->lock);
+ state = netif_running(br->dev) ?
+ BR_STATE_FORWARDING : BR_STATE_DISABLED;
+ p->state = state;
+ p->flags &= ~BR_MRP_AWARE;
+ spin_unlock_bh(&br->lock);
+ br_mrp_port_switchdev_set_state(p, state);
+ rcu_assign_pointer(mrp->i_port, NULL);
+
+ mrp->in_role = role->in_role;
+ mrp->in_id = 0;
+
+ return 0;
+ }
+
+ /* It is not possible to have the same port part of multiple rings */
+ if (!br_mrp_unique_ifindex(br, role->i_ifindex))
+ return -EINVAL;
+
+ /* It is not allowed to set a different interconnect port if the mrp
+ * instance has already one. First it needs to be disabled and after
+ * that set the new port
+ */
+ if (rcu_access_pointer(mrp->i_port))
+ return -EINVAL;
+
+ p = br_mrp_get_port(br, role->i_ifindex);
+ spin_lock_bh(&br->lock);
+ p->state = BR_STATE_FORWARDING;
+ p->flags |= BR_MRP_AWARE;
+ spin_unlock_bh(&br->lock);
+ rcu_assign_pointer(mrp->i_port, p);
+
+ mrp->in_role = role->in_role;
+ mrp->in_id = role->in_id;
+
+ /* If there is an error just bailed out */
+ err = br_mrp_switchdev_set_in_role(br, mrp, role->in_id,
+ role->ring_id, role->in_role);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ /* Now detect if the HW actually applied the role or not. If the HW
+ * applied the role it means that the SW will not to do those operations
+ * anymore. For example if the role is MIM then the HW will notify the
+ * SW when interconnect ring is open, but if the is not pushed to the HW
+ * the SW will need to detect when the interconnect ring is open.
+ */
+ mrp->in_role_offloaded = err == -EOPNOTSUPP ? 0 : 1;
+
+ return 0;
+}
+
+/* Start to generate MRP_InTest frames, the frames are generated by
+ * HW and if it fails, they are generated by the SW.
+ * note: already called with rtnl_lock
+ */
+int br_mrp_start_in_test(struct net_bridge *br,
+ struct br_mrp_start_in_test *in_test)
+{
+ struct br_mrp *mrp = br_mrp_find_in_id(br, in_test->in_id);
+
+ if (!mrp)
+ return -EINVAL;
+
+ if (mrp->in_role != BR_MRP_IN_ROLE_MIM)
+ return -EINVAL;
+
+ /* Try to push it to the HW and if it fails then continue with SW
+ * implementation and if that also fails then return error.
+ */
+ if (!br_mrp_switchdev_send_in_test(br, mrp, in_test->interval,
+ in_test->max_miss, in_test->period))
+ return 0;
+
+ mrp->in_test_interval = in_test->interval;
+ mrp->in_test_end = jiffies + usecs_to_jiffies(in_test->period);
+ mrp->in_test_max_miss = in_test->max_miss;
+ mrp->in_test_count_miss = 0;
+ queue_delayed_work(system_wq, &mrp->in_test_work,
+ usecs_to_jiffies(in_test->interval));
+
+ return 0;
+}
+
+/* Determin if the frame type is a ring frame */
+static bool br_mrp_ring_frame(struct sk_buff *skb)
+{
+ const struct br_mrp_tlv_hdr *hdr;
+ struct br_mrp_tlv_hdr _hdr;
+
+ hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr);
+ if (!hdr)
+ return false;
+
+ if (hdr->type == BR_MRP_TLV_HEADER_RING_TEST ||
+ hdr->type == BR_MRP_TLV_HEADER_RING_TOPO ||
+ hdr->type == BR_MRP_TLV_HEADER_RING_LINK_DOWN ||
+ hdr->type == BR_MRP_TLV_HEADER_RING_LINK_UP ||
+ hdr->type == BR_MRP_TLV_HEADER_OPTION)
+ return true;
+
+ return false;
+}
+
+/* Determin if the frame type is an interconnect frame */
+static bool br_mrp_in_frame(struct sk_buff *skb)
+{
+ const struct br_mrp_tlv_hdr *hdr;
+ struct br_mrp_tlv_hdr _hdr;
+
+ hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr);
+ if (!hdr)
+ return false;
+
+ if (hdr->type == BR_MRP_TLV_HEADER_IN_TEST ||
+ hdr->type == BR_MRP_TLV_HEADER_IN_TOPO ||
+ hdr->type == BR_MRP_TLV_HEADER_IN_LINK_DOWN ||
+ hdr->type == BR_MRP_TLV_HEADER_IN_LINK_UP)
+ return true;
+
+ return false;
+}
+
/* Process only MRP Test frame. All the other MRP frames are processed by
* userspace application
* note: already called with rcu_read_lock
@@ -537,7 +877,7 @@ static void br_mrp_mrm_process(struct br_mrp *mrp, struct net_bridge_port *port,
* not closed
*/
if (mrp->ring_state != BR_MRP_RING_STATE_CLOSED)
- br_mrp_port_open(port->dev, false);
+ br_mrp_ring_port_open(port->dev, false);
}
/* Determin if the test hdr has a better priority than the node */
@@ -591,17 +931,92 @@ static void br_mrp_mra_process(struct br_mrp *mrp, struct net_bridge *br,
mrp->test_count_miss = 0;
}
-/* This will just forward the frame to the other mrp ring port(MRC role) or will
- * not do anything.
+/* Process only MRP InTest frame. All the other MRP frames are processed by
+ * userspace application
+ * note: already called with rcu_read_lock
+ */
+static bool br_mrp_mim_process(struct br_mrp *mrp, struct net_bridge_port *port,
+ struct sk_buff *skb)
+{
+ const struct br_mrp_in_test_hdr *in_hdr;
+ struct br_mrp_in_test_hdr _in_hdr;
+ const struct br_mrp_tlv_hdr *hdr;
+ struct br_mrp_tlv_hdr _hdr;
+
+ /* Each MRP header starts with a version field which is 16 bits.
+ * Therefore skip the version and get directly the TLV header.
+ */
+ hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr);
+ if (!hdr)
+ return false;
+
+ /* The check for InTest frame type was already done */
+ in_hdr = skb_header_pointer(skb, sizeof(uint16_t) + sizeof(_hdr),
+ sizeof(_in_hdr), &_in_hdr);
+ if (!in_hdr)
+ return false;
+
+ /* It needs to process only it's own InTest frames. */
+ if (mrp->in_id != ntohs(in_hdr->id))
+ return false;
+
+ mrp->in_test_count_miss = 0;
+
+ /* Notify the userspace that the ring is closed only when the ring is
+ * not closed
+ */
+ if (mrp->in_state != BR_MRP_IN_STATE_CLOSED)
+ br_mrp_in_port_open(port->dev, false);
+
+ return true;
+}
+
+/* Get the MRP frame type
+ * note: already called with rcu_read_lock
+ */
+static u8 br_mrp_get_frame_type(struct sk_buff *skb)
+{
+ const struct br_mrp_tlv_hdr *hdr;
+ struct br_mrp_tlv_hdr _hdr;
+
+ /* Each MRP header starts with a version field which is 16 bits.
+ * Therefore skip the version and get directly the TLV header.
+ */
+ hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr);
+ if (!hdr)
+ return 0xff;
+
+ return hdr->type;
+}
+
+static bool br_mrp_mrm_behaviour(struct br_mrp *mrp)
+{
+ if (mrp->ring_role == BR_MRP_RING_ROLE_MRM ||
+ (mrp->ring_role == BR_MRP_RING_ROLE_MRA && !mrp->test_monitor))
+ return true;
+
+ return false;
+}
+
+static bool br_mrp_mrc_behaviour(struct br_mrp *mrp)
+{
+ if (mrp->ring_role == BR_MRP_RING_ROLE_MRC ||
+ (mrp->ring_role == BR_MRP_RING_ROLE_MRA && mrp->test_monitor))
+ return true;
+
+ return false;
+}
+
+/* This will just forward the frame to the other mrp ring ports, depending on
+ * the frame type, ring role and interconnect role
* note: already called with rcu_read_lock
*/
static int br_mrp_rcv(struct net_bridge_port *p,
struct sk_buff *skb, struct net_device *dev)
{
- struct net_device *s_dev, *p_dev, *d_dev;
- struct net_bridge_port *p_port, *s_port;
+ struct net_bridge_port *p_port, *s_port, *i_port = NULL;
+ struct net_bridge_port *p_dst, *s_dst, *i_dst = NULL;
struct net_bridge *br;
- struct sk_buff *nskb;
struct br_mrp *mrp;
/* If port is disabled don't accept any frames */
@@ -616,46 +1031,139 @@ static int br_mrp_rcv(struct net_bridge_port *p,
p_port = rcu_dereference(mrp->p_port);
if (!p_port)
return 0;
+ p_dst = p_port;
s_port = rcu_dereference(mrp->s_port);
if (!s_port)
return 0;
+ s_dst = s_port;
- /* If the role is MRM then don't forward the frames */
- if (mrp->ring_role == BR_MRP_RING_ROLE_MRM) {
- br_mrp_mrm_process(mrp, p, skb);
- return 1;
- }
-
- /* If the role is MRA then don't forward the frames if it behaves as
- * MRM node
+ /* If the frame is a ring frame then it is not required to check the
+ * interconnect role and ports to process or forward the frame
*/
- if (mrp->ring_role == BR_MRP_RING_ROLE_MRA) {
- if (!mrp->test_monitor) {
+ if (br_mrp_ring_frame(skb)) {
+ /* If the role is MRM then don't forward the frames */
+ if (mrp->ring_role == BR_MRP_RING_ROLE_MRM) {
br_mrp_mrm_process(mrp, p, skb);
- return 1;
+ goto no_forward;
}
- br_mrp_mra_process(mrp, br, p, skb);
+ /* If the role is MRA then don't forward the frames if it
+ * behaves as MRM node
+ */
+ if (mrp->ring_role == BR_MRP_RING_ROLE_MRA) {
+ if (!mrp->test_monitor) {
+ br_mrp_mrm_process(mrp, p, skb);
+ goto no_forward;
+ }
+
+ br_mrp_mra_process(mrp, br, p, skb);
+ }
+
+ goto forward;
}
- /* Clone the frame and forward it on the other MRP port */
- nskb = skb_clone(skb, GFP_ATOMIC);
- if (!nskb)
- return 0;
+ if (br_mrp_in_frame(skb)) {
+ u8 in_type = br_mrp_get_frame_type(skb);
- p_dev = p_port->dev;
- s_dev = s_port->dev;
+ i_port = rcu_dereference(mrp->i_port);
+ i_dst = i_port;
- if (p_dev == dev)
- d_dev = s_dev;
- else
- d_dev = p_dev;
+ /* If the ring port is in block state it should not forward
+ * In_Test frames
+ */
+ if (br_mrp_is_ring_port(p_port, s_port, p) &&
+ p->state == BR_STATE_BLOCKING &&
+ in_type == BR_MRP_TLV_HEADER_IN_TEST)
+ goto no_forward;
+
+ /* Nodes that behaves as MRM needs to stop forwarding the
+ * frames in case the ring is closed, otherwise will be a loop.
+ * In this case the frame is no forward between the ring ports.
+ */
+ if (br_mrp_mrm_behaviour(mrp) &&
+ br_mrp_is_ring_port(p_port, s_port, p) &&
+ (s_port->state != BR_STATE_FORWARDING ||
+ p_port->state != BR_STATE_FORWARDING)) {
+ p_dst = NULL;
+ s_dst = NULL;
+ }
+
+ /* A node that behaves as MRC and doesn't have a interconnect
+ * role then it should forward all frames between the ring ports
+ * because it doesn't have an interconnect port
+ */
+ if (br_mrp_mrc_behaviour(mrp) &&
+ mrp->in_role == BR_MRP_IN_ROLE_DISABLED)
+ goto forward;
+
+ if (mrp->in_role == BR_MRP_IN_ROLE_MIM) {
+ if (in_type == BR_MRP_TLV_HEADER_IN_TEST) {
+ /* MIM should not forward it's own InTest
+ * frames
+ */
+ if (br_mrp_mim_process(mrp, p, skb)) {
+ goto no_forward;
+ } else {
+ if (br_mrp_is_ring_port(p_port, s_port,
+ p))
+ i_dst = NULL;
+
+ if (br_mrp_is_in_port(i_port, p))
+ goto no_forward;
+ }
+ } else {
+ /* MIM should forward IntLinkChange and
+ * IntTopoChange between ring ports but MIM
+ * should not forward IntLinkChange and
+ * IntTopoChange if the frame was received at
+ * the interconnect port
+ */
+ if (br_mrp_is_ring_port(p_port, s_port, p))
+ i_dst = NULL;
+
+ if (br_mrp_is_in_port(i_port, p))
+ goto no_forward;
+ }
+ }
+
+ if (mrp->in_role == BR_MRP_IN_ROLE_MIC) {
+ /* MIC should forward InTest frames on all ports
+ * regardless of the received port
+ */
+ if (in_type == BR_MRP_TLV_HEADER_IN_TEST)
+ goto forward;
+
+ /* MIC should forward IntLinkChange frames only if they
+ * are received on ring ports to all the ports
+ */
+ if (br_mrp_is_ring_port(p_port, s_port, p) &&
+ (in_type == BR_MRP_TLV_HEADER_IN_LINK_UP ||
+ in_type == BR_MRP_TLV_HEADER_IN_LINK_DOWN))
+ goto forward;
+
+ /* Should forward the InTopo frames only between the
+ * ring ports
+ */
+ if (in_type == BR_MRP_TLV_HEADER_IN_TOPO) {
+ i_dst = NULL;
+ goto forward;
+ }
+
+ /* In all the other cases don't forward the frames */
+ goto no_forward;
+ }
+ }
- nskb->dev = d_dev;
- skb_push(nskb, ETH_HLEN);
- dev_queue_xmit(nskb);
+forward:
+ if (p_dst)
+ br_forward(p_dst, skb, true, false);
+ if (s_dst)
+ br_forward(s_dst, skb, true, false);
+ if (i_dst)
+ br_forward(i_dst, skb, true, false);
+no_forward:
return 1;
}
diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c
index 34b3a8776991..2a2fdf3500c5 100644
--- a/net/bridge/br_mrp_netlink.c
+++ b/net/bridge/br_mrp_netlink.c
@@ -14,6 +14,9 @@ static const struct nla_policy br_mrp_policy[IFLA_BRIDGE_MRP_MAX + 1] = {
[IFLA_BRIDGE_MRP_RING_STATE] = { .type = NLA_NESTED },
[IFLA_BRIDGE_MRP_RING_ROLE] = { .type = NLA_NESTED },
[IFLA_BRIDGE_MRP_START_TEST] = { .type = NLA_NESTED },
+ [IFLA_BRIDGE_MRP_IN_ROLE] = { .type = NLA_NESTED },
+ [IFLA_BRIDGE_MRP_IN_STATE] = { .type = NLA_NESTED },
+ [IFLA_BRIDGE_MRP_START_IN_TEST] = { .type = NLA_NESTED },
};
static const struct nla_policy
@@ -235,6 +238,121 @@ static int br_mrp_start_test_parse(struct net_bridge *br, struct nlattr *attr,
return br_mrp_start_test(br, &test);
}
+static const struct nla_policy
+br_mrp_in_state_policy[IFLA_BRIDGE_MRP_IN_STATE_MAX + 1] = {
+ [IFLA_BRIDGE_MRP_IN_STATE_UNSPEC] = { .type = NLA_REJECT },
+ [IFLA_BRIDGE_MRP_IN_STATE_IN_ID] = { .type = NLA_U32 },
+ [IFLA_BRIDGE_MRP_IN_STATE_STATE] = { .type = NLA_U32 },
+};
+
+static int br_mrp_in_state_parse(struct net_bridge *br, struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_BRIDGE_MRP_IN_STATE_MAX + 1];
+ struct br_mrp_in_state state;
+ int err;
+
+ err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_IN_STATE_MAX, attr,
+ br_mrp_in_state_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[IFLA_BRIDGE_MRP_IN_STATE_IN_ID] ||
+ !tb[IFLA_BRIDGE_MRP_IN_STATE_STATE]) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Missing attribute: IN_ID or STATE");
+ return -EINVAL;
+ }
+
+ memset(&state, 0x0, sizeof(state));
+
+ state.in_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_STATE_IN_ID]);
+ state.in_state = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_STATE_STATE]);
+
+ return br_mrp_set_in_state(br, &state);
+}
+
+static const struct nla_policy
+br_mrp_in_role_policy[IFLA_BRIDGE_MRP_IN_ROLE_MAX + 1] = {
+ [IFLA_BRIDGE_MRP_IN_ROLE_UNSPEC] = { .type = NLA_REJECT },
+ [IFLA_BRIDGE_MRP_IN_ROLE_RING_ID] = { .type = NLA_U32 },
+ [IFLA_BRIDGE_MRP_IN_ROLE_IN_ID] = { .type = NLA_U16 },
+ [IFLA_BRIDGE_MRP_IN_ROLE_ROLE] = { .type = NLA_U32 },
+ [IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX] = { .type = NLA_U32 },
+};
+
+static int br_mrp_in_role_parse(struct net_bridge *br, struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_BRIDGE_MRP_IN_ROLE_MAX + 1];
+ struct br_mrp_in_role role;
+ int err;
+
+ err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_IN_ROLE_MAX, attr,
+ br_mrp_in_role_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[IFLA_BRIDGE_MRP_IN_ROLE_RING_ID] ||
+ !tb[IFLA_BRIDGE_MRP_IN_ROLE_IN_ID] ||
+ !tb[IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX] ||
+ !tb[IFLA_BRIDGE_MRP_IN_ROLE_ROLE]) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Missing attribute: RING_ID or ROLE or IN_ID or I_IFINDEX");
+ return -EINVAL;
+ }
+
+ memset(&role, 0x0, sizeof(role));
+
+ role.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_ROLE_RING_ID]);
+ role.in_id = nla_get_u16(tb[IFLA_BRIDGE_MRP_IN_ROLE_IN_ID]);
+ role.i_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX]);
+ role.in_role = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_ROLE_ROLE]);
+
+ return br_mrp_set_in_role(br, &role);
+}
+
+static const struct nla_policy
+br_mrp_start_in_test_policy[IFLA_BRIDGE_MRP_START_IN_TEST_MAX + 1] = {
+ [IFLA_BRIDGE_MRP_START_IN_TEST_UNSPEC] = { .type = NLA_REJECT },
+ [IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID] = { .type = NLA_U32 },
+ [IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] = { .type = NLA_U32 },
+ [IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS] = { .type = NLA_U32 },
+ [IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD] = { .type = NLA_U32 },
+};
+
+static int br_mrp_start_in_test_parse(struct net_bridge *br,
+ struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_BRIDGE_MRP_START_IN_TEST_MAX + 1];
+ struct br_mrp_start_in_test test;
+ int err;
+
+ err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_START_IN_TEST_MAX, attr,
+ br_mrp_start_in_test_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID] ||
+ !tb[IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] ||
+ !tb[IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS] ||
+ !tb[IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD]) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Missing attribute: RING_ID or INTERVAL or MAX_MISS or PERIOD");
+ return -EINVAL;
+ }
+
+ memset(&test, 0x0, sizeof(test));
+
+ test.in_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID]);
+ test.interval = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL]);
+ test.max_miss = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS]);
+ test.period = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD]);
+
+ return br_mrp_start_in_test(br, &test);
+}
+
int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
struct nlattr *attr, int cmd, struct netlink_ext_ack *extack)
{
@@ -301,10 +419,114 @@ int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
return err;
}
+ if (tb[IFLA_BRIDGE_MRP_IN_STATE]) {
+ err = br_mrp_in_state_parse(br, tb[IFLA_BRIDGE_MRP_IN_STATE],
+ extack);
+ if (err)
+ return err;
+ }
+
+ if (tb[IFLA_BRIDGE_MRP_IN_ROLE]) {
+ err = br_mrp_in_role_parse(br, tb[IFLA_BRIDGE_MRP_IN_ROLE],
+ extack);
+ if (err)
+ return err;
+ }
+
+ if (tb[IFLA_BRIDGE_MRP_START_IN_TEST]) {
+ err = br_mrp_start_in_test_parse(br,
+ tb[IFLA_BRIDGE_MRP_START_IN_TEST],
+ extack);
+ if (err)
+ return err;
+ }
+
return 0;
}
-int br_mrp_port_open(struct net_device *dev, u8 loc)
+int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br)
+{
+ struct nlattr *tb, *mrp_tb;
+ struct br_mrp *mrp;
+
+ mrp_tb = nla_nest_start_noflag(skb, IFLA_BRIDGE_MRP);
+ if (!mrp_tb)
+ return -EMSGSIZE;
+
+ list_for_each_entry_rcu(mrp, &br->mrp_list, list) {
+ struct net_bridge_port *p;
+
+ tb = nla_nest_start_noflag(skb, IFLA_BRIDGE_MRP_INFO);
+ if (!tb)
+ goto nla_info_failure;
+
+ if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_RING_ID,
+ mrp->ring_id))
+ goto nla_put_failure;
+
+ p = rcu_dereference(mrp->p_port);
+ if (p && nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_P_IFINDEX,
+ p->dev->ifindex))
+ goto nla_put_failure;
+
+ p = rcu_dereference(mrp->s_port);
+ if (p && nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_S_IFINDEX,
+ p->dev->ifindex))
+ goto nla_put_failure;
+
+ p = rcu_dereference(mrp->i_port);
+ if (p && nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_I_IFINDEX,
+ p->dev->ifindex))
+ goto nla_put_failure;
+
+ if (nla_put_u16(skb, IFLA_BRIDGE_MRP_INFO_PRIO,
+ mrp->prio))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_RING_STATE,
+ mrp->ring_state))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_RING_ROLE,
+ mrp->ring_role))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_TEST_INTERVAL,
+ mrp->test_interval))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_TEST_MAX_MISS,
+ mrp->test_max_miss))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_TEST_MONITOR,
+ mrp->test_monitor))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_STATE,
+ mrp->in_state))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_ROLE,
+ mrp->in_role))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_TEST_INTERVAL,
+ mrp->in_test_interval))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_TEST_MAX_MISS,
+ mrp->in_test_max_miss))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, tb);
+ }
+ nla_nest_end(skb, mrp_tb);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, tb);
+
+nla_info_failure:
+ nla_nest_cancel(skb, mrp_tb);
+
+ return -EMSGSIZE;
+}
+
+int br_mrp_ring_port_open(struct net_device *dev, u8 loc)
{
struct net_bridge_port *p;
int err = 0;
@@ -325,3 +547,25 @@ int br_mrp_port_open(struct net_device *dev, u8 loc)
out:
return err;
}
+
+int br_mrp_in_port_open(struct net_device *dev, u8 loc)
+{
+ struct net_bridge_port *p;
+ int err = 0;
+
+ p = br_port_get_rcu(dev);
+ if (!p) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (loc)
+ p->flags |= BR_MRP_LOST_IN_CONT;
+ else
+ p->flags &= ~BR_MRP_LOST_IN_CONT;
+
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
+
+out:
+ return err;
+}
diff --git a/net/bridge/br_mrp_switchdev.c b/net/bridge/br_mrp_switchdev.c
index 0da68a0da4b5..ed547e03ace1 100644
--- a/net/bridge/br_mrp_switchdev.c
+++ b/net/bridge/br_mrp_switchdev.c
@@ -107,6 +107,68 @@ int br_mrp_switchdev_set_ring_state(struct net_bridge *br,
return 0;
}
+int br_mrp_switchdev_set_in_role(struct net_bridge *br, struct br_mrp *mrp,
+ u16 in_id, u32 ring_id,
+ enum br_mrp_in_role_type role)
+{
+ struct switchdev_obj_in_role_mrp mrp_role = {
+ .obj.orig_dev = br->dev,
+ .obj.id = SWITCHDEV_OBJ_ID_IN_ROLE_MRP,
+ .in_role = role,
+ .in_id = mrp->in_id,
+ .ring_id = mrp->ring_id,
+ .i_port = rtnl_dereference(mrp->i_port)->dev,
+ };
+ int err;
+
+ if (role == BR_MRP_IN_ROLE_DISABLED)
+ err = switchdev_port_obj_del(br->dev, &mrp_role.obj);
+ else
+ err = switchdev_port_obj_add(br->dev, &mrp_role.obj, NULL);
+
+ return err;
+}
+
+int br_mrp_switchdev_set_in_state(struct net_bridge *br, struct br_mrp *mrp,
+ enum br_mrp_in_state_type state)
+{
+ struct switchdev_obj_in_state_mrp mrp_state = {
+ .obj.orig_dev = br->dev,
+ .obj.id = SWITCHDEV_OBJ_ID_IN_STATE_MRP,
+ .in_state = state,
+ .in_id = mrp->in_id,
+ };
+ int err;
+
+ err = switchdev_port_obj_add(br->dev, &mrp_state.obj, NULL);
+
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ return 0;
+}
+
+int br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp,
+ u32 interval, u8 max_miss, u32 period)
+{
+ struct switchdev_obj_in_test_mrp test = {
+ .obj.orig_dev = br->dev,
+ .obj.id = SWITCHDEV_OBJ_ID_IN_TEST_MRP,
+ .interval = interval,
+ .max_miss = max_miss,
+ .in_id = mrp->in_id,
+ .period = period,
+ };
+ int err;
+
+ if (interval == 0)
+ err = switchdev_port_obj_del(br->dev, &test.obj);
+ else
+ err = switchdev_port_obj_add(br->dev, &test.obj, NULL);
+
+ return err;
+}
+
int br_mrp_port_switchdev_set_state(struct net_bridge_port *p,
enum br_mrp_port_state_type state)
{
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 240e260e3461..147d52596e17 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -152,6 +152,7 @@ static inline size_t br_port_info_size(void)
#endif
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_RING_OPEN */
+ + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_IN_OPEN */
+ 0;
}
@@ -216,6 +217,8 @@ static int br_port_fill_attrs(struct sk_buff *skb,
!!(p->flags & BR_NEIGH_SUPPRESS)) ||
nla_put_u8(skb, IFLA_BRPORT_MRP_RING_OPEN, !!(p->flags &
BR_MRP_LOST_CONT)) ||
+ nla_put_u8(skb, IFLA_BRPORT_MRP_IN_OPEN,
+ !!(p->flags & BR_MRP_LOST_IN_CONT)) ||
nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)))
return -EMSGSIZE;
@@ -453,6 +456,28 @@ static int br_fill_ifinfo(struct sk_buff *skb,
rcu_read_unlock();
if (err)
goto nla_put_failure;
+
+ nla_nest_end(skb, af);
+ }
+
+ if (filter_mask & RTEXT_FILTER_MRP) {
+ struct nlattr *af;
+ int err;
+
+ if (!br_mrp_enabled(br) || port)
+ goto done;
+
+ af = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
+ if (!af)
+ goto nla_put_failure;
+
+ rcu_read_lock();
+ err = br_mrp_fill_info(skb, br);
+ rcu_read_unlock();
+
+ if (err)
+ goto nla_put_failure;
+
nla_nest_end(skb, af);
}
@@ -516,7 +541,8 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct net_bridge_port *port = br_port_get_rtnl(dev);
if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN) &&
- !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED))
+ !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) &&
+ !(filter_mask & RTEXT_FILTER_MRP))
return 0;
return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, nlflags,
diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c
index 162998e2f039..8914290c75d4 100644
--- a/net/bridge/br_netlink_tunnel.c
+++ b/net/bridge/br_netlink_tunnel.c
@@ -250,6 +250,36 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr,
return 0;
}
+/* send a notification if v_curr can't enter the range and start a new one */
+static void __vlan_tunnel_handle_range(const struct net_bridge_port *p,
+ struct net_bridge_vlan **v_start,
+ struct net_bridge_vlan **v_end,
+ int v_curr, bool curr_change)
+{
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *v;
+
+ vg = nbp_vlan_group(p);
+ if (!vg)
+ return;
+
+ v = br_vlan_find(vg, v_curr);
+
+ if (!*v_start)
+ goto out_init;
+
+ if (v && curr_change && br_vlan_can_enter_range(v, *v_end)) {
+ *v_end = v;
+ return;
+ }
+
+ br_vlan_notify(p->br, p, (*v_start)->vid, (*v_end)->vid, RTM_NEWVLAN);
+out_init:
+ /* we start a range only if there are any changes to notify about */
+ *v_start = curr_change ? v : NULL;
+ *v_end = *v_start;
+}
+
int br_process_vlan_tunnel_info(const struct net_bridge *br,
const struct net_bridge_port *p, int cmd,
struct vtunnel_info *tinfo_curr,
@@ -263,6 +293,7 @@ int br_process_vlan_tunnel_info(const struct net_bridge *br,
return -EINVAL;
memcpy(tinfo_last, tinfo_curr, sizeof(struct vtunnel_info));
} else if (tinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_END) {
+ struct net_bridge_vlan *v_start = NULL, *v_end = NULL;
int t, v;
if (!(tinfo_last->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN))
@@ -272,11 +303,24 @@ int br_process_vlan_tunnel_info(const struct net_bridge *br,
return -EINVAL;
t = tinfo_last->tunid;
for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) {
- err = br_vlan_tunnel_info(p, cmd, v, t, changed);
+ bool curr_change = false;
+
+ err = br_vlan_tunnel_info(p, cmd, v, t, &curr_change);
if (err)
- return err;
+ break;
t++;
+
+ if (curr_change)
+ *changed = curr_change;
+ __vlan_tunnel_handle_range(p, &v_start, &v_end, v,
+ curr_change);
}
+ if (v_start && v_end)
+ br_vlan_notify(br, p, v_start->vid, v_end->vid,
+ RTM_NEWVLAN);
+ if (err)
+ return err;
+
memset(tinfo_last, 0, sizeof(struct vtunnel_info));
memset(tinfo_curr, 0, sizeof(struct vtunnel_info));
} else {
@@ -286,6 +330,7 @@ int br_process_vlan_tunnel_info(const struct net_bridge *br,
tinfo_curr->tunid, changed);
if (err)
return err;
+ br_vlan_notify(br, p, tinfo_curr->vid, 0, RTM_NEWVLAN);
memset(tinfo_last, 0, sizeof(struct vtunnel_info));
memset(tinfo_curr, 0, sizeof(struct vtunnel_info));
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index e0ea6dbbc97e..baa1500f384f 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -48,6 +48,8 @@ enum {
/* Path to usermode spanning tree program */
#define BR_STP_PROG "/sbin/bridge-stp"
+#define BR_FDB_NOTIFY_SETTABLE_BITS (FDB_NOTIFY_BIT | FDB_NOTIFY_INACTIVE_BIT)
+
typedef struct bridge_id bridge_id;
typedef struct mac_addr mac_addr;
typedef __u16 port_id;
@@ -184,6 +186,8 @@ enum {
BR_FDB_ADDED_BY_USER,
BR_FDB_ADDED_BY_EXT_LEARN,
BR_FDB_OFFLOADED,
+ BR_FDB_NOTIFY,
+ BR_FDB_NOTIFY_INACTIVE
};
struct net_bridge_fdb_key {
@@ -1196,6 +1200,12 @@ static inline void br_vlan_notify(const struct net_bridge *br,
int cmd)
{
}
+
+static inline bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
+ const struct net_bridge_vlan *range_end)
+{
+ return true;
+}
#endif
/* br_vlan_options.c */
@@ -1313,6 +1323,7 @@ int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb);
bool br_mrp_enabled(struct net_bridge *br);
void br_mrp_port_del(struct net_bridge *br, struct net_bridge_port *p);
+int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br);
#else
static inline int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
struct nlattr *attr, int cmd,
@@ -1335,6 +1346,12 @@ static inline void br_mrp_port_del(struct net_bridge *br,
struct net_bridge_port *p)
{
}
+
+static inline int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br)
+{
+ return 0;
+}
+
#endif
/* br_netlink.c */
diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h
index 315eb37d89f0..af0e9eff6549 100644
--- a/net/bridge/br_private_mrp.h
+++ b/net/bridge/br_private_mrp.h
@@ -12,8 +12,10 @@ struct br_mrp {
struct net_bridge_port __rcu *p_port;
struct net_bridge_port __rcu *s_port;
+ struct net_bridge_port __rcu *i_port;
u32 ring_id;
+ u16 in_id;
u16 prio;
enum br_mrp_ring_role_type ring_role;
@@ -21,6 +23,11 @@ struct br_mrp {
enum br_mrp_ring_state_type ring_state;
u32 ring_transitions;
+ enum br_mrp_in_role_type in_role;
+ u8 in_role_offloaded;
+ enum br_mrp_in_state_type in_state;
+ u32 in_transitions;
+
struct delayed_work test_work;
u32 test_interval;
unsigned long test_end;
@@ -28,6 +35,12 @@ struct br_mrp {
u32 test_max_miss;
bool test_monitor;
+ struct delayed_work in_test_work;
+ u32 in_test_interval;
+ unsigned long in_test_end;
+ u32 in_test_count_miss;
+ u32 in_test_max_miss;
+
u32 seq_id;
struct rcu_head rcu;
@@ -44,6 +57,10 @@ int br_mrp_set_ring_state(struct net_bridge *br,
struct br_mrp_ring_state *state);
int br_mrp_set_ring_role(struct net_bridge *br, struct br_mrp_ring_role *role);
int br_mrp_start_test(struct net_bridge *br, struct br_mrp_start_test *test);
+int br_mrp_set_in_state(struct net_bridge *br, struct br_mrp_in_state *state);
+int br_mrp_set_in_role(struct net_bridge *br, struct br_mrp_in_role *role);
+int br_mrp_start_in_test(struct net_bridge *br,
+ struct br_mrp_start_in_test *test);
/* br_mrp_switchdev.c */
int br_mrp_switchdev_add(struct net_bridge *br, struct br_mrp *mrp);
@@ -59,8 +76,16 @@ int br_mrp_port_switchdev_set_state(struct net_bridge_port *p,
enum br_mrp_port_state_type state);
int br_mrp_port_switchdev_set_role(struct net_bridge_port *p,
enum br_mrp_port_role_type role);
+int br_mrp_switchdev_set_in_role(struct net_bridge *br, struct br_mrp *mrp,
+ u16 in_id, u32 ring_id,
+ enum br_mrp_in_role_type role);
+int br_mrp_switchdev_set_in_state(struct net_bridge *br, struct br_mrp *mrp,
+ enum br_mrp_in_state_type state);
+int br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp,
+ u32 interval, u8 max_miss, u32 period);
/* br_mrp_netlink.c */
-int br_mrp_port_open(struct net_device *dev, u8 loc);
+int br_mrp_ring_port_open(struct net_device *dev, u8 loc);
+int br_mrp_in_port_open(struct net_device *dev, u8 loc);
#endif /* _BR_PRIVATE_MRP_H */
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index c83ffe912163..fe66932f5abb 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1063,14 +1063,13 @@ free_counterstmp:
}
/* replace the table */
-static int do_replace(struct net *net, const void __user *user,
- unsigned int len)
+static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
int ret, countersize;
struct ebt_table_info *newinfo;
struct ebt_replace tmp;
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
if (len != sizeof(tmp) + tmp.entries_size)
@@ -1242,9 +1241,8 @@ void ebt_unregister_table(struct net *net, struct ebt_table *table,
/* userspace just supplied us with counters */
static int do_update_counters(struct net *net, const char *name,
- struct ebt_counter __user *counters,
- unsigned int num_counters,
- const void __user *user, unsigned int len)
+ struct ebt_counter __user *counters,
+ unsigned int num_counters, unsigned int len)
{
int i, ret;
struct ebt_counter *tmp;
@@ -1287,19 +1285,18 @@ free_tmp:
return ret;
}
-static int update_counters(struct net *net, const void __user *user,
- unsigned int len)
+static int update_counters(struct net *net, sockptr_t arg, unsigned int len)
{
struct ebt_replace hlp;
- if (copy_from_user(&hlp, user, sizeof(hlp)))
+ if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
return -EFAULT;
if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter))
return -EINVAL;
return do_update_counters(net, hlp.name, hlp.counters,
- hlp.num_counters, user, len);
+ hlp.num_counters, len);
}
static inline int ebt_obj_to_user(char __user *um, const char *_name,
@@ -1451,86 +1448,6 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user,
ebt_entry_to_user, entries, tmp.entries);
}
-static int do_ebt_set_ctl(struct sock *sk,
- int cmd, void __user *user, unsigned int len)
-{
- int ret;
- struct net *net = sock_net(sk);
-
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
- switch (cmd) {
- case EBT_SO_SET_ENTRIES:
- ret = do_replace(net, user, len);
- break;
- case EBT_SO_SET_COUNTERS:
- ret = update_counters(net, user, len);
- break;
- default:
- ret = -EINVAL;
- }
- return ret;
-}
-
-static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
-{
- int ret;
- struct ebt_replace tmp;
- struct ebt_table *t;
- struct net *net = sock_net(sk);
-
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
- if (copy_from_user(&tmp, user, sizeof(tmp)))
- return -EFAULT;
-
- tmp.name[sizeof(tmp.name) - 1] = '\0';
-
- t = find_table_lock(net, tmp.name, &ret, &ebt_mutex);
- if (!t)
- return ret;
-
- switch (cmd) {
- case EBT_SO_GET_INFO:
- case EBT_SO_GET_INIT_INFO:
- if (*len != sizeof(struct ebt_replace)) {
- ret = -EINVAL;
- mutex_unlock(&ebt_mutex);
- break;
- }
- if (cmd == EBT_SO_GET_INFO) {
- tmp.nentries = t->private->nentries;
- tmp.entries_size = t->private->entries_size;
- tmp.valid_hooks = t->valid_hooks;
- } else {
- tmp.nentries = t->table->nentries;
- tmp.entries_size = t->table->entries_size;
- tmp.valid_hooks = t->table->valid_hooks;
- }
- mutex_unlock(&ebt_mutex);
- if (copy_to_user(user, &tmp, *len) != 0) {
- ret = -EFAULT;
- break;
- }
- ret = 0;
- break;
-
- case EBT_SO_GET_ENTRIES:
- case EBT_SO_GET_INIT_ENTRIES:
- ret = copy_everything_to_user(t, user, len, cmd);
- mutex_unlock(&ebt_mutex);
- break;
-
- default:
- mutex_unlock(&ebt_mutex);
- ret = -EINVAL;
- }
-
- return ret;
-}
-
#ifdef CONFIG_COMPAT
/* 32 bit-userspace compatibility definitions. */
struct compat_ebt_replace {
@@ -1935,7 +1852,7 @@ static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt,
size_kern = match_size;
module_put(match->me);
break;
- case EBT_COMPAT_WATCHER: /* fallthrough */
+ case EBT_COMPAT_WATCHER:
case EBT_COMPAT_TARGET:
wt = xt_request_find_target(NFPROTO_BRIDGE, name,
mwt->u.revision);
@@ -2160,7 +2077,7 @@ static int compat_copy_entries(unsigned char *data, unsigned int size_user,
static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl,
- void __user *user, unsigned int len)
+ sockptr_t arg, unsigned int len)
{
struct compat_ebt_replace tmp;
int i;
@@ -2168,7 +2085,7 @@ static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl,
if (len < sizeof(tmp))
return -EINVAL;
- if (copy_from_user(&tmp, user, sizeof(tmp)))
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)))
return -EFAULT;
if (len != sizeof(tmp) + tmp.entries_size)
@@ -2195,8 +2112,7 @@ static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl,
return 0;
}
-static int compat_do_replace(struct net *net, void __user *user,
- unsigned int len)
+static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
int ret, i, countersize, size64;
struct ebt_table_info *newinfo;
@@ -2204,10 +2120,10 @@ static int compat_do_replace(struct net *net, void __user *user,
struct ebt_entries_buf_state state;
void *entries_tmp;
- ret = compat_copy_ebt_replace_from_user(&tmp, user, len);
+ ret = compat_copy_ebt_replace_from_user(&tmp, arg, len);
if (ret) {
/* try real handler in case userland supplied needed padding */
- if (ret == -EINVAL && do_replace(net, user, len) == 0)
+ if (ret == -EINVAL && do_replace(net, arg, len) == 0)
ret = 0;
return ret;
}
@@ -2298,42 +2214,20 @@ out_unlock:
goto free_entries;
}
-static int compat_update_counters(struct net *net, void __user *user,
+static int compat_update_counters(struct net *net, sockptr_t arg,
unsigned int len)
{
struct compat_ebt_replace hlp;
- if (copy_from_user(&hlp, user, sizeof(hlp)))
+ if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
return -EFAULT;
/* try real handler in case userland supplied needed padding */
if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter))
- return update_counters(net, user, len);
+ return update_counters(net, arg, len);
return do_update_counters(net, hlp.name, compat_ptr(hlp.counters),
- hlp.num_counters, user, len);
-}
-
-static int compat_do_ebt_set_ctl(struct sock *sk,
- int cmd, void __user *user, unsigned int len)
-{
- int ret;
- struct net *net = sock_net(sk);
-
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
- switch (cmd) {
- case EBT_SO_SET_ENTRIES:
- ret = compat_do_replace(net, user, len);
- break;
- case EBT_SO_SET_COUNTERS:
- ret = compat_update_counters(net, user, len);
- break;
- default:
- ret = -EINVAL;
- }
- return ret;
+ hlp.num_counters, len);
}
static int compat_do_ebt_get_ctl(struct sock *sk, int cmd,
@@ -2344,14 +2238,6 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd,
struct ebt_table *t;
struct net *net = sock_net(sk);
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
- /* try real handler in case userland supplied needed padding */
- if ((cmd == EBT_SO_GET_INFO ||
- cmd == EBT_SO_GET_INIT_INFO) && *len != sizeof(tmp))
- return do_ebt_get_ctl(sk, cmd, user, len);
-
if (copy_from_user(&tmp, user, sizeof(tmp)))
return -EFAULT;
@@ -2413,20 +2299,112 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd,
}
#endif
+static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+ struct net *net = sock_net(sk);
+ struct ebt_replace tmp;
+ struct ebt_table *t;
+ int ret;
+
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+#ifdef CONFIG_COMPAT
+ /* try real handler in case userland supplied needed padding */
+ if (in_compat_syscall() &&
+ ((cmd != EBT_SO_GET_INFO && cmd != EBT_SO_GET_INIT_INFO) ||
+ *len != sizeof(tmp)))
+ return compat_do_ebt_get_ctl(sk, cmd, user, len);
+#endif
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)))
+ return -EFAULT;
+
+ tmp.name[sizeof(tmp.name) - 1] = '\0';
+
+ t = find_table_lock(net, tmp.name, &ret, &ebt_mutex);
+ if (!t)
+ return ret;
+
+ switch (cmd) {
+ case EBT_SO_GET_INFO:
+ case EBT_SO_GET_INIT_INFO:
+ if (*len != sizeof(struct ebt_replace)) {
+ ret = -EINVAL;
+ mutex_unlock(&ebt_mutex);
+ break;
+ }
+ if (cmd == EBT_SO_GET_INFO) {
+ tmp.nentries = t->private->nentries;
+ tmp.entries_size = t->private->entries_size;
+ tmp.valid_hooks = t->valid_hooks;
+ } else {
+ tmp.nentries = t->table->nentries;
+ tmp.entries_size = t->table->entries_size;
+ tmp.valid_hooks = t->table->valid_hooks;
+ }
+ mutex_unlock(&ebt_mutex);
+ if (copy_to_user(user, &tmp, *len) != 0) {
+ ret = -EFAULT;
+ break;
+ }
+ ret = 0;
+ break;
+
+ case EBT_SO_GET_ENTRIES:
+ case EBT_SO_GET_INIT_ENTRIES:
+ ret = copy_everything_to_user(t, user, len, cmd);
+ mutex_unlock(&ebt_mutex);
+ break;
+
+ default:
+ mutex_unlock(&ebt_mutex);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int do_ebt_set_ctl(struct sock *sk, int cmd, sockptr_t arg,
+ unsigned int len)
+{
+ struct net *net = sock_net(sk);
+ int ret;
+
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case EBT_SO_SET_ENTRIES:
+#ifdef CONFIG_COMPAT
+ if (in_compat_syscall())
+ ret = compat_do_replace(net, arg, len);
+ else
+#endif
+ ret = do_replace(net, arg, len);
+ break;
+ case EBT_SO_SET_COUNTERS:
+#ifdef CONFIG_COMPAT
+ if (in_compat_syscall())
+ ret = compat_update_counters(net, arg, len);
+ else
+#endif
+ ret = update_counters(net, arg, len);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ return ret;
+}
+
static struct nf_sockopt_ops ebt_sockopts = {
.pf = PF_INET,
.set_optmin = EBT_BASE_CTL,
.set_optmax = EBT_SO_SET_MAX + 1,
.set = do_ebt_set_ctl,
-#ifdef CONFIG_COMPAT
- .compat_set = compat_do_ebt_set_ctl,
-#endif
.get_optmin = EBT_BASE_CTL,
.get_optmax = EBT_SO_GET_MAX + 1,
.get = do_ebt_get_ctl,
-#ifdef CONFIG_COMPAT
- .compat_get = compat_do_ebt_get_ctl,
-#endif
.owner = THIS_MODULE,
};
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index ef14da50a981..3ad0a1df6712 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -669,8 +669,8 @@ out_err:
return sent ? : err;
}
-static int setsockopt(struct socket *sock,
- int lvl, int opt, char __user *ov, unsigned int ol)
+static int setsockopt(struct socket *sock, int lvl, int opt, sockptr_t ov,
+ unsigned int ol)
{
struct sock *sk = sock->sk;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
@@ -685,7 +685,7 @@ static int setsockopt(struct socket *sock,
return -EINVAL;
if (lvl != SOL_CAIF)
goto bad_sol;
- if (copy_from_user(&linksel, ov, sizeof(int)))
+ if (copy_from_sockptr(&linksel, ov, sizeof(int)))
return -EINVAL;
lock_sock(&(cf_sk->sk));
cf_sk->conn_req.link_selector = linksel;
@@ -699,7 +699,7 @@ static int setsockopt(struct socket *sock,
return -ENOPROTOOPT;
lock_sock(&(cf_sk->sk));
if (ol > sizeof(cf_sk->conn_req.param.data) ||
- copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) {
+ copy_from_sockptr(&cf_sk->conn_req.param.data, ov, ol)) {
release_sock(&cf_sk->sk);
return -EINVAL;
}
@@ -981,7 +981,6 @@ static const struct proto_ops caif_seqpacket_ops = {
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = setsockopt,
- .getsockopt = sock_no_getsockopt,
.sendmsg = caif_seqpkt_sendmsg,
.recvmsg = caif_seqpkt_recvmsg,
.mmap = sock_no_mmap,
@@ -1002,7 +1001,6 @@ static const struct proto_ops caif_stream_ops = {
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = setsockopt,
- .getsockopt = sock_no_getsockopt,
.sendmsg = caif_stream_sendmsg,
.recvmsg = caif_stream_recvmsg,
.mmap = sock_no_mmap,
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 128d37a4c2e0..5c06404bdf3e 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -410,6 +410,7 @@ static struct hlist_head *can_rcv_list_find(canid_t *can_id, canid_t *mask,
/**
* can_rx_register - subscribe CAN frames from a specific interface
+ * @net: the applicable net namespace
* @dev: pointer to netdevice (NULL => subcribe from 'all' CAN devices list)
* @can_id: CAN identifier (see description)
* @mask: CAN mask (see description)
@@ -498,6 +499,7 @@ static void can_rx_delete_receiver(struct rcu_head *rp)
/**
* can_rx_unregister - unsubscribe CAN frames from a specific interface
+ * @net: the applicable net namespace
* @dev: pointer to netdevice (NULL => unsubscribe from 'all' CAN devices list)
* @can_id: CAN identifier
* @mask: CAN mask
diff --git a/net/can/bcm.c b/net/can/bcm.c
index c96fa0f33db3..d14ea12affb1 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1648,8 +1648,6 @@ static const struct proto_ops bcm_ops = {
.gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.sendmsg = bcm_sendmsg,
.recvmsg = bcm_recvmsg,
.mmap = sock_no_mmap,
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index f7587428febd..78ff9b3f1d40 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -627,14 +627,14 @@ static int j1939_sk_release(struct socket *sock)
return 0;
}
-static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, char __user *optval,
+static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, sockptr_t optval,
unsigned int optlen, int flag)
{
int tmp;
if (optlen != sizeof(tmp))
return -EINVAL;
- if (copy_from_user(&tmp, optval, optlen))
+ if (copy_from_sockptr(&tmp, optval, optlen))
return -EFAULT;
lock_sock(&jsk->sk);
if (tmp)
@@ -646,7 +646,7 @@ static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, char __user *optval,
}
static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct j1939_sock *jsk = j1939_sk(sk);
@@ -658,7 +658,7 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case SO_J1939_FILTER:
- if (optval) {
+ if (!sockptr_is_null(optval)) {
struct j1939_filter *f;
int c;
@@ -670,7 +670,7 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
count = optlen / sizeof(*filters);
- filters = memdup_user(optval, optlen);
+ filters = memdup_sockptr(optval, optlen);
if (IS_ERR(filters))
return PTR_ERR(filters);
@@ -703,7 +703,7 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
case SO_J1939_SEND_PRIO:
if (optlen != sizeof(tmp))
return -EINVAL;
- if (copy_from_user(&tmp, optval, optlen))
+ if (copy_from_sockptr(&tmp, optval, optlen))
return -EFAULT;
if (tmp < 0 || tmp > 7)
return -EDOM;
diff --git a/net/can/raw.c b/net/can/raw.c
index 59c039d73c6d..94a9405658dc 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -485,7 +485,7 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
}
static int raw_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct raw_sock *ro = raw_sk(sk);
@@ -511,11 +511,11 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (count > 1) {
/* filter does not fit into dfilter => alloc space */
- filter = memdup_user(optval, optlen);
+ filter = memdup_sockptr(optval, optlen);
if (IS_ERR(filter))
return PTR_ERR(filter);
} else if (count == 1) {
- if (copy_from_user(&sfilter, optval, sizeof(sfilter)))
+ if (copy_from_sockptr(&sfilter, optval, sizeof(sfilter)))
return -EFAULT;
}
@@ -568,7 +568,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (optlen != sizeof(err_mask))
return -EINVAL;
- if (copy_from_user(&err_mask, optval, optlen))
+ if (copy_from_sockptr(&err_mask, optval, optlen))
return -EFAULT;
err_mask &= CAN_ERR_MASK;
@@ -607,7 +607,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (optlen != sizeof(ro->loopback))
return -EINVAL;
- if (copy_from_user(&ro->loopback, optval, optlen))
+ if (copy_from_sockptr(&ro->loopback, optval, optlen))
return -EFAULT;
break;
@@ -616,7 +616,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (optlen != sizeof(ro->recv_own_msgs))
return -EINVAL;
- if (copy_from_user(&ro->recv_own_msgs, optval, optlen))
+ if (copy_from_sockptr(&ro->recv_own_msgs, optval, optlen))
return -EFAULT;
break;
@@ -625,7 +625,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (optlen != sizeof(ro->fd_frames))
return -EINVAL;
- if (copy_from_user(&ro->fd_frames, optval, optlen))
+ if (copy_from_sockptr(&ro->fd_frames, optval, optlen))
return -EFAULT;
break;
@@ -634,7 +634,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (optlen != sizeof(ro->join_filters))
return -EINVAL;
- if (copy_from_user(&ro->join_filters, optval, optlen))
+ if (copy_from_sockptr(&ro->join_filters, optval, optlen))
return -EFAULT;
break;
diff --git a/net/compat.c b/net/compat.c
index 434838bef5f8..93ea07e93ae5 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -335,120 +335,6 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm)
__scm_destroy(scm);
}
-/* allocate a 64-bit sock_fprog on the user stack for duration of syscall. */
-struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval)
-{
- struct compat_sock_fprog __user *fprog32 = (struct compat_sock_fprog __user *)optval;
- struct sock_fprog __user *kfprog = compat_alloc_user_space(sizeof(struct sock_fprog));
- struct compat_sock_fprog f32;
- struct sock_fprog f;
-
- if (copy_from_user(&f32, fprog32, sizeof(*fprog32)))
- return NULL;
- memset(&f, 0, sizeof(f));
- f.len = f32.len;
- f.filter = compat_ptr(f32.filter);
- if (copy_to_user(kfprog, &f, sizeof(struct sock_fprog)))
- return NULL;
-
- return kfprog;
-}
-EXPORT_SYMBOL_GPL(get_compat_bpf_fprog);
-
-static int do_set_attach_filter(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- struct sock_fprog __user *kfprog;
-
- kfprog = get_compat_bpf_fprog(optval);
- if (!kfprog)
- return -EFAULT;
-
- return sock_setsockopt(sock, level, optname, (char __user *)kfprog,
- sizeof(struct sock_fprog));
-}
-
-static int compat_sock_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- if (optname == SO_ATTACH_FILTER ||
- optname == SO_ATTACH_REUSEPORT_CBPF)
- return do_set_attach_filter(sock, level, optname,
- optval, optlen);
- return sock_setsockopt(sock, level, optname, optval, optlen);
-}
-
-static int __compat_sys_setsockopt(int fd, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- int err;
- struct socket *sock;
-
- if (optlen > INT_MAX)
- return -EINVAL;
-
- sock = sockfd_lookup(fd, &err);
- if (sock) {
- err = security_socket_setsockopt(sock, level, optname);
- if (err) {
- sockfd_put(sock);
- return err;
- }
-
- if (level == SOL_SOCKET)
- err = compat_sock_setsockopt(sock, level,
- optname, optval, optlen);
- else if (sock->ops->compat_setsockopt)
- err = sock->ops->compat_setsockopt(sock, level,
- optname, optval, optlen);
- else
- err = sock->ops->setsockopt(sock, level,
- optname, optval, optlen);
- sockfd_put(sock);
- }
- return err;
-}
-
-COMPAT_SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
- char __user *, optval, unsigned int, optlen)
-{
- return __compat_sys_setsockopt(fd, level, optname, optval, optlen);
-}
-
-static int __compat_sys_getsockopt(int fd, int level, int optname,
- char __user *optval,
- int __user *optlen)
-{
- int err;
- struct socket *sock = sockfd_lookup(fd, &err);
-
- if (sock) {
- err = security_socket_getsockopt(sock, level, optname);
- if (err) {
- sockfd_put(sock);
- return err;
- }
-
- if (level == SOL_SOCKET)
- err = sock_getsockopt(sock, level,
- optname, optval, optlen);
- else if (sock->ops->compat_getsockopt)
- err = sock->ops->compat_getsockopt(sock, level,
- optname, optval, optlen);
- else
- err = sock->ops->getsockopt(sock, level,
- optname, optval, optlen);
- sockfd_put(sock);
- }
- return err;
-}
-
-COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
- char __user *, optval, int __user *, optlen)
-{
- return __compat_sys_getsockopt(fd, level, optname, optval, optlen);
-}
-
/* Argument list sizes for compat_sys_socketcall */
#define AL(x) ((x) * sizeof(u32))
static unsigned char nas[21] = {
@@ -608,13 +494,11 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args)
ret = __sys_shutdown(a0, a1);
break;
case SYS_SETSOCKOPT:
- ret = __compat_sys_setsockopt(a0, a1, a[2],
- compat_ptr(a[3]), a[4]);
+ ret = __sys_setsockopt(a0, a1, a[2], compat_ptr(a[3]), a[4]);
break;
case SYS_GETSOCKOPT:
- ret = __compat_sys_getsockopt(a0, a1, a[2],
- compat_ptr(a[3]),
- compat_ptr(a[4]));
+ ret = __sys_getsockopt(a0, a1, a[2], compat_ptr(a[3]),
+ compat_ptr(a[4]));
break;
case SYS_SENDMSG:
ret = __compat_sys_sendmsg(a0, compat_ptr(a1), a[2]);
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index d2c4d16dadba..d3377c90a291 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -6,13 +6,12 @@
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
+#include <linux/btf_ids.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
-static atomic_t cache_idx;
-
#define SK_STORAGE_CREATE_FLAG_MASK \
(BPF_F_NO_PREALLOC | BPF_F_CLONE)
@@ -81,6 +80,9 @@ struct bpf_sk_storage_elem {
#define SDATA(_SELEM) (&(_SELEM)->sdata)
#define BPF_SK_STORAGE_CACHE_SIZE 16
+static DEFINE_SPINLOCK(cache_idx_lock);
+static u64 cache_idx_usage_counts[BPF_SK_STORAGE_CACHE_SIZE];
+
struct bpf_sk_storage {
struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE];
struct hlist_head list; /* List of bpf_sk_storage_elem */
@@ -512,6 +514,37 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
return 0;
}
+static u16 cache_idx_get(void)
+{
+ u64 min_usage = U64_MAX;
+ u16 i, res = 0;
+
+ spin_lock(&cache_idx_lock);
+
+ for (i = 0; i < BPF_SK_STORAGE_CACHE_SIZE; i++) {
+ if (cache_idx_usage_counts[i] < min_usage) {
+ min_usage = cache_idx_usage_counts[i];
+ res = i;
+
+ /* Found a free cache_idx */
+ if (!min_usage)
+ break;
+ }
+ }
+ cache_idx_usage_counts[res]++;
+
+ spin_unlock(&cache_idx_lock);
+
+ return res;
+}
+
+static void cache_idx_free(u16 idx)
+{
+ spin_lock(&cache_idx_lock);
+ cache_idx_usage_counts[idx]--;
+ spin_unlock(&cache_idx_lock);
+}
+
/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
@@ -560,6 +593,8 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
smap = (struct bpf_sk_storage_map *)map;
+ cache_idx_free(smap->cache_idx);
+
/* Note that this map might be concurrently cloned from
* bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
* RCU read section to finish before proceeding. New RCU
@@ -673,8 +708,7 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
}
smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size;
- smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) %
- BPF_SK_STORAGE_CACHE_SIZE;
+ smap->cache_idx = cache_idx_get();
return &smap->map;
}
@@ -886,6 +920,7 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
return -ENOENT;
}
+static int sk_storage_map_btf_id;
const struct bpf_map_ops sk_storage_map_ops = {
.map_alloc_check = bpf_sk_storage_map_alloc_check,
.map_alloc = bpf_sk_storage_map_alloc,
@@ -895,6 +930,8 @@ const struct bpf_map_ops sk_storage_map_ops = {
.map_update_elem = bpf_fd_sk_storage_update_elem,
.map_delete_elem = bpf_fd_sk_storage_delete_elem,
.map_check_btf = bpf_sk_storage_map_check_btf,
+ .map_btf_name = "bpf_sk_storage_map",
+ .map_btf_id = &sk_storage_map_btf_id,
};
const struct bpf_func_proto bpf_sk_storage_get_proto = {
@@ -907,6 +944,16 @@ const struct bpf_func_proto bpf_sk_storage_get_proto = {
.arg4_type = ARG_ANYTHING,
};
+const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = {
+ .func = bpf_sk_storage_get,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_CTX, /* context is 'struct sock' */
+ .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg4_type = ARG_ANYTHING,
+};
+
const struct bpf_func_proto bpf_sk_storage_delete_proto = {
.func = bpf_sk_storage_delete,
.gpl_only = false,
@@ -1181,3 +1228,208 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
return err;
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);
+
+struct bpf_iter_seq_sk_storage_map_info {
+ struct bpf_map *map;
+ unsigned int bucket_id;
+ unsigned skip_elems;
+};
+
+static struct bpf_sk_storage_elem *
+bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
+ struct bpf_sk_storage_elem *prev_selem)
+{
+ struct bpf_sk_storage *sk_storage;
+ struct bpf_sk_storage_elem *selem;
+ u32 skip_elems = info->skip_elems;
+ struct bpf_sk_storage_map *smap;
+ u32 bucket_id = info->bucket_id;
+ u32 i, count, n_buckets;
+ struct bucket *b;
+
+ smap = (struct bpf_sk_storage_map *)info->map;
+ n_buckets = 1U << smap->bucket_log;
+ if (bucket_id >= n_buckets)
+ return NULL;
+
+ /* try to find next selem in the same bucket */
+ selem = prev_selem;
+ count = 0;
+ while (selem) {
+ selem = hlist_entry_safe(selem->map_node.next,
+ struct bpf_sk_storage_elem, map_node);
+ if (!selem) {
+ /* not found, unlock and go to the next bucket */
+ b = &smap->buckets[bucket_id++];
+ raw_spin_unlock_bh(&b->lock);
+ skip_elems = 0;
+ break;
+ }
+ sk_storage = rcu_dereference_raw(selem->sk_storage);
+ if (sk_storage) {
+ info->skip_elems = skip_elems + count;
+ return selem;
+ }
+ count++;
+ }
+
+ for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
+ b = &smap->buckets[i];
+ raw_spin_lock_bh(&b->lock);
+ count = 0;
+ hlist_for_each_entry(selem, &b->list, map_node) {
+ sk_storage = rcu_dereference_raw(selem->sk_storage);
+ if (sk_storage && count >= skip_elems) {
+ info->bucket_id = i;
+ info->skip_elems = count;
+ return selem;
+ }
+ count++;
+ }
+ raw_spin_unlock_bh(&b->lock);
+ skip_elems = 0;
+ }
+
+ info->bucket_id = i;
+ info->skip_elems = 0;
+ return NULL;
+}
+
+static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct bpf_sk_storage_elem *selem;
+
+ selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
+ if (!selem)
+ return NULL;
+
+ if (*pos == 0)
+ ++*pos;
+ return selem;
+}
+
+static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
+
+ ++*pos;
+ ++info->skip_elems;
+ return bpf_sk_storage_map_seq_find_next(seq->private, v);
+}
+
+struct bpf_iter__bpf_sk_storage_map {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct bpf_map *, map);
+ __bpf_md_ptr(struct sock *, sk);
+ __bpf_md_ptr(void *, value);
+};
+
+DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta,
+ struct bpf_map *map, struct sock *sk,
+ void *value)
+
+static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
+ struct bpf_sk_storage_elem *selem)
+{
+ struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
+ struct bpf_iter__bpf_sk_storage_map ctx = {};
+ struct bpf_sk_storage *sk_storage;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ int ret = 0;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, selem == NULL);
+ if (prog) {
+ ctx.meta = &meta;
+ ctx.map = info->map;
+ if (selem) {
+ sk_storage = rcu_dereference_raw(selem->sk_storage);
+ ctx.sk = sk_storage->sk;
+ ctx.value = SDATA(selem)->data;
+ }
+ ret = bpf_iter_run_prog(prog, &ctx);
+ }
+
+ return ret;
+}
+
+static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
+{
+ return __bpf_sk_storage_map_seq_show(seq, v);
+}
+
+static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
+ struct bpf_sk_storage_map *smap;
+ struct bucket *b;
+
+ if (!v) {
+ (void)__bpf_sk_storage_map_seq_show(seq, v);
+ } else {
+ smap = (struct bpf_sk_storage_map *)info->map;
+ b = &smap->buckets[info->bucket_id];
+ raw_spin_unlock_bh(&b->lock);
+ }
+}
+
+static int bpf_iter_init_sk_storage_map(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+{
+ struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;
+
+ seq_info->map = aux->map;
+ return 0;
+}
+
+static int bpf_iter_check_map(struct bpf_prog *prog,
+ struct bpf_iter_aux_info *aux)
+{
+ struct bpf_map *map = aux->map;
+
+ if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
+ return -EINVAL;
+
+ if (prog->aux->max_rdonly_access > map->value_size)
+ return -EACCES;
+
+ return 0;
+}
+
+static const struct seq_operations bpf_sk_storage_map_seq_ops = {
+ .start = bpf_sk_storage_map_seq_start,
+ .next = bpf_sk_storage_map_seq_next,
+ .stop = bpf_sk_storage_map_seq_stop,
+ .show = bpf_sk_storage_map_seq_show,
+};
+
+static const struct bpf_iter_seq_info iter_seq_info = {
+ .seq_ops = &bpf_sk_storage_map_seq_ops,
+ .init_seq_private = bpf_iter_init_sk_storage_map,
+ .fini_seq_private = NULL,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info),
+};
+
+static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
+ .target = "bpf_sk_storage_map",
+ .check_target = bpf_iter_check_map,
+ .req_linfo = BPF_ITER_LINK_MAP_FD,
+ .ctx_arg_info_size = 2,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
+ PTR_TO_BTF_ID_OR_NULL },
+ { offsetof(struct bpf_iter__bpf_sk_storage_map, value),
+ PTR_TO_RDWR_BUF_OR_NULL },
+ },
+ .seq_info = &iter_seq_info,
+};
+
+static int __init bpf_sk_storage_map_iter_init(void)
+{
+ bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id =
+ btf_sock_ids[BTF_SOCK_TYPE_SOCK];
+ return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info);
+}
+late_initcall(bpf_sk_storage_map_iter_init);
diff --git a/net/core/dev.c b/net/core/dev.c
index 7a774ebf64e2..7df6c9617321 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -143,6 +143,7 @@
#include <linux/net_namespace.h>
#include <linux/indirect_call_wrapper.h>
#include <net/devlink.h>
+#include <linux/pm_runtime.h>
#include "net-sysfs.h"
@@ -1492,8 +1493,13 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
ASSERT_RTNL();
- if (!netif_device_present(dev))
- return -ENODEV;
+ if (!netif_device_present(dev)) {
+ /* may be detached because parent is runtime-suspended */
+ if (dev->dev.parent)
+ pm_runtime_resume(dev->dev.parent);
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ }
/* Block netpoll from trying to do any rx path servicing.
* If we don't do this there is a chance ndo_poll_controller
@@ -3448,10 +3454,9 @@ static netdev_features_t net_mpls_features(struct sk_buff *skb,
static netdev_features_t harmonize_features(struct sk_buff *skb,
netdev_features_t features)
{
- int tmp;
__be16 type;
- type = skb_network_protocol(skb, &tmp);
+ type = skb_network_protocol(skb, NULL);
features = net_mpls_features(skb, features, type);
if (skb->ip_summed != CHECKSUM_NONE &&
@@ -5442,6 +5447,8 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
for (i = 0; i < new->aux->used_map_cnt; i++) {
if (dev_map_can_have_prog(new->aux->used_maps[i]))
return -EINVAL;
+ if (cpu_map_prog_allowed(new->aux->used_maps[i]))
+ return -EINVAL;
}
}
@@ -5460,10 +5467,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
}
break;
- case XDP_QUERY_PROG:
- xdp->prog_id = old ? old->aux->id : 0;
- break;
-
default:
ret = -EINVAL;
break;
@@ -5585,7 +5588,7 @@ void netif_receive_skb_list(struct list_head *head)
}
EXPORT_SYMBOL(netif_receive_skb_list);
-DEFINE_PER_CPU(struct work_struct, flush_works);
+static DEFINE_PER_CPU(struct work_struct, flush_works);
/* Network device is going away, flush any packets still pending */
static void flush_backlog(struct work_struct *work)
@@ -6685,7 +6688,9 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
trace_napi_poll(n, work, weight);
}
- WARN_ON_ONCE(work > weight);
+ if (unlikely(work > weight))
+ pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
+ n->poll, work, weight);
if (likely(work < weight))
goto out_unlock;
@@ -7900,6 +7905,7 @@ EXPORT_SYMBOL(netdev_bonding_info_change);
/**
* netdev_get_xmit_slave - Get the xmit slave of master device
+ * @dev: device
* @skb: The packet
* @all_slaves: assume all the slaves are active
*
@@ -8705,182 +8711,489 @@ int dev_change_proto_down_generic(struct net_device *dev, bool proto_down)
}
EXPORT_SYMBOL(dev_change_proto_down_generic);
-u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
- enum bpf_netdev_command cmd)
+/**
+ * dev_change_proto_down_reason - proto down reason
+ *
+ * @dev: device
+ * @mask: proto down mask
+ * @value: proto down value
+ */
+void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
+ u32 value)
{
- struct netdev_bpf xdp;
+ int b;
- if (!bpf_op)
- return 0;
+ if (!mask) {
+ dev->proto_down_reason = value;
+ } else {
+ for_each_set_bit(b, &mask, 32) {
+ if (value & (1 << b))
+ dev->proto_down_reason |= BIT(b);
+ else
+ dev->proto_down_reason &= ~BIT(b);
+ }
+ }
+}
+EXPORT_SYMBOL(dev_change_proto_down_reason);
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = cmd;
+struct bpf_xdp_link {
+ struct bpf_link link;
+ struct net_device *dev; /* protected by rtnl_lock, no refcnt held */
+ int flags;
+};
- /* Query must always succeed. */
- WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG);
+static enum bpf_xdp_mode dev_xdp_mode(u32 flags)
+{
+ if (flags & XDP_FLAGS_HW_MODE)
+ return XDP_MODE_HW;
+ if (flags & XDP_FLAGS_DRV_MODE)
+ return XDP_MODE_DRV;
+ return XDP_MODE_SKB;
+}
- return xdp.prog_id;
+static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode)
+{
+ switch (mode) {
+ case XDP_MODE_SKB:
+ return generic_xdp_install;
+ case XDP_MODE_DRV:
+ case XDP_MODE_HW:
+ return dev->netdev_ops->ndo_bpf;
+ default:
+ return NULL;
+ };
}
-static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
- struct netlink_ext_ack *extack, u32 flags,
- struct bpf_prog *prog)
+static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev,
+ enum bpf_xdp_mode mode)
+{
+ return dev->xdp_state[mode].link;
+}
+
+static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
+ enum bpf_xdp_mode mode)
+{
+ struct bpf_xdp_link *link = dev_xdp_link(dev, mode);
+
+ if (link)
+ return link->link.prog;
+ return dev->xdp_state[mode].prog;
+}
+
+u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
+{
+ struct bpf_prog *prog = dev_xdp_prog(dev, mode);
+
+ return prog ? prog->aux->id : 0;
+}
+
+static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode,
+ struct bpf_xdp_link *link)
+{
+ dev->xdp_state[mode].link = link;
+ dev->xdp_state[mode].prog = NULL;
+}
+
+static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode,
+ struct bpf_prog *prog)
+{
+ dev->xdp_state[mode].link = NULL;
+ dev->xdp_state[mode].prog = prog;
+}
+
+static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
+ bpf_op_t bpf_op, struct netlink_ext_ack *extack,
+ u32 flags, struct bpf_prog *prog)
{
- bool non_hw = !(flags & XDP_FLAGS_HW_MODE);
- struct bpf_prog *prev_prog = NULL;
struct netdev_bpf xdp;
int err;
- if (non_hw) {
- prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op,
- XDP_QUERY_PROG));
- if (IS_ERR(prev_prog))
- prev_prog = NULL;
- }
-
memset(&xdp, 0, sizeof(xdp));
- if (flags & XDP_FLAGS_HW_MODE)
- xdp.command = XDP_SETUP_PROG_HW;
- else
- xdp.command = XDP_SETUP_PROG;
+ xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG;
xdp.extack = extack;
xdp.flags = flags;
xdp.prog = prog;
+ /* Drivers assume refcnt is already incremented (i.e, prog pointer is
+ * "moved" into driver), so they don't increment it on their own, but
+ * they do decrement refcnt when program is detached or replaced.
+ * Given net_device also owns link/prog, we need to bump refcnt here
+ * to prevent drivers from underflowing it.
+ */
+ if (prog)
+ bpf_prog_inc(prog);
err = bpf_op(dev, &xdp);
- if (!err && non_hw)
- bpf_prog_change_xdp(prev_prog, prog);
+ if (err) {
+ if (prog)
+ bpf_prog_put(prog);
+ return err;
+ }
- if (prev_prog)
- bpf_prog_put(prev_prog);
+ if (mode != XDP_MODE_HW)
+ bpf_prog_change_xdp(dev_xdp_prog(dev, mode), prog);
- return err;
+ return 0;
}
static void dev_xdp_uninstall(struct net_device *dev)
{
- struct netdev_bpf xdp;
- bpf_op_t ndo_bpf;
+ struct bpf_xdp_link *link;
+ struct bpf_prog *prog;
+ enum bpf_xdp_mode mode;
+ bpf_op_t bpf_op;
+
+ ASSERT_RTNL();
- /* Remove generic XDP */
- WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL));
+ for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) {
+ prog = dev_xdp_prog(dev, mode);
+ if (!prog)
+ continue;
- /* Remove from the driver */
- ndo_bpf = dev->netdev_ops->ndo_bpf;
- if (!ndo_bpf)
- return;
+ bpf_op = dev_xdp_bpf_op(dev, mode);
+ if (!bpf_op)
+ continue;
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG;
- WARN_ON(ndo_bpf(dev, &xdp));
- if (xdp.prog_id)
- WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
- NULL));
+ WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
- /* Remove HW offload */
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG_HW;
- if (!ndo_bpf(dev, &xdp) && xdp.prog_id)
- WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
- NULL));
+ /* auto-detach link from net device */
+ link = dev_xdp_link(dev, mode);
+ if (link)
+ link->dev = NULL;
+ else
+ bpf_prog_put(prog);
+
+ dev_xdp_set_link(dev, mode, NULL);
+ }
}
-/**
- * dev_change_xdp_fd - set or clear a bpf program for a device rx path
- * @dev: device
- * @extack: netlink extended ack
- * @fd: new program fd or negative value to clear
- * @expected_fd: old program fd that userspace expects to replace or clear
- * @flags: xdp-related flags
- *
- * Set or clear a bpf program for a device
- */
-int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
- int fd, int expected_fd, u32 flags)
+static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack,
+ struct bpf_xdp_link *link, struct bpf_prog *new_prog,
+ struct bpf_prog *old_prog, u32 flags)
{
- const struct net_device_ops *ops = dev->netdev_ops;
- enum bpf_netdev_command query;
- u32 prog_id, expected_id = 0;
- bpf_op_t bpf_op, bpf_chk;
- struct bpf_prog *prog;
- bool offload;
+ struct bpf_prog *cur_prog;
+ enum bpf_xdp_mode mode;
+ bpf_op_t bpf_op;
int err;
ASSERT_RTNL();
- offload = flags & XDP_FLAGS_HW_MODE;
- query = offload ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
+ /* either link or prog attachment, never both */
+ if (link && (new_prog || old_prog))
+ return -EINVAL;
+ /* link supports only XDP mode flags */
+ if (link && (flags & ~XDP_FLAGS_MODES)) {
+ NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment");
+ return -EINVAL;
+ }
+ /* just one XDP mode bit should be set, zero defaults to SKB mode */
+ if (hweight32(flags & XDP_FLAGS_MODES) > 1) {
+ NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set");
+ return -EINVAL;
+ }
+ /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */
+ if (old_prog && !(flags & XDP_FLAGS_REPLACE)) {
+ NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified");
+ return -EINVAL;
+ }
- bpf_op = bpf_chk = ops->ndo_bpf;
- if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) {
- NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in native mode");
- return -EOPNOTSUPP;
+ mode = dev_xdp_mode(flags);
+ /* can't replace attached link */
+ if (dev_xdp_link(dev, mode)) {
+ NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link");
+ return -EBUSY;
}
- if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
- bpf_op = generic_xdp_install;
- if (bpf_op == bpf_chk)
- bpf_chk = generic_xdp_install;
-
- prog_id = __dev_xdp_query(dev, bpf_op, query);
- if (flags & XDP_FLAGS_REPLACE) {
- if (expected_fd >= 0) {
- prog = bpf_prog_get_type_dev(expected_fd,
- BPF_PROG_TYPE_XDP,
- bpf_op == ops->ndo_bpf);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
- expected_id = prog->aux->id;
- bpf_prog_put(prog);
- }
- if (prog_id != expected_id) {
- NL_SET_ERR_MSG(extack, "Active program does not match expected");
- return -EEXIST;
- }
+ cur_prog = dev_xdp_prog(dev, mode);
+ /* can't replace attached prog with link */
+ if (link && cur_prog) {
+ NL_SET_ERR_MSG(extack, "Can't replace active XDP program with BPF link");
+ return -EBUSY;
+ }
+ if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) {
+ NL_SET_ERR_MSG(extack, "Active program does not match expected");
+ return -EEXIST;
+ }
+ if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) {
+ NL_SET_ERR_MSG(extack, "XDP program already attached");
+ return -EBUSY;
}
- if (fd >= 0) {
- if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) {
- NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time");
- return -EEXIST;
- }
- if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) {
- NL_SET_ERR_MSG(extack, "XDP program already attached");
- return -EBUSY;
- }
+ /* put effective new program into new_prog */
+ if (link)
+ new_prog = link->link.prog;
- prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
- bpf_op == ops->ndo_bpf);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
+ if (new_prog) {
+ bool offload = mode == XDP_MODE_HW;
+ enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB
+ ? XDP_MODE_DRV : XDP_MODE_SKB;
- if (!offload && bpf_prog_is_dev_bound(prog->aux)) {
- NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported");
- bpf_prog_put(prog);
+ if (!offload && dev_xdp_prog(dev, other_mode)) {
+ NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time");
+ return -EEXIST;
+ }
+ if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) {
+ NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported");
return -EINVAL;
}
-
- if (prog->expected_attach_type == BPF_XDP_DEVMAP) {
+ if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
- bpf_prog_put(prog);
return -EINVAL;
}
+ if (new_prog->expected_attach_type == BPF_XDP_CPUMAP) {
+ NL_SET_ERR_MSG(extack, "BPF_XDP_CPUMAP programs can not be attached to a device");
+ return -EINVAL;
+ }
+ }
- /* prog->aux->id may be 0 for orphaned device-bound progs */
- if (prog->aux->id && prog->aux->id == prog_id) {
- bpf_prog_put(prog);
- return 0;
+ /* don't call drivers if the effective program didn't change */
+ if (new_prog != cur_prog) {
+ bpf_op = dev_xdp_bpf_op(dev, mode);
+ if (!bpf_op) {
+ NL_SET_ERR_MSG(extack, "Underlying driver does not support XDP in native mode");
+ return -EOPNOTSUPP;
+ }
+
+ err = dev_xdp_install(dev, mode, bpf_op, extack, flags, new_prog);
+ if (err)
+ return err;
+ }
+
+ if (link)
+ dev_xdp_set_link(dev, mode, link);
+ else
+ dev_xdp_set_prog(dev, mode, new_prog);
+ if (cur_prog)
+ bpf_prog_put(cur_prog);
+
+ return 0;
+}
+
+static int dev_xdp_attach_link(struct net_device *dev,
+ struct netlink_ext_ack *extack,
+ struct bpf_xdp_link *link)
+{
+ return dev_xdp_attach(dev, extack, link, NULL, NULL, link->flags);
+}
+
+static int dev_xdp_detach_link(struct net_device *dev,
+ struct netlink_ext_ack *extack,
+ struct bpf_xdp_link *link)
+{
+ enum bpf_xdp_mode mode;
+ bpf_op_t bpf_op;
+
+ ASSERT_RTNL();
+
+ mode = dev_xdp_mode(link->flags);
+ if (dev_xdp_link(dev, mode) != link)
+ return -EINVAL;
+
+ bpf_op = dev_xdp_bpf_op(dev, mode);
+ WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
+ dev_xdp_set_link(dev, mode, NULL);
+ return 0;
+}
+
+static void bpf_xdp_link_release(struct bpf_link *link)
+{
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+
+ rtnl_lock();
+
+ /* if racing with net_device's tear down, xdp_link->dev might be
+ * already NULL, in which case link was already auto-detached
+ */
+ if (xdp_link->dev) {
+ WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link));
+ xdp_link->dev = NULL;
+ }
+
+ rtnl_unlock();
+}
+
+static int bpf_xdp_link_detach(struct bpf_link *link)
+{
+ bpf_xdp_link_release(link);
+ return 0;
+}
+
+static void bpf_xdp_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+
+ kfree(xdp_link);
+}
+
+static void bpf_xdp_link_show_fdinfo(const struct bpf_link *link,
+ struct seq_file *seq)
+{
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+ u32 ifindex = 0;
+
+ rtnl_lock();
+ if (xdp_link->dev)
+ ifindex = xdp_link->dev->ifindex;
+ rtnl_unlock();
+
+ seq_printf(seq, "ifindex:\t%u\n", ifindex);
+}
+
+static int bpf_xdp_link_fill_link_info(const struct bpf_link *link,
+ struct bpf_link_info *info)
+{
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+ u32 ifindex = 0;
+
+ rtnl_lock();
+ if (xdp_link->dev)
+ ifindex = xdp_link->dev->ifindex;
+ rtnl_unlock();
+
+ info->xdp.ifindex = ifindex;
+ return 0;
+}
+
+static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
+ struct bpf_prog *old_prog)
+{
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+ enum bpf_xdp_mode mode;
+ bpf_op_t bpf_op;
+ int err = 0;
+
+ rtnl_lock();
+
+ /* link might have been auto-released already, so fail */
+ if (!xdp_link->dev) {
+ err = -ENOLINK;
+ goto out_unlock;
+ }
+
+ if (old_prog && link->prog != old_prog) {
+ err = -EPERM;
+ goto out_unlock;
+ }
+ old_prog = link->prog;
+ if (old_prog == new_prog) {
+ /* no-op, don't disturb drivers */
+ bpf_prog_put(new_prog);
+ goto out_unlock;
+ }
+
+ mode = dev_xdp_mode(xdp_link->flags);
+ bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode);
+ err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL,
+ xdp_link->flags, new_prog);
+ if (err)
+ goto out_unlock;
+
+ old_prog = xchg(&link->prog, new_prog);
+ bpf_prog_put(old_prog);
+
+out_unlock:
+ rtnl_unlock();
+ return err;
+}
+
+static const struct bpf_link_ops bpf_xdp_link_lops = {
+ .release = bpf_xdp_link_release,
+ .dealloc = bpf_xdp_link_dealloc,
+ .detach = bpf_xdp_link_detach,
+ .show_fdinfo = bpf_xdp_link_show_fdinfo,
+ .fill_link_info = bpf_xdp_link_fill_link_info,
+ .update_prog = bpf_xdp_link_update,
+};
+
+int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct bpf_link_primer link_primer;
+ struct bpf_xdp_link *link;
+ struct net_device *dev;
+ int err, fd;
+
+ dev = dev_get_by_index(net, attr->link_create.target_ifindex);
+ if (!dev)
+ return -EINVAL;
+
+ link = kzalloc(sizeof(*link), GFP_USER);
+ if (!link) {
+ err = -ENOMEM;
+ goto out_put_dev;
+ }
+
+ bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog);
+ link->dev = dev;
+ link->flags = attr->link_create.flags;
+
+ err = bpf_link_prime(&link->link, &link_primer);
+ if (err) {
+ kfree(link);
+ goto out_put_dev;
+ }
+
+ rtnl_lock();
+ err = dev_xdp_attach_link(dev, NULL, link);
+ rtnl_unlock();
+
+ if (err) {
+ bpf_link_cleanup(&link_primer);
+ goto out_put_dev;
+ }
+
+ fd = bpf_link_settle(&link_primer);
+ /* link itself doesn't hold dev's refcnt to not complicate shutdown */
+ dev_put(dev);
+ return fd;
+
+out_put_dev:
+ dev_put(dev);
+ return err;
+}
+
+/**
+ * dev_change_xdp_fd - set or clear a bpf program for a device rx path
+ * @dev: device
+ * @extack: netlink extended ack
+ * @fd: new program fd or negative value to clear
+ * @expected_fd: old program fd that userspace expects to replace or clear
+ * @flags: xdp-related flags
+ *
+ * Set or clear a bpf program for a device
+ */
+int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
+ int fd, int expected_fd, u32 flags)
+{
+ enum bpf_xdp_mode mode = dev_xdp_mode(flags);
+ struct bpf_prog *new_prog = NULL, *old_prog = NULL;
+ int err;
+
+ ASSERT_RTNL();
+
+ if (fd >= 0) {
+ new_prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
+ mode != XDP_MODE_SKB);
+ if (IS_ERR(new_prog))
+ return PTR_ERR(new_prog);
+ }
+
+ if (expected_fd >= 0) {
+ old_prog = bpf_prog_get_type_dev(expected_fd, BPF_PROG_TYPE_XDP,
+ mode != XDP_MODE_SKB);
+ if (IS_ERR(old_prog)) {
+ err = PTR_ERR(old_prog);
+ old_prog = NULL;
+ goto err_out;
}
- } else {
- if (!prog_id)
- return 0;
- prog = NULL;
}
- err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
- if (err < 0 && prog)
- bpf_prog_put(prog);
+ err = dev_xdp_attach(dev, extack, NULL, new_prog, old_prog, flags);
+err_out:
+ if (err && new_prog)
+ bpf_prog_put(new_prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
return err;
}
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 547b587c1950..b2cf9b7bb7b8 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -5,6 +5,7 @@
#include <linux/rtnetlink.h>
#include <linux/net_tstamp.h>
#include <linux/wireless.h>
+#include <net/dsa.h>
#include <net/wext.h>
/*
@@ -225,6 +226,26 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
return 0;
}
+static int dev_do_ioctl(struct net_device *dev,
+ struct ifreq *ifr, unsigned int cmd)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ int err = -EOPNOTSUPP;
+
+ err = dsa_ndo_do_ioctl(dev, ifr, cmd);
+ if (err == 0 || err != -EOPNOTSUPP)
+ return err;
+
+ if (ops->ndo_do_ioctl) {
+ if (netif_device_present(dev))
+ err = ops->ndo_do_ioctl(dev, ifr, cmd);
+ else
+ err = -ENODEV;
+ }
+
+ return err;
+}
+
/*
* Perform the SIOCxIFxxx calls, inside rtnl_lock()
*/
@@ -323,13 +344,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
cmd == SIOCSHWTSTAMP ||
cmd == SIOCGHWTSTAMP ||
cmd == SIOCWANDEV) {
- err = -EOPNOTSUPP;
- if (ops->ndo_do_ioctl) {
- if (netif_device_present(dev))
- err = ops->ndo_do_ioctl(dev, ifr, cmd);
- else
- err = -ENODEV;
- }
+ err = dev_do_ioctl(dev, ifr, cmd);
} else
err = -EINVAL;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 47f14a2f25fb..e674f0f46dc2 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -85,6 +85,10 @@ EXPORT_SYMBOL(devlink_dpipe_header_ipv6);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
+static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
+ [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
+};
+
static LIST_HEAD(devlink_list);
/* devlink_mutex
@@ -382,19 +386,19 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
return NULL;
}
-#define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0)
-#define DEVLINK_NL_FLAG_NEED_PORT BIT(1)
-#define DEVLINK_NL_FLAG_NEED_SB BIT(2)
+#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
+#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
/* The per devlink instance lock is taken by default in the pre-doit
* operation, yet several commands do not require this. The global
* devlink lock is taken and protects from disruption by user-calls.
*/
-#define DEVLINK_NL_FLAG_NO_LOCK BIT(3)
+#define DEVLINK_NL_FLAG_NO_LOCK BIT(2)
static int devlink_nl_pre_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
+ struct devlink_port *devlink_port;
struct devlink *devlink;
int err;
@@ -406,27 +410,18 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
}
if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_lock(&devlink->lock);
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
- info->user_ptr[0] = devlink;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
- struct devlink_port *devlink_port;
-
+ info->user_ptr[0] = devlink;
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
devlink_port = devlink_port_get_from_info(devlink, info);
if (IS_ERR(devlink_port)) {
err = PTR_ERR(devlink_port);
goto unlock;
}
- info->user_ptr[0] = devlink_port;
- }
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) {
- struct devlink_sb *devlink_sb;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb)) {
- err = PTR_ERR(devlink_sb);
- goto unlock;
- }
- info->user_ptr[1] = devlink_sb;
+ info->user_ptr[1] = devlink_port;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) {
+ devlink_port = devlink_port_get_from_info(devlink, info);
+ if (!IS_ERR(devlink_port))
+ info->user_ptr[1] = devlink_port;
}
return 0;
@@ -442,16 +437,8 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
{
struct devlink *devlink;
- /* When devlink changes netns, it would not be found
- * by devlink_get_from_info(). So try if it is stored first.
- */
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
- devlink = info->user_ptr[0];
- } else {
- devlink = devlink_get_from_info(info);
- WARN_ON(IS_ERR(devlink));
- }
- if (!IS_ERR(devlink) && ~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+ devlink = info->user_ptr[0];
+ if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_unlock(&devlink->lock);
mutex_unlock(&devlink_mutex);
}
@@ -524,8 +511,14 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
- if (!attrs->set)
+ if (!devlink_port->attrs_set)
return 0;
+ if (attrs->lanes) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_LANES, attrs->lanes))
+ return -EMSGSIZE;
+ }
+ if (nla_put_u8(msg, DEVLINK_ATTR_PORT_SPLITTABLE, attrs->splittable))
+ return -EMSGSIZE;
if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour))
return -EMSGSIZE;
switch (devlink_port->attrs.flavour) {
@@ -563,10 +556,54 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
return 0;
}
+static int
+devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink *devlink = port->devlink;
+ const struct devlink_ops *ops;
+ struct nlattr *function_attr;
+ bool empty_nest = true;
+ int err = 0;
+
+ function_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PORT_FUNCTION);
+ if (!function_attr)
+ return -EMSGSIZE;
+
+ ops = devlink->ops;
+ if (ops->port_function_hw_addr_get) {
+ int hw_addr_len;
+ u8 hw_addr[MAX_ADDR_LEN];
+
+ err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack);
+ if (err == -EOPNOTSUPP) {
+ /* Port function attributes are optional for a port. If port doesn't
+ * support function attribute, returning -EOPNOTSUPP is not an error.
+ */
+ err = 0;
+ goto out;
+ } else if (err) {
+ goto out;
+ }
+ err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr);
+ if (err)
+ goto out;
+ empty_nest = false;
+ }
+
+out:
+ if (err || empty_nest)
+ nla_nest_cancel(msg, function_attr);
+ else
+ nla_nest_end(msg, function_attr);
+ return err;
+}
+
static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
struct devlink_port *devlink_port,
enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
+ u32 seq, int flags,
+ struct netlink_ext_ack *extack)
{
void *hdr;
@@ -607,6 +644,8 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
spin_unlock_bh(&devlink_port->type_lock);
if (devlink_nl_port_attrs_put(msg, devlink_port))
goto nla_put_failure;
+ if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
+ goto nla_put_failure;
genlmsg_end(msg, hdr);
return 0;
@@ -634,7 +673,8 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
if (!msg)
return;
- err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0);
+ err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0,
+ NULL);
if (err) {
nlmsg_free(msg);
return;
@@ -697,7 +737,7 @@ out:
static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = devlink_port->devlink;
struct sk_buff *msg;
int err;
@@ -708,7 +748,8 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
err = devlink_nl_port_fill(msg, devlink, devlink_port,
DEVLINK_CMD_PORT_NEW,
- info->snd_portid, info->snd_seq, 0);
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
if (err) {
nlmsg_free(msg);
return err;
@@ -740,7 +781,8 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
DEVLINK_CMD_NEW,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ NLM_F_MULTI,
+ cb->extack);
if (err) {
mutex_unlock(&devlink->lock);
goto out;
@@ -778,10 +820,71 @@ static int devlink_port_type_set(struct devlink *devlink,
return -EOPNOTSUPP;
}
+static int
+devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *port,
+ const struct nlattr *attr, struct netlink_ext_ack *extack)
+{
+ const struct devlink_ops *ops;
+ const u8 *hw_addr;
+ int hw_addr_len;
+ int err;
+
+ hw_addr = nla_data(attr);
+ hw_addr_len = nla_len(attr);
+ if (hw_addr_len > MAX_ADDR_LEN) {
+ NL_SET_ERR_MSG_MOD(extack, "Port function hardware address too long");
+ return -EINVAL;
+ }
+ if (port->type == DEVLINK_PORT_TYPE_ETH) {
+ if (hw_addr_len != ETH_ALEN) {
+ NL_SET_ERR_MSG_MOD(extack, "Address must be 6 bytes for Ethernet device");
+ return -EINVAL;
+ }
+ if (!is_unicast_ether_addr(hw_addr)) {
+ NL_SET_ERR_MSG_MOD(extack, "Non-unicast hardware address unsupported");
+ return -EINVAL;
+ }
+ }
+
+ ops = devlink->ops;
+ if (!ops->port_function_hw_addr_set) {
+ NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes");
+ return -EOPNOTSUPP;
+ }
+
+ err = ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack);
+ if (err)
+ return err;
+
+ devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
+ return 0;
+}
+
+static int
+devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
+ const struct nlattr *attr, struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, DEVLINK_PORT_FUNCTION_ATTR_MAX, attr,
+ devlink_function_nl_policy, extack);
+ if (err < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Fail to parse port function attributes");
+ return err;
+ }
+
+ attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
+ if (attr)
+ err = devlink_port_function_hw_addr_set(devlink, port, attr, extack);
+
+ return err;
+}
+
static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = devlink_port->devlink;
int err;
@@ -793,6 +896,16 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
if (err)
return err;
}
+
+ if (info->attrs[DEVLINK_ATTR_PORT_FUNCTION]) {
+ struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION];
+ struct netlink_ext_ack *extack = info->extack;
+
+ err = devlink_port_function_set(devlink, devlink_port, attr, extack);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -810,6 +923,7 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
+ struct devlink_port *devlink_port;
u32 port_index;
u32 count;
@@ -817,8 +931,27 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
!info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT])
return -EINVAL;
+ devlink_port = devlink_port_get_from_info(devlink, info);
port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
+
+ if (IS_ERR(devlink_port))
+ return -EINVAL;
+
+ if (!devlink_port->attrs.splittable) {
+ /* Split ports cannot be split. */
+ if (devlink_port->attrs.split)
+ NL_SET_ERR_MSG_MOD(info->extack, "Port cannot be split further");
+ else
+ NL_SET_ERR_MSG_MOD(info->extack, "Port cannot be split");
+ return -EINVAL;
+ }
+
+ if (count < 2 || !is_power_of_2(count) || count > devlink_port->attrs.lanes) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Invalid split count");
+ return -EINVAL;
+ }
+
return devlink_port_split(devlink, port_index, count, info->extack);
}
@@ -886,10 +1019,14 @@ static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_sb *devlink_sb;
struct sk_buff *msg;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
@@ -991,11 +1128,15 @@ static int devlink_nl_cmd_sb_pool_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_sb *devlink_sb;
struct sk_buff *msg;
u16 pool_index;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
&pool_index);
if (err)
@@ -1102,12 +1243,16 @@ static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
enum devlink_sb_threshold_type threshold_type;
+ struct devlink_sb *devlink_sb;
u16 pool_index;
u32 size;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
&pool_index);
if (err)
@@ -1186,13 +1331,17 @@ nla_put_failure:
static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = devlink_port->devlink;
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_sb *devlink_sb;
struct sk_buff *msg;
u16 pool_index;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
&pool_index);
if (err)
@@ -1304,12 +1453,17 @@ static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_sb *devlink_sb;
u16 pool_index;
u32 threshold;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
&pool_index);
if (err)
@@ -1391,14 +1545,18 @@ nla_put_failure:
static int devlink_nl_cmd_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = devlink_port->devlink;
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_sb *devlink_sb;
struct sk_buff *msg;
enum devlink_sb_pool_type pool_type;
u16 tc_index;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_type_get_from_info(info, &pool_type);
if (err)
return err;
@@ -1540,14 +1698,19 @@ static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = info->user_ptr[0];
enum devlink_sb_pool_type pool_type;
+ struct devlink_sb *devlink_sb;
u16 tc_index;
u16 pool_index;
u32 threshold;
int err;
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
err = devlink_sb_pool_type_get_from_info(info, &pool_type);
if (err)
return err;
@@ -1575,8 +1738,12 @@ static int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
const struct devlink_ops *ops = devlink->ops;
+ struct devlink_sb *devlink_sb;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
if (ops->sb_occ_snapshot)
return ops->sb_occ_snapshot(devlink, devlink_sb->index);
@@ -1587,8 +1754,12 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb = info->user_ptr[1];
const struct devlink_ops *ops = devlink->ops;
+ struct devlink_sb *devlink_sb;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
if (ops->sb_occ_max_clear)
return ops->sb_occ_max_clear(devlink, devlink_sb->index);
@@ -2772,7 +2943,7 @@ static void devlink_reload_netns_change(struct devlink *devlink,
DEVLINK_CMD_PARAM_NEW);
}
-static bool devlink_reload_supported(struct devlink *devlink)
+static bool devlink_reload_supported(const struct devlink *devlink)
{
return devlink->ops->reload_down && devlink->ops->reload_up;
}
@@ -2818,7 +2989,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
struct net *dest_net = NULL;
int err;
- if (!devlink_reload_supported(devlink) || !devlink->reload_enabled)
+ if (!devlink_reload_supported(devlink))
return -EOPNOTSUPP;
err = devlink_resources_validate(devlink, NULL, info);
@@ -4388,6 +4559,14 @@ int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
}
EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
+int devlink_info_board_serial_number_put(struct devlink_info_req *req,
+ const char *bsn)
+{
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER,
+ bsn);
+}
+EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put);
+
static int devlink_info_version_put(struct devlink_info_req *req, int attr,
const char *version_name,
const char *version_value)
@@ -5141,6 +5320,7 @@ struct devlink_health_reporter {
void *priv;
const struct devlink_health_reporter_ops *ops;
struct devlink *devlink;
+ struct devlink_port *devlink_port;
struct devlink_fmsg *dump_fmsg;
struct mutex dump_lock; /* lock parallel read/write from dump buffers */
u64 graceful_period;
@@ -5163,18 +5343,98 @@ devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
static struct devlink_health_reporter *
-devlink_health_reporter_find_by_name(struct devlink *devlink,
- const char *reporter_name)
+__devlink_health_reporter_find_by_name(struct list_head *reporter_list,
+ struct mutex *list_lock,
+ const char *reporter_name)
{
struct devlink_health_reporter *reporter;
- lockdep_assert_held(&devlink->reporters_lock);
- list_for_each_entry(reporter, &devlink->reporter_list, list)
+ lockdep_assert_held(list_lock);
+ list_for_each_entry(reporter, reporter_list, list)
if (!strcmp(reporter->ops->name, reporter_name))
return reporter;
return NULL;
}
+static struct devlink_health_reporter *
+devlink_health_reporter_find_by_name(struct devlink *devlink,
+ const char *reporter_name)
+{
+ return __devlink_health_reporter_find_by_name(&devlink->reporter_list,
+ &devlink->reporters_lock,
+ reporter_name);
+}
+
+static struct devlink_health_reporter *
+devlink_port_health_reporter_find_by_name(struct devlink_port *devlink_port,
+ const char *reporter_name)
+{
+ return __devlink_health_reporter_find_by_name(&devlink_port->reporter_list,
+ &devlink_port->reporters_lock,
+ reporter_name);
+}
+
+static struct devlink_health_reporter *
+__devlink_health_reporter_create(struct devlink *devlink,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, void *priv)
+{
+ struct devlink_health_reporter *reporter;
+
+ if (WARN_ON(graceful_period && !ops->recover))
+ return ERR_PTR(-EINVAL);
+
+ reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
+ if (!reporter)
+ return ERR_PTR(-ENOMEM);
+
+ reporter->priv = priv;
+ reporter->ops = ops;
+ reporter->devlink = devlink;
+ reporter->graceful_period = graceful_period;
+ reporter->auto_recover = !!ops->recover;
+ reporter->auto_dump = !!ops->dump;
+ mutex_init(&reporter->dump_lock);
+ refcount_set(&reporter->refcount, 1);
+ return reporter;
+}
+
+/**
+ * devlink_port_health_reporter_create - create devlink health reporter for
+ * specified port instance
+ *
+ * @port: devlink_port which should contain the new reporter
+ * @ops: ops
+ * @graceful_period: to avoid recovery loops, in msecs
+ * @priv: priv
+ */
+struct devlink_health_reporter *
+devlink_port_health_reporter_create(struct devlink_port *port,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, void *priv)
+{
+ struct devlink_health_reporter *reporter;
+
+ mutex_lock(&port->reporters_lock);
+ if (__devlink_health_reporter_find_by_name(&port->reporter_list,
+ &port->reporters_lock, ops->name)) {
+ reporter = ERR_PTR(-EEXIST);
+ goto unlock;
+ }
+
+ reporter = __devlink_health_reporter_create(port->devlink, ops,
+ graceful_period, priv);
+ if (IS_ERR(reporter))
+ goto unlock;
+
+ reporter->devlink_port = port;
+ list_add_tail(&reporter->list, &port->reporter_list);
+unlock:
+ mutex_unlock(&port->reporters_lock);
+ return reporter;
+}
+EXPORT_SYMBOL_GPL(devlink_port_health_reporter_create);
+
/**
* devlink_health_reporter_create - create devlink health reporter
*
@@ -5196,25 +5456,11 @@ devlink_health_reporter_create(struct devlink *devlink,
goto unlock;
}
- if (WARN_ON(graceful_period && !ops->recover)) {
- reporter = ERR_PTR(-EINVAL);
- goto unlock;
- }
-
- reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
- if (!reporter) {
- reporter = ERR_PTR(-ENOMEM);
+ reporter = __devlink_health_reporter_create(devlink, ops,
+ graceful_period, priv);
+ if (IS_ERR(reporter))
goto unlock;
- }
- reporter->priv = priv;
- reporter->ops = ops;
- reporter->devlink = devlink;
- reporter->graceful_period = graceful_period;
- reporter->auto_recover = !!ops->recover;
- reporter->auto_dump = !!ops->dump;
- mutex_init(&reporter->dump_lock);
- refcount_set(&reporter->refcount, 1);
list_add_tail(&reporter->list, &devlink->reporter_list);
unlock:
mutex_unlock(&devlink->reporters_lock);
@@ -5222,6 +5468,29 @@ unlock:
}
EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
+static void
+devlink_health_reporter_free(struct devlink_health_reporter *reporter)
+{
+ mutex_destroy(&reporter->dump_lock);
+ if (reporter->dump_fmsg)
+ devlink_fmsg_free(reporter->dump_fmsg);
+ kfree(reporter);
+}
+
+static void
+devlink_health_reporter_put(struct devlink_health_reporter *reporter)
+{
+ if (refcount_dec_and_test(&reporter->refcount))
+ devlink_health_reporter_free(reporter);
+}
+
+static void
+__devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+ list_del(&reporter->list);
+ devlink_health_reporter_put(reporter);
+}
+
/**
* devlink_health_reporter_destroy - destroy devlink health reporter
*
@@ -5230,18 +5499,30 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
void
devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
{
- mutex_lock(&reporter->devlink->reporters_lock);
- list_del(&reporter->list);
- mutex_unlock(&reporter->devlink->reporters_lock);
- while (refcount_read(&reporter->refcount) > 1)
- msleep(100);
- mutex_destroy(&reporter->dump_lock);
- if (reporter->dump_fmsg)
- devlink_fmsg_free(reporter->dump_fmsg);
- kfree(reporter);
+ struct mutex *lock = &reporter->devlink->reporters_lock;
+
+ mutex_lock(lock);
+ __devlink_health_reporter_destroy(reporter);
+ mutex_unlock(lock);
}
EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
+/**
+ * devlink_port_health_reporter_destroy - destroy devlink port health reporter
+ *
+ * @reporter: devlink health reporter to destroy
+ */
+void
+devlink_port_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+ struct mutex *lock = &reporter->devlink_port->reporters_lock;
+
+ mutex_lock(lock);
+ __devlink_health_reporter_destroy(reporter);
+ mutex_unlock(lock);
+}
+EXPORT_SYMBOL_GPL(devlink_port_health_reporter_destroy);
+
static int
devlink_nl_health_reporter_fill(struct sk_buff *msg,
struct devlink *devlink,
@@ -5259,6 +5540,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg,
if (devlink_nl_put_handle(msg, devlink))
goto genlmsg_cancel;
+ if (reporter->devlink_port) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, reporter->devlink_port->index))
+ goto genlmsg_cancel;
+ }
reporter_attr = nla_nest_start_noflag(msg,
DEVLINK_ATTR_HEALTH_REPORTER);
if (!reporter_attr)
@@ -5466,17 +5751,28 @@ devlink_health_reporter_get_from_attrs(struct devlink *devlink,
struct nlattr **attrs)
{
struct devlink_health_reporter *reporter;
+ struct devlink_port *devlink_port;
char *reporter_name;
if (!attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
return NULL;
reporter_name = nla_data(attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
- mutex_lock(&devlink->reporters_lock);
- reporter = devlink_health_reporter_find_by_name(devlink, reporter_name);
- if (reporter)
- refcount_inc(&reporter->refcount);
- mutex_unlock(&devlink->reporters_lock);
+ devlink_port = devlink_port_get_from_attrs(devlink, attrs);
+ if (IS_ERR(devlink_port)) {
+ mutex_lock(&devlink->reporters_lock);
+ reporter = devlink_health_reporter_find_by_name(devlink, reporter_name);
+ if (reporter)
+ refcount_inc(&reporter->refcount);
+ mutex_unlock(&devlink->reporters_lock);
+ } else {
+ mutex_lock(&devlink_port->reporters_lock);
+ reporter = devlink_port_health_reporter_find_by_name(devlink_port, reporter_name);
+ if (reporter)
+ refcount_inc(&reporter->refcount);
+ mutex_unlock(&devlink_port->reporters_lock);
+ }
+
return reporter;
}
@@ -5508,12 +5804,6 @@ unlock:
return NULL;
}
-static void
-devlink_health_reporter_put(struct devlink_health_reporter *reporter)
-{
- refcount_dec(&reporter->refcount);
-}
-
void
devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
enum devlink_health_reporter_state state)
@@ -5570,6 +5860,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
struct netlink_callback *cb)
{
struct devlink_health_reporter *reporter;
+ struct devlink_port *port;
struct devlink *devlink;
int start = cb->args[0];
int idx = 0;
@@ -5600,6 +5891,31 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
}
mutex_unlock(&devlink->reporters_lock);
}
+
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ continue;
+ list_for_each_entry(port, &devlink->port_list, list) {
+ mutex_lock(&port->reporters_lock);
+ list_for_each_entry(reporter, &port->reporter_list, list) {
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
+ DEVLINK_CMD_HEALTH_REPORTER_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ mutex_unlock(&port->reporters_lock);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&port->reporters_lock);
+ }
+ }
out:
mutex_unlock(&devlink_mutex);
@@ -6107,7 +6423,7 @@ static int __devlink_trap_action_set(struct devlink *devlink,
}
err = devlink->ops->trap_action_set(devlink, trap_item->trap,
- trap_action);
+ trap_action, extack);
if (err)
return err;
@@ -6397,7 +6713,8 @@ static int devlink_trap_group_set(struct devlink *devlink,
}
policer = policer_item ? policer_item->policer : NULL;
- err = devlink->ops->trap_group_set(devlink, group_item->group, policer);
+ err = devlink->ops->trap_group_set(devlink, group_item->group, policer,
+ extack);
if (err)
return err;
@@ -6721,6 +7038,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32 },
[DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 },
[DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -6729,7 +7047,6 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_get_doit,
.dumpit = devlink_nl_cmd_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
@@ -6752,24 +7069,20 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_port_split_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_PORT_UNSPLIT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_port_unsplit_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_SB_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_get_doit,
.dumpit = devlink_nl_cmd_sb_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB,
/* can be retrieved by unprivileged users */
},
{
@@ -6777,8 +7090,6 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_pool_get_doit,
.dumpit = devlink_nl_cmd_sb_pool_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB,
/* can be retrieved by unprivileged users */
},
{
@@ -6786,16 +7097,13 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_pool_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB,
},
{
.cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_port_pool_get_doit,
.dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
- DEVLINK_NL_FLAG_NEED_SB,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
/* can be retrieved by unprivileged users */
},
{
@@ -6803,16 +7111,14 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_port_pool_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
- DEVLINK_NL_FLAG_NEED_SB,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
.cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit,
.dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
- DEVLINK_NL_FLAG_NEED_SB,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
/* can be retrieved by unprivileged users */
},
{
@@ -6820,60 +7126,50 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
- DEVLINK_NL_FLAG_NEED_SB,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
.cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_occ_snapshot_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB,
},
{
.cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_occ_max_clear_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NEED_SB,
},
{
.cmd = DEVLINK_CMD_ESWITCH_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_eswitch_get_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_ESWITCH_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_eswitch_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_dpipe_table_get,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_dpipe_entries_get,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_dpipe_headers_get,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
@@ -6881,20 +7177,17 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_dpipe_table_counters_set,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_RESOURCE_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_resource_set,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_RESOURCE_DUMP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_resource_dump,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
@@ -6902,15 +7195,13 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_reload,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
- DEVLINK_NL_FLAG_NO_LOCK,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_PARAM_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_param_get_doit,
.dumpit = devlink_nl_cmd_param_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
@@ -6918,7 +7209,6 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_param_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_PORT_PARAM_GET,
@@ -6941,21 +7231,18 @@ static const struct genl_ops devlink_nl_ops[] = {
.doit = devlink_nl_cmd_region_get_doit,
.dumpit = devlink_nl_cmd_region_get_dumpit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_REGION_NEW,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_region_new,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_REGION_DEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_region_del,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_REGION_READ,
@@ -6963,14 +7250,12 @@ static const struct genl_ops devlink_nl_ops[] = {
GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = devlink_nl_cmd_region_read_dumpit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_INFO_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_info_get_doit,
.dumpit = devlink_nl_cmd_info_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
@@ -6978,7 +7263,7 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_get_doit,
.dumpit = devlink_nl_cmd_health_reporter_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
DEVLINK_NL_FLAG_NO_LOCK,
/* can be retrieved by unprivileged users */
},
@@ -6987,7 +7272,7 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
DEVLINK_NL_FLAG_NO_LOCK,
},
{
@@ -6995,7 +7280,7 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_recover_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
DEVLINK_NL_FLAG_NO_LOCK,
},
{
@@ -7003,7 +7288,7 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_diagnose_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
DEVLINK_NL_FLAG_NO_LOCK,
},
{
@@ -7012,7 +7297,7 @@ static const struct genl_ops devlink_nl_ops[] = {
GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
DEVLINK_NL_FLAG_NO_LOCK,
},
{
@@ -7020,7 +7305,7 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
DEVLINK_NL_FLAG_NO_LOCK,
},
{
@@ -7028,46 +7313,39 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_flash_update,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_TRAP_GET,
.doit = devlink_nl_cmd_trap_get_doit,
.dumpit = devlink_nl_cmd_trap_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_TRAP_SET,
.doit = devlink_nl_cmd_trap_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_TRAP_GROUP_GET,
.doit = devlink_nl_cmd_trap_group_get_doit,
.dumpit = devlink_nl_cmd_trap_group_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_TRAP_GROUP_SET,
.doit = devlink_nl_cmd_trap_group_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
{
.cmd = DEVLINK_CMD_TRAP_POLICER_GET,
.doit = devlink_nl_cmd_trap_policer_get_doit,
.dumpit = devlink_nl_cmd_trap_policer_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_TRAP_POLICER_SET,
.doit = devlink_nl_cmd_trap_policer_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
};
@@ -7132,9 +7410,9 @@ EXPORT_SYMBOL_GPL(devlink_alloc);
*/
int devlink_register(struct devlink *devlink, struct device *dev)
{
- mutex_lock(&devlink_mutex);
devlink->dev = dev;
devlink->registered = true;
+ mutex_lock(&devlink_mutex);
list_add_tail(&devlink->list, &devlink_list);
devlink_notify(devlink, DEVLINK_CMD_NEW);
mutex_unlock(&devlink_mutex);
@@ -7280,6 +7558,8 @@ int devlink_port_register(struct devlink *devlink,
list_add_tail(&devlink_port->list, &devlink->port_list);
INIT_LIST_HEAD(&devlink_port->param_list);
mutex_unlock(&devlink->lock);
+ INIT_LIST_HEAD(&devlink_port->reporter_list);
+ mutex_init(&devlink_port->reporters_lock);
INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
devlink_port_type_warn_schedule(devlink_port);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
@@ -7296,6 +7576,8 @@ void devlink_port_unregister(struct devlink_port *devlink_port)
{
struct devlink *devlink = devlink_port->devlink;
+ WARN_ON(!list_empty(&devlink_port->reporter_list));
+ mutex_destroy(&devlink_port->reporters_lock);
devlink_port_type_warn_cancel(devlink_port);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
mutex_lock(&devlink->lock);
@@ -7389,24 +7671,20 @@ void devlink_port_type_clear(struct devlink_port *devlink_port)
EXPORT_SYMBOL_GPL(devlink_port_type_clear);
static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
- enum devlink_port_flavour flavour,
- const unsigned char *switch_id,
- unsigned char switch_id_len)
+ enum devlink_port_flavour flavour)
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
if (WARN_ON(devlink_port->registered))
return -EEXIST;
- attrs->set = true;
+ devlink_port->attrs_set = true;
attrs->flavour = flavour;
- if (switch_id) {
- attrs->switch_port = true;
- if (WARN_ON(switch_id_len > MAX_PHYS_ITEM_ID_LEN))
- switch_id_len = MAX_PHYS_ITEM_ID_LEN;
- memcpy(attrs->switch_id.id, switch_id, switch_id_len);
- attrs->switch_id.id_len = switch_id_len;
+ if (attrs->switch_id.id_len) {
+ devlink_port->switch_port = true;
+ if (WARN_ON(attrs->switch_id.id_len > MAX_PHYS_ITEM_ID_LEN))
+ attrs->switch_id.id_len = MAX_PHYS_ITEM_ID_LEN;
} else {
- attrs->switch_port = false;
+ devlink_port->switch_port = false;
}
return 0;
}
@@ -7415,33 +7693,18 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
* devlink_port_attrs_set - Set port attributes
*
* @devlink_port: devlink port
- * @flavour: flavour of the port
- * @port_number: number of the port that is facing user, for example
- * the front panel port number
- * @split: indicates if this is split port
- * @split_subport_number: if the port is split, this is the number
- * of subport.
- * @switch_id: if the port is part of switch, this is buffer with ID,
- * otwerwise this is NULL
- * @switch_id_len: length of the switch_id buffer
+ * @attrs: devlink port attrs
*/
void devlink_port_attrs_set(struct devlink_port *devlink_port,
- enum devlink_port_flavour flavour,
- u32 port_number, bool split,
- u32 split_subport_number,
- const unsigned char *switch_id,
- unsigned char switch_id_len)
+ struct devlink_port_attrs *attrs)
{
- struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
- ret = __devlink_port_attrs_set(devlink_port, flavour,
- switch_id, switch_id_len);
+ devlink_port->attrs = *attrs;
+ ret = __devlink_port_attrs_set(devlink_port, attrs->flavour);
if (ret)
return;
- attrs->split = split;
- attrs->phys.port_number = port_number;
- attrs->phys.split_subport_number = split_subport_number;
+ WARN_ON(attrs->splittable && attrs->split);
}
EXPORT_SYMBOL_GPL(devlink_port_attrs_set);
@@ -7450,20 +7713,14 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_set);
*
* @devlink_port: devlink port
* @pf: associated PF for the devlink port instance
- * @switch_id: if the port is part of switch, this is buffer with ID,
- * otherwise this is NULL
- * @switch_id_len: length of the switch_id buffer
*/
-void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port,
- const unsigned char *switch_id,
- unsigned char switch_id_len, u16 pf)
+void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf)
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
ret = __devlink_port_attrs_set(devlink_port,
- DEVLINK_PORT_FLAVOUR_PCI_PF,
- switch_id, switch_id_len);
+ DEVLINK_PORT_FLAVOUR_PCI_PF);
if (ret)
return;
@@ -7477,21 +7734,15 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set);
* @devlink_port: devlink port
* @pf: associated PF for the devlink port instance
* @vf: associated VF of a PF for the devlink port instance
- * @switch_id: if the port is part of switch, this is buffer with ID,
- * otherwise this is NULL
- * @switch_id_len: length of the switch_id buffer
*/
void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port,
- const unsigned char *switch_id,
- unsigned char switch_id_len,
u16 pf, u16 vf)
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
ret = __devlink_port_attrs_set(devlink_port,
- DEVLINK_PORT_FLAVOUR_PCI_VF,
- switch_id, switch_id_len);
+ DEVLINK_PORT_FLAVOUR_PCI_VF);
if (ret)
return;
attrs->pci_vf.pf = pf;
@@ -7505,7 +7756,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int n = 0;
- if (!attrs->set)
+ if (!devlink_port->attrs_set)
return -EOPNOTSUPP;
switch (attrs->flavour) {
@@ -8551,6 +8802,7 @@ static const struct devlink_trap devlink_trap_generic[] = {
DEVLINK_TRAP(PTP_GENERAL, CONTROL),
DEVLINK_TRAP(FLOW_ACTION_SAMPLE, CONTROL),
DEVLINK_TRAP(FLOW_ACTION_TRAP, CONTROL),
+ DEVLINK_TRAP(EARLY_DROP, DROP),
};
#define DEVLINK_TRAP_GROUP(_id) \
@@ -8800,7 +9052,8 @@ static void devlink_trap_disable(struct devlink *devlink,
if (WARN_ON_ONCE(!trap_item))
return;
- devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP);
+ devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP,
+ NULL);
trap_item->action = DEVLINK_TRAP_ACTION_DROP;
}
@@ -9341,7 +9594,7 @@ int devlink_compat_switch_id_get(struct net_device *dev,
* any devlink lock as only permanent values are accessed.
*/
devlink_port = netdev_to_devlink_port(dev);
- if (!devlink_port || !devlink_port->attrs.switch_port)
+ if (!devlink_port || !devlink_port->switch_port)
return -EOPNOTSUPP;
memcpy(ppid, &devlink_port->attrs.switch_id, sizeof(*ppid));
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index bd7eba9066f8..51678a528f85 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -14,6 +14,20 @@
#include <net/sock.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
+#include <linux/indirect_call_wrapper.h>
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+#define INDIRECT_CALL_MT(f, f2, f1, ...) \
+ INDIRECT_CALL_INET(f, f2, f1, __VA_ARGS__)
+#else
+#define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__)
+#endif
+#elif defined(CONFIG_IP_MULTIPLE_TABLES)
+#define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
+#else
+#define INDIRECT_CALL_MT(f, f2, f1, ...) f(__VA_ARGS__)
+#endif
static const struct fib_kuid_range fib_kuid_range_unset = {
KUIDT_INIT(0),
@@ -267,7 +281,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
uid_gt(fl->flowi_uid, rule->uid_range.end))
goto out;
- ret = ops->match(rule, fl, flags);
+ ret = INDIRECT_CALL_MT(ops->match,
+ fib6_rule_match,
+ fib4_rule_match,
+ rule, fl, flags);
out:
return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
}
@@ -298,9 +315,15 @@ jumped:
} else if (rule->action == FR_ACT_NOP)
continue;
else
- err = ops->action(rule, fl, flags, arg);
-
- if (!err && ops->suppress && ops->suppress(rule, arg))
+ err = INDIRECT_CALL_MT(ops->action,
+ fib6_rule_action,
+ fib4_rule_action,
+ rule, fl, flags, arg);
+
+ if (!err && ops->suppress && INDIRECT_CALL_MT(ops->suppress,
+ fib6_rule_suppress,
+ fib4_rule_suppress,
+ rule, arg))
continue;
if (err != -EAGAIN) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 82e1b5b06167..7124f0fe6974 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -47,6 +47,7 @@
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
+#include <linux/btf.h>
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
@@ -73,6 +74,31 @@
#include <net/lwtunnel.h>
#include <net/ipv6_stubs.h>
#include <net/bpf_sk_storage.h>
+#include <net/transp_v6.h>
+#include <linux/btf_ids.h>
+
+int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
+{
+ if (in_compat_syscall()) {
+ struct compat_sock_fprog f32;
+
+ if (len != sizeof(f32))
+ return -EINVAL;
+ if (copy_from_sockptr(&f32, src, sizeof(f32)))
+ return -EFAULT;
+ memset(dst, 0, sizeof(*dst));
+ dst->len = f32.len;
+ dst->filter = compat_ptr(f32.filter);
+ } else {
+ if (len != sizeof(*dst))
+ return -EINVAL;
+ if (copy_from_sockptr(dst, src, sizeof(*dst)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -3777,7 +3803,9 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
-static int bpf_skb_output_btf_ids[5];
+BTF_ID_LIST(bpf_skb_output_btf_ids)
+BTF_ID(struct, sk_buff)
+
const struct bpf_func_proto bpf_skb_output_proto = {
.func = bpf_skb_event_output,
.gpl_only = true,
@@ -4171,7 +4199,9 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
-static int bpf_xdp_output_btf_ids[5];
+BTF_ID_LIST(bpf_xdp_output_btf_ids)
+BTF_ID(struct, xdp_buff)
+
const struct bpf_func_proto bpf_xdp_output_proto = {
.func = bpf_xdp_event_output,
.gpl_only = true,
@@ -4289,10 +4319,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen, u32 flags)
{
char devname[IFNAMSIZ];
+ int val, valbool;
struct net *net;
int ifindex;
int ret = 0;
- int val;
if (!sk_fullsock(sk))
return -EINVAL;
@@ -4303,6 +4333,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
return -EINVAL;
val = *((int *)optval);
+ valbool = val ? 1 : 0;
/* Only some socketops are supported */
switch (optname) {
@@ -4361,6 +4392,11 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
}
ret = sock_bindtoindex(sk, ifindex, false);
break;
+ case SO_KEEPALIVE:
+ if (sk->sk_prot->keepalive)
+ sk->sk_prot->keepalive(sk, valbool);
+ sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
+ break;
default:
ret = -EINVAL;
}
@@ -4421,6 +4457,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
ret = tcp_set_congestion_control(sk, name, false,
reinit, true);
} else {
+ struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
if (optlen != sizeof(int))
@@ -4449,6 +4486,33 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
else
tp->save_syn = val;
break;
+ case TCP_KEEPIDLE:
+ ret = tcp_sock_set_keepidle_locked(sk, val);
+ break;
+ case TCP_KEEPINTVL:
+ if (val < 1 || val > MAX_TCP_KEEPINTVL)
+ ret = -EINVAL;
+ else
+ tp->keepalive_intvl = val * HZ;
+ break;
+ case TCP_KEEPCNT:
+ if (val < 1 || val > MAX_TCP_KEEPCNT)
+ ret = -EINVAL;
+ else
+ tp->keepalive_probes = val;
+ break;
+ case TCP_SYNCNT:
+ if (val < 1 || val > MAX_TCP_SYNCNT)
+ ret = -EINVAL;
+ else
+ icsk->icsk_syn_retries = val;
+ break;
+ case TCP_USER_TIMEOUT:
+ if (val < 0)
+ ret = -EINVAL;
+ else
+ icsk->icsk_user_timeout = val;
+ break;
default:
ret = -EINVAL;
}
@@ -6123,6 +6187,7 @@ bool bpf_helper_changes_pkt_data(void *func)
}
const struct bpf_func_proto bpf_event_output_data_proto __weak;
+const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak;
static const struct bpf_func_proto *
sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
@@ -6155,6 +6220,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_cgroup_classid:
return &bpf_get_cgroup_classid_curr_proto;
#endif
+ case BPF_FUNC_sk_storage_get:
+ return &bpf_sk_storage_get_cg_sock_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -6858,6 +6925,7 @@ static bool __sock_filter_check_attach_type(int off,
case offsetof(struct bpf_sock, priority):
switch (attach_type) {
case BPF_CGROUP_INET_SOCK_CREATE:
+ case BPF_CGROUP_INET_SOCK_RELEASE:
goto full_access;
default:
return false;
@@ -9187,6 +9255,189 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
const struct bpf_prog_ops sk_reuseport_prog_ops = {
};
+
+DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled);
+EXPORT_SYMBOL(bpf_sk_lookup_enabled);
+
+BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
+ struct sock *, sk, u64, flags)
+{
+ if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE |
+ BPF_SK_LOOKUP_F_NO_REUSEPORT)))
+ return -EINVAL;
+ if (unlikely(sk && sk_is_refcounted(sk)))
+ return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
+ if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED))
+ return -ESOCKTNOSUPPORT; /* reject connected sockets */
+
+ /* Check if socket is suitable for packet L3/L4 protocol */
+ if (sk && sk->sk_protocol != ctx->protocol)
+ return -EPROTOTYPE;
+ if (sk && sk->sk_family != ctx->family &&
+ (sk->sk_family == AF_INET || ipv6_only_sock(sk)))
+ return -EAFNOSUPPORT;
+
+ if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE))
+ return -EEXIST;
+
+ /* Select socket as lookup result */
+ ctx->selected_sk = sk;
+ ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT;
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_sk_lookup_assign_proto = {
+ .func = bpf_sk_lookup_assign,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_SOCKET_OR_NULL,
+ .arg3_type = ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto *
+sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_perf_event_output:
+ return &bpf_event_output_data_proto;
+ case BPF_FUNC_sk_assign:
+ return &bpf_sk_lookup_assign_proto;
+ case BPF_FUNC_sk_release:
+ return &bpf_sk_release_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static bool sk_lookup_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (off < 0 || off >= sizeof(struct bpf_sk_lookup))
+ return false;
+ if (off % size != 0)
+ return false;
+ if (type != BPF_READ)
+ return false;
+
+ switch (off) {
+ case offsetof(struct bpf_sk_lookup, sk):
+ info->reg_type = PTR_TO_SOCKET_OR_NULL;
+ return size == sizeof(__u64);
+
+ case bpf_ctx_range(struct bpf_sk_lookup, family):
+ case bpf_ctx_range(struct bpf_sk_lookup, protocol):
+ case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4):
+ case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
+ case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
+ case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
+ case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
+ case bpf_ctx_range(struct bpf_sk_lookup, local_port):
+ bpf_ctx_record_field_size(info, sizeof(__u32));
+ return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
+
+ default:
+ return false;
+ }
+}
+
+static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct bpf_sk_lookup, sk):
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sk_lookup_kern, selected_sk));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, family):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ family, 2, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, protocol):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ protocol, 2, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, remote_ip4):
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ v4.saddr, 4, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, local_ip4):
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ v4.daddr, 4, target_size));
+ break;
+
+ case bpf_ctx_range_till(struct bpf_sk_lookup,
+ remote_ip6[0], remote_ip6[3]): {
+#if IS_ENABLED(CONFIG_IPV6)
+ int off = si->off;
+
+ off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]);
+ off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sk_lookup_kern, v6.saddr));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+ }
+ case bpf_ctx_range_till(struct bpf_sk_lookup,
+ local_ip6[0], local_ip6[3]): {
+#if IS_ENABLED(CONFIG_IPV6)
+ int off = si->off;
+
+ off -= offsetof(struct bpf_sk_lookup, local_ip6[0]);
+ off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sk_lookup_kern, v6.daddr));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+ }
+ case offsetof(struct bpf_sk_lookup, remote_port):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ sport, 2, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, local_port):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ dport, 2, target_size));
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
+const struct bpf_prog_ops sk_lookup_prog_ops = {
+};
+
+const struct bpf_verifier_ops sk_lookup_verifier_ops = {
+ .get_func_proto = sk_lookup_func_proto,
+ .is_valid_access = sk_lookup_is_valid_access,
+ .convert_ctx_access = sk_lookup_convert_ctx_access,
+};
+
#endif /* CONFIG_INET */
DEFINE_BPF_DISPATCHER(xdp)
@@ -9195,3 +9446,132 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
{
bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
}
+
+#ifdef CONFIG_DEBUG_INFO_BTF
+BTF_ID_LIST_GLOBAL(btf_sock_ids)
+#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type)
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+#else
+u32 btf_sock_ids[MAX_BTF_SOCK_TYPE];
+#endif
+
+static bool check_arg_btf_id(u32 btf_id, u32 arg)
+{
+ int i;
+
+ /* only one argument, no need to check arg */
+ for (i = 0; i < MAX_BTF_SOCK_TYPE; i++)
+ if (btf_sock_ids[i] == btf_id)
+ return true;
+ return false;
+}
+
+BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
+{
+ /* tcp6_sock type is not generated in dwarf and hence btf,
+ * trigger an explicit type generation here.
+ */
+ BTF_TYPE_EMIT(struct tcp6_sock);
+ if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_family == AF_INET6)
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
+ .func = bpf_skc_to_tcp6_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
+};
+
+BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
+{
+ if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
+ .func = bpf_skc_to_tcp_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
+};
+
+BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
+{
+#ifdef CONFIG_INET
+ if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
+ return (unsigned long)sk;
+#endif
+
+#if IS_BUILTIN(CONFIG_IPV6)
+ if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
+ return (unsigned long)sk;
+#endif
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
+ .func = bpf_skc_to_tcp_timewait_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
+};
+
+BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk)
+{
+#ifdef CONFIG_INET
+ if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV)
+ return (unsigned long)sk;
+#endif
+
+#if IS_BUILTIN(CONFIG_IPV6)
+ if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
+ return (unsigned long)sk;
+#endif
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
+ .func = bpf_skc_to_tcp_request_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
+};
+
+BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
+{
+ /* udp6_sock type is not generated in dwarf and hence btf,
+ * trigger an explicit type generation here.
+ */
+ BTF_TYPE_EMIT(struct udp6_sock);
+ if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
+ sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6)
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
+ .func = bpf_skc_to_udp6_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
+};
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 142a8824f0a8..29806eb765cf 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -383,6 +383,23 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
}
EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
+void skb_flow_dissect_hash(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
+{
+ struct flow_dissector_key_hash *key;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_HASH))
+ return;
+
+ key = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_HASH,
+ target_container);
+
+ key->hash = skb_get_hash_raw(skb);
+}
+EXPORT_SYMBOL(skb_flow_dissect_hash);
+
static enum flow_dissect_ret
__skb_flow_dissect_mpls(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 2076219b8ba5..d4474c812b64 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -430,7 +430,7 @@ EXPORT_SYMBOL(flow_indr_dev_unregister);
static void flow_block_indr_init(struct flow_block_cb *flow_block,
struct flow_block_offload *bo,
- struct net_device *dev, void *data,
+ struct net_device *dev, struct Qdisc *sch, void *data,
void *cb_priv,
void (*cleanup)(struct flow_block_cb *block_cb))
{
@@ -438,6 +438,7 @@ static void flow_block_indr_init(struct flow_block_cb *flow_block,
flow_block->indr.data = data;
flow_block->indr.cb_priv = cb_priv;
flow_block->indr.dev = dev;
+ flow_block->indr.sch = sch;
flow_block->indr.cleanup = cleanup;
}
@@ -445,7 +446,8 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb,
void *cb_ident, void *cb_priv,
void (*release)(void *cb_priv),
struct flow_block_offload *bo,
- struct net_device *dev, void *data,
+ struct net_device *dev,
+ struct Qdisc *sch, void *data,
void *indr_cb_priv,
void (*cleanup)(struct flow_block_cb *block_cb))
{
@@ -455,7 +457,7 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb,
if (IS_ERR(block_cb))
goto out;
- flow_block_indr_init(block_cb, bo, dev, data, indr_cb_priv, cleanup);
+ flow_block_indr_init(block_cb, bo, dev, sch, data, indr_cb_priv, cleanup);
list_add(&block_cb->indr.list, &flow_block_indr_list);
out:
@@ -463,7 +465,7 @@ out:
}
EXPORT_SYMBOL(flow_indr_block_cb_alloc);
-int flow_indr_dev_setup_offload(struct net_device *dev,
+int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
enum tc_setup_type type, void *data,
struct flow_block_offload *bo,
void (*cleanup)(struct flow_block_cb *block_cb))
@@ -472,7 +474,7 @@ int flow_indr_dev_setup_offload(struct net_device *dev,
mutex_lock(&flow_indr_block_lock);
list_for_each_entry(this, &flow_block_indr_dev_list, list)
- this->cb(dev, this->cb_priv, type, bo, data, cleanup);
+ this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup);
mutex_unlock(&flow_indr_block_lock);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ef6b5a8f629c..8e39e28b0a8d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1783,6 +1783,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = {
[NDA_MASTER] = { .type = NLA_U32 },
[NDA_PROTOCOL] = { .type = NLA_U8 },
[NDA_NH_ID] = { .type = NLA_U32 },
+ [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED },
};
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 85a4b0101f76..68e0682450c6 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1000,6 +1000,16 @@ static size_t rtnl_prop_list_size(const struct net_device *dev)
return size;
}
+static size_t rtnl_proto_down_size(const struct net_device *dev)
+{
+ size_t size = nla_total_size(1);
+
+ if (dev->proto_down_reason)
+ size += nla_total_size(0) + nla_total_size(4);
+
+ return size;
+}
+
static noinline size_t if_nlmsg_size(const struct net_device *dev,
u32 ext_filter_mask)
{
@@ -1041,7 +1051,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_EVENT */
+ nla_total_size(4) /* IFLA_NEW_NETNSID */
+ nla_total_size(4) /* IFLA_NEW_IFINDEX */
- + nla_total_size(1) /* IFLA_PROTO_DOWN */
+ + rtnl_proto_down_size(dev) /* proto down */
+ nla_total_size(4) /* IFLA_TARGET_NETNSID */
+ nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */
+ nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */
@@ -1416,13 +1426,12 @@ static u32 rtnl_xdp_prog_skb(struct net_device *dev)
static u32 rtnl_xdp_prog_drv(struct net_device *dev)
{
- return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG);
+ return dev_xdp_prog_id(dev, XDP_MODE_DRV);
}
static u32 rtnl_xdp_prog_hw(struct net_device *dev)
{
- return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
- XDP_QUERY_PROG_HW);
+ return dev_xdp_prog_id(dev, XDP_MODE_HW);
}
static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev,
@@ -1658,6 +1667,35 @@ nest_cancel:
return ret;
}
+static int rtnl_fill_proto_down(struct sk_buff *skb,
+ const struct net_device *dev)
+{
+ struct nlattr *pr;
+ u32 preason;
+
+ if (nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
+ goto nla_put_failure;
+
+ preason = dev->proto_down_reason;
+ if (!preason)
+ return 0;
+
+ pr = nla_nest_start(skb, IFLA_PROTO_DOWN_REASON);
+ if (!pr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, IFLA_PROTO_DOWN_REASON_VALUE, preason)) {
+ nla_nest_cancel(skb, pr);
+ goto nla_put_failure;
+ }
+
+ nla_nest_end(skb, pr);
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb,
struct net_device *dev, struct net *src_net,
int type, u32 pid, u32 seq, u32 change,
@@ -1708,13 +1746,15 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_up_count) +
atomic_read(&dev->carrier_down_count)) ||
- nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down) ||
nla_put_u32(skb, IFLA_CARRIER_UP_COUNT,
atomic_read(&dev->carrier_up_count)) ||
nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT,
atomic_read(&dev->carrier_down_count)))
goto nla_put_failure;
+ if (rtnl_fill_proto_down(skb, dev))
+ goto nla_put_failure;
+
if (event != IFLA_EVENT_NONE) {
if (nla_put_u32(skb, IFLA_EVENT, event))
goto nla_put_failure;
@@ -1834,6 +1874,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_ALT_IFNAME] = { .type = NLA_STRING,
.len = ALTIFNAMSIZ - 1 },
[IFLA_PERM_ADDRESS] = { .type = NLA_REJECT },
+ [IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2483,6 +2524,67 @@ static int do_set_master(struct net_device *dev, int ifindex,
return 0;
}
+static const struct nla_policy ifla_proto_down_reason_policy[IFLA_PROTO_DOWN_REASON_VALUE + 1] = {
+ [IFLA_PROTO_DOWN_REASON_MASK] = { .type = NLA_U32 },
+ [IFLA_PROTO_DOWN_REASON_VALUE] = { .type = NLA_U32 },
+};
+
+static int do_set_proto_down(struct net_device *dev,
+ struct nlattr *nl_proto_down,
+ struct nlattr *nl_proto_down_reason,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *pdreason[IFLA_PROTO_DOWN_REASON_MAX + 1];
+ const struct net_device_ops *ops = dev->netdev_ops;
+ unsigned long mask = 0;
+ u32 value;
+ bool proto_down;
+ int err;
+
+ if (!ops->ndo_change_proto_down) {
+ NL_SET_ERR_MSG(extack, "Protodown not supported by device");
+ return -EOPNOTSUPP;
+ }
+
+ if (nl_proto_down_reason) {
+ err = nla_parse_nested_deprecated(pdreason,
+ IFLA_PROTO_DOWN_REASON_MAX,
+ nl_proto_down_reason,
+ ifla_proto_down_reason_policy,
+ NULL);
+ if (err < 0)
+ return err;
+
+ if (!pdreason[IFLA_PROTO_DOWN_REASON_VALUE]) {
+ NL_SET_ERR_MSG(extack, "Invalid protodown reason value");
+ return -EINVAL;
+ }
+
+ value = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_VALUE]);
+
+ if (pdreason[IFLA_PROTO_DOWN_REASON_MASK])
+ mask = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_MASK]);
+
+ dev_change_proto_down_reason(dev, mask, value);
+ }
+
+ if (nl_proto_down) {
+ proto_down = nla_get_u8(nl_proto_down);
+
+ /* Dont turn off protodown if there are active reasons */
+ if (!proto_down && dev->proto_down_reason) {
+ NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons");
+ return -EBUSY;
+ }
+ err = dev_change_proto_down(dev,
+ proto_down);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
#define DO_SETLINK_MODIFIED 0x01
/* notify flag means notify + modified. */
#define DO_SETLINK_NOTIFY 0x03
@@ -2771,9 +2873,9 @@ static int do_setlink(const struct sk_buff *skb,
}
err = 0;
- if (tb[IFLA_PROTO_DOWN]) {
- err = dev_change_proto_down(dev,
- nla_get_u8(tb[IFLA_PROTO_DOWN]));
+ if (tb[IFLA_PROTO_DOWN] || tb[IFLA_PROTO_DOWN_REASON]) {
+ err = do_set_proto_down(dev, tb[IFLA_PROTO_DOWN],
+ tb[IFLA_PROTO_DOWN_REASON], extack);
if (err)
goto errout;
status |= DO_SETLINK_NOTIFY;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b8afefe6f6b6..2828f6d5ba89 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3758,7 +3758,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
int err = -ENOMEM;
int i = 0;
int pos;
- int dummy;
if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) &&
(skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
@@ -3780,7 +3779,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
}
__skb_push(head_skb, doffset);
- proto = skb_network_protocol(head_skb, &dummy);
+ proto = skb_network_protocol(head_skb, NULL);
if (unlikely(!proto))
return ERR_PTR(-EINVAL);
@@ -4413,7 +4412,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
* at the moment even if they are anonymous).
*/
if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
- __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
+ !__pskb_pull_tail(skb, __skb_pagelen(skb)))
return -ENOMEM;
/* Easy case. Most of packets will go this way. */
@@ -4692,7 +4691,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
sk->sk_protocol == IPPROTO_TCP &&
sk->sk_type == SOCK_STREAM) {
- skb = tcp_get_timestamping_opt_stats(sk);
+ skb = tcp_get_timestamping_opt_stats(sk, orig_skb);
opt_stats = true;
} else
#endif
diff --git a/net/core/sock.c b/net/core/sock.c
index 2e5b7870e5d3..2c5dd1397775 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -113,6 +113,7 @@
#include <linux/static_key.h>
#include <linux/memcontrol.h>
#include <linux/prefetch.h>
+#include <linux/compat.h>
#include <linux/uaccess.h>
@@ -360,7 +361,8 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
return sizeof(tv);
}
-static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool old_timeval)
+static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
+ bool old_timeval)
{
struct __kernel_sock_timeval tv;
@@ -370,7 +372,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool
if (optlen < sizeof(tv32))
return -EINVAL;
- if (copy_from_user(&tv32, optval, sizeof(tv32)))
+ if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
return -EFAULT;
tv.tv_sec = tv32.tv_sec;
tv.tv_usec = tv32.tv_usec;
@@ -379,14 +381,14 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool
if (optlen < sizeof(old_tv))
return -EINVAL;
- if (copy_from_user(&old_tv, optval, sizeof(old_tv)))
+ if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
return -EFAULT;
tv.tv_sec = old_tv.tv_sec;
tv.tv_usec = old_tv.tv_usec;
} else {
if (optlen < sizeof(tv))
return -EINVAL;
- if (copy_from_user(&tv, optval, sizeof(tv)))
+ if (copy_from_sockptr(&tv, optval, sizeof(tv)))
return -EFAULT;
}
if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
@@ -608,8 +610,7 @@ int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
}
EXPORT_SYMBOL(sock_bindtoindex);
-static int sock_setbindtodevice(struct sock *sk, char __user *optval,
- int optlen)
+static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
@@ -631,7 +632,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
memset(devname, 0, sizeof(devname));
ret = -EFAULT;
- if (copy_from_user(devname, optval, optlen))
+ if (copy_from_sockptr(devname, optval, optlen))
goto out;
index = 0;
@@ -695,15 +696,6 @@ out:
return ret;
}
-static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit,
- int valbool)
-{
- if (valbool)
- sock_set_flag(sk, bit);
- else
- sock_reset_flag(sk, bit);
-}
-
bool sk_mc_loop(struct sock *sk)
{
if (dev_recursion_level())
@@ -834,7 +826,7 @@ EXPORT_SYMBOL(sock_set_rcvbuf);
*/
int sock_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock_txtime sk_txtime;
struct sock *sk = sock->sk;
@@ -853,7 +845,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
valbool = val ? 1 : 0;
@@ -966,7 +958,7 @@ set_sndbuf:
ret = -EINVAL; /* 1003.1g */
break;
}
- if (copy_from_user(&ling, optval, sizeof(ling))) {
+ if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
ret = -EFAULT;
break;
}
@@ -1060,60 +1052,52 @@ set_sndbuf:
case SO_RCVTIMEO_OLD:
case SO_RCVTIMEO_NEW:
- ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen, optname == SO_RCVTIMEO_OLD);
+ ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
+ optlen, optname == SO_RCVTIMEO_OLD);
break;
case SO_SNDTIMEO_OLD:
case SO_SNDTIMEO_NEW:
- ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen, optname == SO_SNDTIMEO_OLD);
+ ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
+ optlen, optname == SO_SNDTIMEO_OLD);
break;
- case SO_ATTACH_FILTER:
- ret = -EINVAL;
- if (optlen == sizeof(struct sock_fprog)) {
- struct sock_fprog fprog;
-
- ret = -EFAULT;
- if (copy_from_user(&fprog, optval, sizeof(fprog)))
- break;
+ case SO_ATTACH_FILTER: {
+ struct sock_fprog fprog;
+ ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
+ if (!ret)
ret = sk_attach_filter(&fprog, sk);
- }
break;
-
+ }
case SO_ATTACH_BPF:
ret = -EINVAL;
if (optlen == sizeof(u32)) {
u32 ufd;
ret = -EFAULT;
- if (copy_from_user(&ufd, optval, sizeof(ufd)))
+ if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
break;
ret = sk_attach_bpf(ufd, sk);
}
break;
- case SO_ATTACH_REUSEPORT_CBPF:
- ret = -EINVAL;
- if (optlen == sizeof(struct sock_fprog)) {
- struct sock_fprog fprog;
-
- ret = -EFAULT;
- if (copy_from_user(&fprog, optval, sizeof(fprog)))
- break;
+ case SO_ATTACH_REUSEPORT_CBPF: {
+ struct sock_fprog fprog;
+ ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
+ if (!ret)
ret = sk_reuseport_attach_filter(&fprog, sk);
- }
break;
-
+ }
case SO_ATTACH_REUSEPORT_EBPF:
ret = -EINVAL;
if (optlen == sizeof(u32)) {
u32 ufd;
ret = -EFAULT;
- if (copy_from_user(&ufd, optval, sizeof(ufd)))
+ if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
break;
ret = sk_reuseport_attach_bpf(ufd, sk);
@@ -1193,7 +1177,7 @@ set_sndbuf:
if (sizeof(ulval) != sizeof(val) &&
optlen >= sizeof(ulval) &&
- get_user(ulval, (unsigned long __user *)optval)) {
+ copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
ret = -EFAULT;
break;
}
@@ -1236,7 +1220,7 @@ set_sndbuf:
if (optlen != sizeof(struct sock_txtime)) {
ret = -EINVAL;
break;
- } else if (copy_from_user(&sk_txtime, optval,
+ } else if (copy_from_sockptr(&sk_txtime, optval,
sizeof(struct sock_txtime))) {
ret = -EFAULT;
break;
@@ -2802,20 +2786,6 @@ int sock_no_shutdown(struct socket *sock, int how)
}
EXPORT_SYMBOL(sock_no_shutdown);
-int sock_no_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- return -EOPNOTSUPP;
-}
-EXPORT_SYMBOL(sock_no_setsockopt);
-
-int sock_no_getsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- return -EOPNOTSUPP;
-}
-EXPORT_SYMBOL(sock_no_getsockopt);
-
int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
return -EOPNOTSUPP;
@@ -3222,20 +3192,6 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname,
}
EXPORT_SYMBOL(sock_common_getsockopt);
-#ifdef CONFIG_COMPAT
-int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- struct sock *sk = sock->sk;
-
- if (sk->sk_prot->compat_getsockopt != NULL)
- return sk->sk_prot->compat_getsockopt(sk, level, optname,
- optval, optlen);
- return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
-}
-EXPORT_SYMBOL(compat_sock_common_getsockopt);
-#endif
-
int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
@@ -3255,7 +3211,7 @@ EXPORT_SYMBOL(sock_common_recvmsg);
* Set socket options on an inet socket.
*/
int sock_common_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
@@ -3263,20 +3219,6 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname,
}
EXPORT_SYMBOL(sock_common_setsockopt);
-#ifdef CONFIG_COMPAT
-int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- struct sock *sk = sock->sk;
-
- if (sk->sk_prot->compat_setsockopt != NULL)
- return sk->sk_prot->compat_setsockopt(sk, level, optname,
- optval, optlen);
- return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
-}
-EXPORT_SYMBOL(compat_sock_common_setsockopt);
-#endif
-
void sk_common_release(struct sock *sk)
{
if (sk->sk_prot->destroy)
@@ -3575,6 +3517,7 @@ int sock_load_diag_module(int family, int protocol)
#ifdef CONFIG_INET
if (family == AF_INET &&
protocol != IPPROTO_RAW &&
+ protocol < MAX_INET_PROTOS &&
!rcu_access_pointer(inet_protos[protocol]))
return -ENOENT;
#endif
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 0971f17e8e54..119f52a99dc1 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -681,6 +681,7 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = {
.arg4_type = ARG_ANYTHING,
};
+static int sock_map_btf_id;
const struct bpf_map_ops sock_map_ops = {
.map_alloc = sock_map_alloc,
.map_free = sock_map_free,
@@ -691,9 +692,11 @@ const struct bpf_map_ops sock_map_ops = {
.map_lookup_elem = sock_map_lookup,
.map_release_uref = sock_map_release_progs,
.map_check_btf = map_check_no_btf,
+ .map_btf_name = "bpf_stab",
+ .map_btf_id = &sock_map_btf_id,
};
-struct bpf_htab_elem {
+struct bpf_shtab_elem {
struct rcu_head rcu;
u32 hash;
struct sock *sk;
@@ -701,14 +704,14 @@ struct bpf_htab_elem {
u8 key[];
};
-struct bpf_htab_bucket {
+struct bpf_shtab_bucket {
struct hlist_head head;
raw_spinlock_t lock;
};
-struct bpf_htab {
+struct bpf_shtab {
struct bpf_map map;
- struct bpf_htab_bucket *buckets;
+ struct bpf_shtab_bucket *buckets;
u32 buckets_num;
u32 elem_size;
struct sk_psock_progs progs;
@@ -720,17 +723,17 @@ static inline u32 sock_hash_bucket_hash(const void *key, u32 len)
return jhash(key, len, 0);
}
-static struct bpf_htab_bucket *sock_hash_select_bucket(struct bpf_htab *htab,
- u32 hash)
+static struct bpf_shtab_bucket *sock_hash_select_bucket(struct bpf_shtab *htab,
+ u32 hash)
{
return &htab->buckets[hash & (htab->buckets_num - 1)];
}
-static struct bpf_htab_elem *
+static struct bpf_shtab_elem *
sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key,
u32 key_size)
{
- struct bpf_htab_elem *elem;
+ struct bpf_shtab_elem *elem;
hlist_for_each_entry_rcu(elem, head, node) {
if (elem->hash == hash &&
@@ -743,10 +746,10 @@ sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key,
static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
u32 key_size = map->key_size, hash;
- struct bpf_htab_bucket *bucket;
- struct bpf_htab_elem *elem;
+ struct bpf_shtab_bucket *bucket;
+ struct bpf_shtab_elem *elem;
WARN_ON_ONCE(!rcu_read_lock_held());
@@ -757,8 +760,8 @@ static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
return elem ? elem->sk : NULL;
}
-static void sock_hash_free_elem(struct bpf_htab *htab,
- struct bpf_htab_elem *elem)
+static void sock_hash_free_elem(struct bpf_shtab *htab,
+ struct bpf_shtab_elem *elem)
{
atomic_dec(&htab->count);
kfree_rcu(elem, rcu);
@@ -767,9 +770,9 @@ static void sock_hash_free_elem(struct bpf_htab *htab,
static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
void *link_raw)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct bpf_htab_elem *elem_probe, *elem = link_raw;
- struct bpf_htab_bucket *bucket;
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
+ struct bpf_shtab_elem *elem_probe, *elem = link_raw;
+ struct bpf_shtab_bucket *bucket;
WARN_ON_ONCE(!rcu_read_lock_held());
bucket = sock_hash_select_bucket(htab, elem->hash);
@@ -791,10 +794,10 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
static int sock_hash_delete_elem(struct bpf_map *map, void *key)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
u32 hash, key_size = map->key_size;
- struct bpf_htab_bucket *bucket;
- struct bpf_htab_elem *elem;
+ struct bpf_shtab_bucket *bucket;
+ struct bpf_shtab_elem *elem;
int ret = -ENOENT;
hash = sock_hash_bucket_hash(key, key_size);
@@ -812,12 +815,12 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
return ret;
}
-static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab,
- void *key, u32 key_size,
- u32 hash, struct sock *sk,
- struct bpf_htab_elem *old)
+static struct bpf_shtab_elem *sock_hash_alloc_elem(struct bpf_shtab *htab,
+ void *key, u32 key_size,
+ u32 hash, struct sock *sk,
+ struct bpf_shtab_elem *old)
{
- struct bpf_htab_elem *new;
+ struct bpf_shtab_elem *new;
if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
if (!old) {
@@ -841,10 +844,10 @@ static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab,
static int sock_hash_update_common(struct bpf_map *map, void *key,
struct sock *sk, u64 flags)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
u32 key_size = map->key_size, hash;
- struct bpf_htab_elem *elem, *elem_new;
- struct bpf_htab_bucket *bucket;
+ struct bpf_shtab_elem *elem, *elem_new;
+ struct bpf_shtab_bucket *bucket;
struct sk_psock_link *link;
struct sk_psock *psock;
int ret;
@@ -954,8 +957,8 @@ out:
static int sock_hash_get_next_key(struct bpf_map *map, void *key,
void *key_next)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct bpf_htab_elem *elem, *elem_next;
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
+ struct bpf_shtab_elem *elem, *elem_next;
u32 hash, key_size = map->key_size;
struct hlist_head *head;
int i = 0;
@@ -969,7 +972,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key,
goto find_first_elem;
elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)),
- struct bpf_htab_elem, node);
+ struct bpf_shtab_elem, node);
if (elem_next) {
memcpy(key_next, elem_next->key, key_size);
return 0;
@@ -981,7 +984,7 @@ find_first_elem:
for (; i < htab->buckets_num; i++) {
head = &sock_hash_select_bucket(htab, i)->head;
elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
- struct bpf_htab_elem, node);
+ struct bpf_shtab_elem, node);
if (elem_next) {
memcpy(key_next, elem_next->key, key_size);
return 0;
@@ -993,7 +996,7 @@ find_first_elem:
static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
{
- struct bpf_htab *htab;
+ struct bpf_shtab *htab;
int i, err;
u64 cost;
@@ -1015,15 +1018,15 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
bpf_map_init_from_attr(&htab->map, attr);
htab->buckets_num = roundup_pow_of_two(htab->map.max_entries);
- htab->elem_size = sizeof(struct bpf_htab_elem) +
+ htab->elem_size = sizeof(struct bpf_shtab_elem) +
round_up(htab->map.key_size, 8);
if (htab->buckets_num == 0 ||
- htab->buckets_num > U32_MAX / sizeof(struct bpf_htab_bucket)) {
+ htab->buckets_num > U32_MAX / sizeof(struct bpf_shtab_bucket)) {
err = -EINVAL;
goto free_htab;
}
- cost = (u64) htab->buckets_num * sizeof(struct bpf_htab_bucket) +
+ cost = (u64) htab->buckets_num * sizeof(struct bpf_shtab_bucket) +
(u64) htab->elem_size * htab->map.max_entries;
if (cost >= U32_MAX - PAGE_SIZE) {
err = -EINVAL;
@@ -1034,7 +1037,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
goto free_htab;
htab->buckets = bpf_map_area_alloc(htab->buckets_num *
- sizeof(struct bpf_htab_bucket),
+ sizeof(struct bpf_shtab_bucket),
htab->map.numa_node);
if (!htab->buckets) {
bpf_map_charge_finish(&htab->map.memory);
@@ -1055,10 +1058,10 @@ free_htab:
static void sock_hash_free(struct bpf_map *map)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct bpf_htab_bucket *bucket;
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
+ struct bpf_shtab_bucket *bucket;
struct hlist_head unlink_list;
- struct bpf_htab_elem *elem;
+ struct bpf_shtab_elem *elem;
struct hlist_node *node;
int i;
@@ -1134,7 +1137,7 @@ static void *sock_hash_lookup(struct bpf_map *map, void *key)
static void sock_hash_release_progs(struct bpf_map *map)
{
- psock_progs_drop(&container_of(map, struct bpf_htab, map)->progs);
+ psock_progs_drop(&container_of(map, struct bpf_shtab, map)->progs);
}
BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, sops,
@@ -1214,6 +1217,7 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
.arg4_type = ARG_ANYTHING,
};
+static int sock_hash_map_btf_id;
const struct bpf_map_ops sock_hash_ops = {
.map_alloc = sock_hash_alloc,
.map_free = sock_hash_free,
@@ -1224,6 +1228,8 @@ const struct bpf_map_ops sock_hash_ops = {
.map_lookup_elem_sys_only = sock_hash_lookup_sys,
.map_release_uref = sock_hash_release_progs,
.map_check_btf = map_check_no_btf,
+ .map_btf_name = "bpf_shtab",
+ .map_btf_id = &sock_hash_map_btf_id,
};
static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
@@ -1232,7 +1238,7 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
case BPF_MAP_TYPE_SOCKMAP:
return &container_of(map, struct bpf_stab, map)->progs;
case BPF_MAP_TYPE_SOCKHASH:
- return &container_of(map, struct bpf_htab, map)->progs;
+ return &container_of(map, struct bpf_shtab, map)->progs;
default:
break;
}
diff --git a/net/core/tso.c b/net/core/tso.c
index d4d5c077ad72..4148f6d48953 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -6,18 +6,17 @@
#include <asm/unaligned.h>
/* Calculate expected number of TX descriptors */
-int tso_count_descs(struct sk_buff *skb)
+int tso_count_descs(const struct sk_buff *skb)
{
/* The Marvell Way */
return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags;
}
EXPORT_SYMBOL(tso_count_descs);
-void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
+void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso,
int size, bool is_last)
{
- struct tcphdr *tcph;
- int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int hdr_len = skb_transport_offset(skb) + tso->tlen;
int mac_hdr_len = skb_network_offset(skb);
memcpy(hdr, skb->data, hdr_len);
@@ -30,23 +29,31 @@ void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
} else {
struct ipv6hdr *iph = (void *)(hdr + mac_hdr_len);
- iph->payload_len = htons(size + tcp_hdrlen(skb));
+ iph->payload_len = htons(size + tso->tlen);
}
- tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb));
- put_unaligned_be32(tso->tcp_seq, &tcph->seq);
+ hdr += skb_transport_offset(skb);
+ if (tso->tlen != sizeof(struct udphdr)) {
+ struct tcphdr *tcph = (struct tcphdr *)hdr;
- if (!is_last) {
- /* Clear all special flags for not last packet */
- tcph->psh = 0;
- tcph->fin = 0;
- tcph->rst = 0;
+ put_unaligned_be32(tso->tcp_seq, &tcph->seq);
+
+ if (!is_last) {
+ /* Clear all special flags for not last packet */
+ tcph->psh = 0;
+ tcph->fin = 0;
+ tcph->rst = 0;
+ }
+ } else {
+ struct udphdr *uh = (struct udphdr *)hdr;
+
+ uh->len = htons(sizeof(*uh) + size);
}
}
EXPORT_SYMBOL(tso_build_hdr);
-void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size)
+void tso_build_data(const struct sk_buff *skb, struct tso_t *tso, int size)
{
- tso->tcp_seq += size;
+ tso->tcp_seq += size; /* not worth avoiding this operation for UDP */
tso->size -= size;
tso->data += size;
@@ -62,12 +69,14 @@ void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size)
}
EXPORT_SYMBOL(tso_build_data);
-void tso_start(struct sk_buff *skb, struct tso_t *tso)
+int tso_start(struct sk_buff *skb, struct tso_t *tso)
{
- int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int tlen = skb_is_gso_tcp(skb) ? tcp_hdrlen(skb) : sizeof(struct udphdr);
+ int hdr_len = skb_transport_offset(skb) + tlen;
+ tso->tlen = tlen;
tso->ip_id = ntohs(ip_hdr(skb)->id);
- tso->tcp_seq = ntohl(tcp_hdr(skb)->seq);
+ tso->tcp_seq = (tlen != sizeof(struct udphdr)) ? ntohl(tcp_hdr(skb)->seq) : 0;
tso->next_frag_idx = 0;
tso->ipv6 = vlan_get_protocol(skb) == htons(ETH_P_IPV6);
@@ -83,5 +92,6 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso)
tso->data = skb_frag_address(frag);
tso->next_frag_idx++;
}
+ return hdr_len;
}
EXPORT_SYMBOL(tso_start);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 3c45f99e26d5..48aba933a5a8 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -400,15 +400,6 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
}
EXPORT_SYMBOL_GPL(__xdp_release_frame);
-int xdp_attachment_query(struct xdp_attachment_info *info,
- struct netdev_bpf *bpf)
-{
- bpf->prog_id = info->prog ? info->prog->aux->id : 0;
- bpf->prog_flags = info->prog ? info->flags : 0;
- return 0;
-}
-EXPORT_SYMBOL_GPL(xdp_attachment_query);
-
bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
struct netdev_bpf *bpf)
{
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index d2a4553bcf39..84dde5a2066e 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1736,7 +1736,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net_device *netdev;
struct dcbmsg *dcb = nlmsg_data(nlh);
struct nlattr *tb[DCB_ATTR_MAX + 1];
- u32 portid = skb ? NETLINK_CB(skb).portid : 0;
+ u32 portid = NETLINK_CB(skb).portid;
int ret = -EINVAL;
struct sk_buff *reply_skb;
struct nlmsghdr *reply_nlh = NULL;
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 51ac2631fb48..0c7d2f66ba27 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -5,7 +5,7 @@ menuconfig IP_DCCP
help
Datagram Congestion Control Protocol (RFC 4340)
- From http://www.ietf.org/rfc/rfc4340.txt:
+ From https://www.ietf.org/rfc/rfc4340.txt:
The Datagram Congestion Control Protocol (DCCP) is a transport
protocol that implements bidirectional, unicast connections of
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 4d7771f36eff..a3eeb84d16f9 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -26,13 +26,13 @@ config IP_DCCP_CCID3
relatively smooth sending rate is of importance.
CCID-3 is further described in RFC 4342,
- http://www.ietf.org/rfc/rfc4342.txt
+ https://www.ietf.org/rfc/rfc4342.txt
The TFRC congestion control algorithms were initially described in
RFC 5348.
This text was extracted from RFC 4340 (sec. 10.2),
- http://www.ietf.org/rfc/rfc4340.txt
+ https://www.ietf.org/rfc/rfc4340.txt
If in doubt, say N.
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 9ef9bee9610f..aef72f6a2829 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -7,7 +7,7 @@
* An implementation of the DCCP protocol
*
* This code has been developed by the University of Waikato WAND
- * research group. For further information please see http://www.wand.net.nz/
+ * research group. For further information please see https://www.wand.net.nz/
*
* This code also uses code from Lulea University, rereleased as GPL by its
* authors:
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 081c195e7f7d..02e0fc9f6334 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -6,7 +6,7 @@
* An implementation of the DCCP protocol
*
* This code has been developed by the University of Waikato WAND
- * research group. For further information please see http://www.wand.net.nz/
+ * research group. For further information please see https://www.wand.net.nz/
* or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
*
* This code also uses code from Lulea University, rereleased as GPL by its
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 2d41bb036271..af08e2df7108 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -6,7 +6,7 @@
* An implementation of the DCCP protocol
*
* This code has been developed by the University of Waikato WAND
- * research group. For further information please see http://www.wand.net.nz/
+ * research group. For further information please see https://www.wand.net.nz/
* or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
*
* This code also uses code from Lulea University, rereleased as GPL by its
@@ -365,6 +365,7 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
/**
* tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
+ * @h: The non-empty RX history object
*/
static inline struct tfrc_rx_hist_entry *
tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
@@ -374,6 +375,7 @@ static inline struct tfrc_rx_hist_entry *
/**
* tfrc_rx_hist_rtt_prev_s - previously suitable (wrt rtt_last_s) RTT-sampling entry
+ * @h: The non-empty RX history object
*/
static inline struct tfrc_rx_hist_entry *
tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index a157d874840b..159cc9326eab 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -6,7 +6,7 @@
* Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
*
* This code has been developed by the University of Waikato WAND
- * research group. For further information please see http://www.wand.net.nz/
+ * research group. For further information please see https://www.wand.net.nz/
* or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
*
* This code also uses code from Lulea University, rereleased as GPL by its
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 7dce4f6c7025..9cc9d1ee6cdb 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -295,13 +295,7 @@ int dccp_disconnect(struct sock *sk, int flags);
int dccp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
int dccp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
-#ifdef CONFIG_COMPAT
-int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
-int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
-#endif
+ sockptr_t optval, unsigned int optlen);
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 9c3b5e056234..afc071ea1271 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -165,6 +165,8 @@ static const struct {
/**
* dccp_feat_index - Hash function to map feature number into array position
+ * @feat_num: feature to hash, one of %dccp_feature_numbers
+ *
* Returns consecutive array index or -1 if the feature is not understood.
*/
static int dccp_feat_index(u8 feat_num)
@@ -567,6 +569,8 @@ cloning_failed:
/**
* dccp_feat_valid_nn_length - Enforce length constraints on NN options
+ * @feat_num: feature to return length of, one of %dccp_feature_numbers
+ *
* Length is between 0 and %DCCP_OPTVAL_MAXLEN. Used for outgoing packets only,
* incoming options are accepted as long as their values are valid.
*/
@@ -1429,6 +1433,8 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
/**
* dccp_feat_init - Seed feature negotiation with host-specific defaults
+ * @sk: Socket to initialize.
+ *
* This initialises global defaults, depending on the value of the sysctls.
* These can later be overridden by registering changes via setsockopt calls.
* The last link in the chain is finalise_settings, to make sure that between
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 6dce68a55964..bd9cfdb67436 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -715,6 +715,7 @@ EXPORT_SYMBOL_GPL(dccp_rcv_state_process);
/**
* dccp_sample_rtt - Validate and finalise computation of RTT sample
+ * @sk: socket structure
* @delta: number of microseconds between packet and acknowledgment
*
* The routine is kept generic to work in different contexts. It should be
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index d19557c6d04b..9c28c8251125 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -694,6 +694,8 @@ EXPORT_SYMBOL_GPL(dccp_v4_do_rcv);
/**
* dccp_invalid_packet - check for malformed packets
+ * @skb: Packet to validate
+ *
* Implements RFC 4340, 8.5: Step 1: Check header basics
* Packets that fail these checks are ignored and do not receive Resets.
*/
@@ -911,10 +913,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
.getsockopt = ip_getsockopt,
.addr2sockaddr = inet_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in),
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_ip_setsockopt,
- .compat_getsockopt = compat_ip_getsockopt,
-#endif
};
static int dccp_v4_init_sock(struct sock *sk)
@@ -961,10 +959,6 @@ static struct proto dccp_v4_prot = {
.rsk_prot = &dccp_request_sock_ops,
.twsk_prot = &dccp_timewait_sock_ops,
.h.hashinfo = &dccp_hashinfo,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_dccp_setsockopt,
- .compat_getsockopt = compat_dccp_getsockopt,
-#endif
};
static const struct net_protocol dccp_v4_protocol = {
@@ -997,10 +991,6 @@ static const struct proto_ops inet_dccp_ops = {
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
-#endif
};
static struct inet_protosw dccp_v4_protosw = {
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 650187d68851..ef4ab28cfde0 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -970,10 +970,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = {
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_ipv6_setsockopt,
- .compat_getsockopt = compat_ipv6_getsockopt,
-#endif
};
/*
@@ -990,10 +986,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_ipv6_setsockopt,
- .compat_getsockopt = compat_ipv6_getsockopt,
-#endif
};
/* NOTE: A lot of things set to zero explicitly by call to
@@ -1049,10 +1041,6 @@ static struct proto dccp_v6_prot = {
.rsk_prot = &dccp6_request_sock_ops,
.twsk_prot = &dccp6_timewait_sock_ops,
.h.hashinfo = &dccp_hashinfo,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_dccp_setsockopt,
- .compat_getsockopt = compat_dccp_getsockopt,
-#endif
};
static const struct inet6_protocol dccp_v6_protocol = {
@@ -1083,8 +1071,6 @@ static const struct proto_ops inet6_dccp_ops = {
.sendpage = sock_no_sendpage,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet6_compat_ioctl,
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
#endif
};
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 3b42f5c6a63d..daa9eed92646 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -43,6 +43,7 @@ u64 dccp_decode_value_var(const u8 *bf, const u8 len)
* dccp_parse_options - Parse DCCP options present in @skb
* @sk: client|server|listening dccp socket (when @dreq != NULL)
* @dreq: request socket to use during connection setup, or NULL
+ * @skb: frame to parse
*/
int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
struct sk_buff *skb)
@@ -471,6 +472,8 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
/**
* dccp_insert_option_mandatory - Mandatory option (5.8.2)
+ * @skb: frame into which to insert option
+ *
* Note that since we are using skb_push, this function needs to be called
* _after_ inserting the option it is supposed to influence (stack order).
*/
@@ -486,6 +489,7 @@ int dccp_insert_option_mandatory(struct sk_buff *skb)
/**
* dccp_insert_fn_opt - Insert single Feature-Negotiation option into @skb
+ * @skb: frame to insert feature negotiation option into
* @type: %DCCPO_CHANGE_L, %DCCPO_CHANGE_R, %DCCPO_CONFIRM_L, %DCCPO_CONFIRM_R
* @feat: one out of %dccp_feature_numbers
* @val: NN value or SP array (preferred element first) to copy
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index c13b6609474b..d148ab1530e5 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -375,6 +375,15 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
goto out;
switch (cmd) {
+ case SIOCOUTQ: {
+ int amount = sk_wmem_alloc_get(sk);
+ /* Using sk_wmem_alloc here because sk_wmem_queued is not used by DCCP and
+ * always 0, comparably to UDP.
+ */
+
+ rc = put_user(amount, (int __user *)arg);
+ }
+ break;
case SIOCINQ: {
struct sk_buff *skb;
unsigned long amount = 0;
@@ -402,7 +411,7 @@ out:
EXPORT_SYMBOL_GPL(dccp_ioctl);
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_service_list *sl = NULL;
@@ -417,9 +426,8 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
return -ENOMEM;
sl->dccpsl_nr = optlen / sizeof(u32) - 1;
- if (copy_from_user(sl->dccpsl_list,
- optval + sizeof(service),
- optlen - sizeof(service)) ||
+ if (copy_from_sockptr_offset(sl->dccpsl_list, optval,
+ sizeof(service), optlen - sizeof(service)) ||
dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
kfree(sl);
return -EFAULT;
@@ -473,7 +481,7 @@ static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
}
static int dccp_setsockopt_ccid(struct sock *sk, int type,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
u8 *val;
int rc = 0;
@@ -481,7 +489,7 @@ static int dccp_setsockopt_ccid(struct sock *sk, int type,
if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
return -EINVAL;
- val = memdup_user(optval, optlen);
+ val = memdup_sockptr(optval, optlen);
if (IS_ERR(val))
return PTR_ERR(val);
@@ -498,7 +506,7 @@ static int dccp_setsockopt_ccid(struct sock *sk, int type,
}
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct dccp_sock *dp = dccp_sk(sk);
int val, err = 0;
@@ -520,7 +528,7 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
if (optlen < (int)sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
return -EFAULT;
if (optname == DCCP_SOCKOPT_SERVICE)
@@ -563,8 +571,8 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
return err;
}
-int dccp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen)
{
if (level != SOL_DCCP)
return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
@@ -575,19 +583,6 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL_GPL(dccp_setsockopt);
-#ifdef CONFIG_COMPAT
-int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- if (level != SOL_DCCP)
- return inet_csk_compat_setsockopt(sk, level, optname,
- optval, optlen);
- return do_dccp_setsockopt(sk, level, optname, optval, optlen);
-}
-
-EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
-#endif
-
static int dccp_getsockopt_service(struct sock *sk, int len,
__be32 __user *optval,
int __user *optlen)
@@ -696,19 +691,6 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL_GPL(dccp_getsockopt);
-#ifdef CONFIG_COMPAT
-int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- if (level != SOL_DCCP)
- return inet_csk_compat_getsockopt(sk, level, optname,
- optval, optlen);
- return do_dccp_getsockopt(sk, level, optname, optval, optlen);
-}
-
-EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
-#endif
-
static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
{
struct cmsghdr *cmsg;
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index c0b3672637c4..0e06dfc32273 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -216,6 +216,8 @@ out:
/**
* dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface
+ * @data: Socket to act on
+ *
* See the comments above %ccid_dequeueing_decision for supported modes.
*/
static void dccp_write_xmitlet(unsigned long data)
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 0a46ea3bddd5..3b53d766789d 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -150,7 +150,8 @@ static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
static struct hlist_head dn_wild_sk;
static atomic_long_t decnet_memory_allocated;
-static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags);
+static int __dn_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen, int flags);
static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
static struct hlist_head *dn_find_list(struct sock *sk)
@@ -1320,7 +1321,8 @@ out:
return err;
}
-static int dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
+static int dn_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
int err;
@@ -1338,7 +1340,8 @@ static int dn_setsockopt(struct socket *sock, int level, int optname, char __use
return err;
}
-static int __dn_setsockopt(struct socket *sock, int level,int optname, char __user *optval, unsigned int optlen, int flags)
+static int __dn_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen, int flags)
{
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
@@ -1354,13 +1357,13 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us
} u;
int err;
- if (optlen && !optval)
+ if (optlen && sockptr_is_null(optval))
return -EINVAL;
if (optlen > sizeof(u))
return -EINVAL;
- if (copy_from_user(&u, optval, optlen))
+ if (copy_from_sockptr(&u, optval, optlen))
return -EFAULT;
switch (optname) {
@@ -2134,14 +2137,11 @@ static struct sock *dn_socket_get_next(struct seq_file *seq,
struct dn_iter_state *state = seq->private;
n = sk_next(n);
-try_again:
- if (n)
- goto out;
- if (++state->bucket >= DN_SK_HASH_SIZE)
- goto out;
- n = sk_head(&dn_sk_hash[state->bucket]);
- goto try_again;
-out:
+ while (!n) {
+ if (++state->bucket >= DN_SK_HASH_SIZE)
+ break;
+ n = sk_head(&dn_sk_hash[state->bucket]);
+ }
return n;
}
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 65abcf1b3210..15d42353f1a3 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -462,7 +462,9 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg)
switch (cmd) {
case SIOCGIFADDR:
*((__le16 *)sdn->sdn_nodeaddr) = ifa->ifa_local;
- goto rarok;
+ if (copy_to_user(arg, ifr, DN_IFREQ_SIZE))
+ ret = -EFAULT;
+ break;
case SIOCSIFADDR:
if (!ifa) {
@@ -485,10 +487,6 @@ done:
rtnl_unlock();
return ret;
-rarok:
- if (copy_to_user(arg, ifr, DN_IFREQ_SIZE))
- ret = -EFAULT;
- goto done;
}
struct net_device *dn_dev_get_default(void)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 06b9983325cc..4cac31d22a50 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -494,6 +494,8 @@ static int dn_return_long(struct sk_buff *skb)
/**
* dn_route_rx_packet - Try and find a route for an incoming packet
+ * @net: The applicable net namespace
+ * @sk: Socket packet transmitted on
* @skb: The packet to find a route for
*
* Returns: result of input function if route is found, error code otherwise
@@ -670,7 +672,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
if (decnet_debug_level & 1)
printk(KERN_DEBUG
"dn_route_rcv: got 0x%02x from %s [%d %d %d]\n",
- (int)flags, (dev) ? dev->name : "???", len, skb->len,
+ (int)flags, dev->name, len, skb->len,
padlen);
if (flags & DN_RT_PKT_CNTL) {
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index dc705769acc9..26a9193df783 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -6,7 +6,7 @@
*
* DECnet Routing Message Grabulator
*
- * (C) 2000 ChyGwyn Limited - http://www.chygwyn.com/
+ * (C) 2000 ChyGwyn Limited - https://www.chygwyn.com/
*
* Author: Steven Whitehouse <steve@chygwyn.com>
*/
diff --git a/net/devres.c b/net/devres.c
index 57a6a88d11f6..1f9be2133787 100644
--- a/net/devres.c
+++ b/net/devres.c
@@ -39,7 +39,7 @@ struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv,
}
EXPORT_SYMBOL(devm_alloc_etherdev_mqs);
-static void devm_netdev_release(struct device *dev, void *this)
+static void devm_unregister_netdev(struct device *dev, void *this)
{
struct net_device_devres *res = this;
@@ -77,7 +77,7 @@ int devm_register_netdev(struct device *dev, struct net_device *ndev)
netdev_devres_match, ndev)))
return -EINVAL;
- dr = devres_alloc(devm_netdev_release, sizeof(*dr), GFP_KERNEL);
+ dr = devres_alloc(devm_unregister_netdev, sizeof(*dr), GFP_KERNEL);
if (!dr)
return -ENOMEM;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index d5bc6ac599ef..1f9b9b11008c 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -86,6 +86,13 @@ config NET_DSA_TAG_KSZ
Say Y if you want to enable support for tagging frames for the
Microchip 8795/9477/9893 families of switches.
+config NET_DSA_TAG_RTL4_A
+ tristate "Tag driver for Realtek 4 byte protocol A tags"
+ help
+ Say Y or M if you want to enable support for tagging frames for the
+ Realtek switches with 4 byte protocol A tags, sich as found in
+ the Realtek RTL8366RB.
+
config NET_DSA_TAG_OCELOT
tristate "Tag driver for Ocelot family of switches"
select PACKING
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 108486cfdeef..4f47b2025ff5 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o
obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
+obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o
obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 076908fdd29b..c0ffc7a2b65f 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -261,10 +261,15 @@ static int dsa_port_setup(struct dsa_port *dp)
struct devlink_port *dlp = &dp->devlink_port;
bool dsa_port_link_registered = false;
bool devlink_port_registered = false;
+ struct devlink_port_attrs attrs = {};
struct devlink *dl = ds->devlink;
bool dsa_port_enabled = false;
int err = 0;
+ attrs.phys.port_number = dp->index;
+ memcpy(attrs.switch_id.id, id, len);
+ attrs.switch_id.id_len = len;
+
if (dp->setup)
return 0;
@@ -274,8 +279,8 @@ static int dsa_port_setup(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_CPU:
memset(dlp, 0, sizeof(*dlp));
- devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_CPU,
- dp->index, false, 0, id, len);
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_CPU;
+ devlink_port_attrs_set(dlp, &attrs);
err = devlink_port_register(dl, dlp, dp->index);
if (err)
break;
@@ -294,8 +299,8 @@ static int dsa_port_setup(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_DSA:
memset(dlp, 0, sizeof(*dlp));
- devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_DSA,
- dp->index, false, 0, id, len);
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_DSA;
+ devlink_port_attrs_set(dlp, &attrs);
err = devlink_port_register(dl, dlp, dp->index);
if (err)
break;
@@ -314,8 +319,8 @@ static int dsa_port_setup(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_USER:
memset(dlp, 0, sizeof(*dlp));
- devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_PHYSICAL,
- dp->index, false, 0, id, len);
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+ devlink_port_attrs_set(dlp, &attrs);
err = devlink_port_register(dl, dlp, dp->index);
if (err)
break;
@@ -722,8 +727,12 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
ports = of_get_child_by_name(dn, "ports");
if (!ports) {
- dev_err(ds->dev, "no ports child node found\n");
- return -EINVAL;
+ /* The second possibility is "ethernet-ports" */
+ ports = of_get_child_by_name(dn, "ethernet-ports");
+ if (!ports) {
+ dev_err(ds->dev, "no ports child node found\n");
+ return -EINVAL;
+ }
}
for_each_available_child_of_node(ports, port) {
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index adecf73bd608..1653e3377cb3 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -77,7 +77,7 @@ struct dsa_slave_priv {
struct sk_buff * (*xmit)(struct sk_buff *skb,
struct net_device *dev);
- struct pcpu_sw_netstats *stats64;
+ struct pcpu_sw_netstats __percpu *stats64;
struct gro_cells gcells;
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 480a61460c23..61615ebc70e9 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -186,17 +186,6 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
}
}
-static int dsa_master_get_phys_port_name(struct net_device *dev,
- char *name, size_t len)
-{
- struct dsa_port *cpu_dp = dev->dsa_ptr;
-
- if (snprintf(name, len, "p%d", cpu_dp->index) >= len)
- return -EINVAL;
-
- return 0;
-}
-
static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
@@ -220,12 +209,16 @@ static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
- if (cpu_dp->orig_ndo_ops && cpu_dp->orig_ndo_ops->ndo_do_ioctl)
- err = cpu_dp->orig_ndo_ops->ndo_do_ioctl(dev, ifr, cmd);
+ if (dev->netdev_ops->ndo_do_ioctl)
+ err = dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
return err;
}
+static const struct dsa_netdevice_ops dsa_netdev_ops = {
+ .ndo_do_ioctl = dsa_master_ioctl,
+};
+
static int dsa_master_ethtool_setup(struct net_device *dev)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
@@ -260,38 +253,10 @@ static void dsa_master_ethtool_teardown(struct net_device *dev)
cpu_dp->orig_ethtool_ops = NULL;
}
-static int dsa_master_ndo_setup(struct net_device *dev)
+static void dsa_netdev_ops_set(struct net_device *dev,
+ const struct dsa_netdevice_ops *ops)
{
- struct dsa_port *cpu_dp = dev->dsa_ptr;
- struct dsa_switch *ds = cpu_dp->ds;
- struct net_device_ops *ops;
-
- if (dev->netdev_ops->ndo_get_phys_port_name)
- return 0;
-
- ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL);
- if (!ops)
- return -ENOMEM;
-
- cpu_dp->orig_ndo_ops = dev->netdev_ops;
- if (cpu_dp->orig_ndo_ops)
- memcpy(ops, cpu_dp->orig_ndo_ops, sizeof(*ops));
-
- ops->ndo_get_phys_port_name = dsa_master_get_phys_port_name;
- ops->ndo_do_ioctl = dsa_master_ioctl;
-
- dev->netdev_ops = ops;
-
- return 0;
-}
-
-static void dsa_master_ndo_teardown(struct net_device *dev)
-{
- struct dsa_port *cpu_dp = dev->dsa_ptr;
-
- if (cpu_dp->orig_ndo_ops)
- dev->netdev_ops = cpu_dp->orig_ndo_ops;
- cpu_dp->orig_ndo_ops = NULL;
+ dev->dsa_ptr->netdev_ops = ops;
}
static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
@@ -353,9 +318,7 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
if (ret)
return ret;
- ret = dsa_master_ndo_setup(dev);
- if (ret)
- goto out_err_ethtool_teardown;
+ dsa_netdev_ops_set(dev, &dsa_netdev_ops);
ret = sysfs_create_group(&dev->dev.kobj, &dsa_group);
if (ret)
@@ -364,8 +327,7 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
return ret;
out_err_ndo_teardown:
- dsa_master_ndo_teardown(dev);
-out_err_ethtool_teardown:
+ dsa_netdev_ops_set(dev, NULL);
dsa_master_ethtool_teardown(dev);
return ret;
}
@@ -373,7 +335,7 @@ out_err_ethtool_teardown:
void dsa_master_teardown(struct net_device *dev)
{
sysfs_remove_group(&dev->dev.kobj, &dsa_group);
- dsa_master_ndo_teardown(dev);
+ dsa_netdev_ops_set(dev, NULL);
dsa_master_ethtool_teardown(dev);
dsa_master_reset_mtu(dev);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 4c7f086a047b..41d60eeefdbd 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1754,11 +1754,8 @@ int dsa_slave_create(struct dsa_port *port)
eth_hw_addr_inherit(slave_dev, master);
slave_dev->priv_flags |= IFF_NO_QUEUE;
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
- slave_dev->min_mtu = 0;
if (ds->ops->port_max_mtu)
slave_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index);
- else
- slave_dev->max_mtu = ETH_MAX_MTU;
SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
@@ -1795,7 +1792,8 @@ int dsa_slave_create(struct dsa_port *port)
ret = dsa_slave_phy_setup(slave_dev);
if (ret) {
- netdev_err(master, "error %d setting up slave phy\n", ret);
+ netdev_err(master, "error %d setting up slave PHY for %s\n",
+ ret, slave_dev->name);
goto out_gcells;
}
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 90d055c4df9e..bd1a3158d79a 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -156,8 +156,9 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
{
struct dsa_port *dp = dsa_slave_to_port(dev);
struct sk_buff *nskb;
- u16 *tag;
+ __be16 *tag;
u8 *addr;
+ u16 val;
nskb = ksz_common_xmit(skb, dev, KSZ9477_INGRESS_TAG_LEN);
if (!nskb)
@@ -167,12 +168,12 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
tag = skb_put(nskb, KSZ9477_INGRESS_TAG_LEN);
addr = skb_mac_header(nskb);
- *tag = BIT(dp->index);
+ val = BIT(dp->index);
if (is_link_local_ether_addr(addr))
- *tag |= KSZ9477_TAIL_TAG_OVERRIDE;
+ val |= KSZ9477_TAIL_TAG_OVERRIDE;
- *tag = cpu_to_be16(*tag);
+ *tag = cpu_to_be16(val);
return nskb;
}
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index eb0e7a32e53d..ccfb6f641bbf 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -55,7 +55,8 @@ static int lan9303_xmit_use_arl(struct dsa_port *dp, u8 *dest_addr)
static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
- u16 *lan9303_tag;
+ __be16 *lan9303_tag;
+ u16 tag;
/* insert a special VLAN tag between the MAC addresses
* and the current ethertype field.
@@ -72,12 +73,12 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
/* make room between MACs and Ether-Type */
memmove(skb->data, skb->data + LAN9303_TAG_LEN, 2 * ETH_ALEN);
- lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN);
+ lan9303_tag = (__be16 *)(skb->data + 2 * ETH_ALEN);
+ tag = lan9303_xmit_use_arl(dp, skb->data) ?
+ LAN9303_TAG_TX_USE_ALR :
+ dp->index | LAN9303_TAG_TX_STP_OVERRIDE;
lan9303_tag[0] = htons(ETH_P_8021Q);
- lan9303_tag[1] = lan9303_xmit_use_arl(dp, skb->data) ?
- LAN9303_TAG_TX_USE_ALR :
- dp->index | LAN9303_TAG_TX_STP_OVERRIDE;
- lan9303_tag[1] = htons(lan9303_tag[1]);
+ lan9303_tag[1] = htons(tag);
return skb;
}
@@ -85,7 +86,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
- u16 *lan9303_tag;
+ __be16 *lan9303_tag;
u16 lan9303_tag1;
unsigned int source_port;
@@ -101,7 +102,7 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
* ^
* ->data
*/
- lan9303_tag = (u16 *)(skb->data - 2);
+ lan9303_tag = (__be16 *)(skb->data - 2);
if (lan9303_tag[0] != htons(ETH_P_8021Q)) {
dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid VLAN marker\n");
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index d6619edd53e5..f602fc758d68 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -67,8 +67,9 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
+ u16 hdr;
int port;
- __be16 *phdr, hdr;
+ __be16 *phdr;
unsigned char *dest = eth_hdr(skb)->h_dest;
bool is_multicast_skb = is_multicast_ether_addr(dest) &&
!is_broadcast_ether_addr(dest);
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index b0c98ee4e13b..42f327c06dca 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -137,11 +137,10 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
struct dsa_port *dp = dsa_slave_to_port(netdev);
- u64 bypass, dest, src, qos_class, rew_op;
struct dsa_switch *ds = dp->ds;
- int port = dp->index;
struct ocelot *ocelot = ds->priv;
- struct ocelot_port *ocelot_port = ocelot->ports[port];
+ struct ocelot_port *ocelot_port;
+ u64 qos_class, rew_op;
u8 *injection;
if (unlikely(skb_cow_head(skb, OCELOT_TAG_LEN) < 0)) {
@@ -149,19 +148,15 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
return NULL;
}
- injection = skb_push(skb, OCELOT_TAG_LEN);
+ ocelot_port = ocelot->ports[dp->index];
- memset(injection, 0, OCELOT_TAG_LEN);
+ injection = skb_push(skb, OCELOT_TAG_LEN);
- /* Set the source port as the CPU port module and not the NPI port */
- src = ocelot->num_phys_ports;
- dest = BIT(port);
- bypass = true;
+ memcpy(injection, ocelot_port->xmit_template, OCELOT_TAG_LEN);
+ /* Fix up the fields which are not statically determined
+ * in the template
+ */
qos_class = skb->priority;
-
- packing(injection, &bypass, 127, 127, OCELOT_TAG_LEN, PACK, 0);
- packing(injection, &dest, 68, 56, OCELOT_TAG_LEN, PACK, 0);
- packing(injection, &src, 46, 43, OCELOT_TAG_LEN, PACK, 0);
packing(injection, &qos_class, 19, 17, OCELOT_TAG_LEN, PACK, 0);
if (ocelot->ptp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 70db7c909f74..7066f5e697d7 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -31,7 +31,8 @@
static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
- u16 *phdr, hdr;
+ __be16 *phdr;
+ u16 hdr;
if (skb_cow_head(skb, QCA_HDR_LEN) < 0)
return NULL;
@@ -39,7 +40,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
skb_push(skb, QCA_HDR_LEN);
memmove(skb->data, skb->data + QCA_HDR_LEN, 2 * ETH_ALEN);
- phdr = (u16 *)(skb->data + 2 * ETH_ALEN);
+ phdr = (__be16 *)(skb->data + 2 * ETH_ALEN);
/* Set the version field, and set destination port information */
hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S |
@@ -54,8 +55,9 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
u8 ver;
+ u16 hdr;
int port;
- __be16 *phdr, hdr;
+ __be16 *phdr;
if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN)))
return NULL;
diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
new file mode 100644
index 000000000000..7b63010fa87b
--- /dev/null
+++ b/net/dsa/tag_rtl4_a.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Handler for Realtek 4 byte DSA switch tags
+ * Currently only supports protocol "A" found in RTL8366RB
+ * Copyright (c) 2020 Linus Walleij <linus.walleij@linaro.org>
+ *
+ * This "proprietary tag" header looks like so:
+ *
+ * -------------------------------------------------
+ * | MAC DA | MAC SA | 0x8899 | 2 bytes tag | Type |
+ * -------------------------------------------------
+ *
+ * The 2 bytes tag form a 16 bit big endian word. The exact
+ * meaning has been guessed from packet dumps from ingress
+ * frames, as no working egress traffic has been available
+ * we do not know the format of the egress tags or if they
+ * are even supported.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/bits.h>
+
+#include "dsa_priv.h"
+
+#define RTL4_A_HDR_LEN 4
+#define RTL4_A_ETHERTYPE 0x8899
+#define RTL4_A_PROTOCOL_SHIFT 12
+/*
+ * 0x1 = Realtek Remote Control protocol (RRCP)
+ * 0x2/0x3 seems to be used for loopback testing
+ * 0x9 = RTL8306 DSA protocol
+ * 0xa = RTL8366RB DSA protocol
+ */
+#define RTL4_A_PROTOCOL_RTL8366RB 0xa
+
+static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ /*
+ * Just let it pass thru, we don't know if it is possible
+ * to tag a frame with the 0x8899 ethertype and direct it
+ * to a specific port, all attempts at reverse-engineering have
+ * ended up with the frames getting dropped.
+ *
+ * The VLAN set-up needs to restrict the frames to the right port.
+ *
+ * If you have documentation on the tagging format for RTL8366RB
+ * (tag type A) then please contribute.
+ */
+ return skb;
+}
+
+static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
+ struct net_device *dev,
+ struct packet_type *pt)
+{
+ u16 protport;
+ __be16 *p;
+ u16 etype;
+ u8 *tag;
+ u8 prot;
+ u8 port;
+
+ if (unlikely(!pskb_may_pull(skb, RTL4_A_HDR_LEN)))
+ return NULL;
+
+ /* The RTL4 header has its own custom Ethertype 0x8899 and that
+ * starts right at the beginning of the packet, after the src
+ * ethernet addr. Apparantly skb->data always points 2 bytes in,
+ * behind the Ethertype.
+ */
+ tag = skb->data - 2;
+ p = (__be16 *)tag;
+ etype = ntohs(*p);
+ if (etype != RTL4_A_ETHERTYPE) {
+ /* Not custom, just pass through */
+ netdev_dbg(dev, "non-realtek ethertype 0x%04x\n", etype);
+ return skb;
+ }
+ p = (__be16 *)(tag + 2);
+ protport = ntohs(*p);
+ /* The 4 upper bits are the protocol */
+ prot = (protport >> RTL4_A_PROTOCOL_SHIFT) & 0x0f;
+ if (prot != RTL4_A_PROTOCOL_RTL8366RB) {
+ netdev_err(dev, "unknown realtek protocol 0x%01x\n", prot);
+ return NULL;
+ }
+ port = protport & 0xff;
+
+ skb->dev = dsa_master_find_slave(dev, 0, port);
+ if (!skb->dev) {
+ netdev_dbg(dev, "could not find slave for port %d\n", port);
+ return NULL;
+ }
+
+ /* Remove RTL4 tag and recalculate checksum */
+ skb_pull_rcsum(skb, RTL4_A_HDR_LEN);
+
+ /* Move ethernet DA and SA in front of the data */
+ memmove(skb->data - ETH_HLEN,
+ skb->data - ETH_HLEN - RTL4_A_HDR_LEN,
+ 2 * ETH_ALEN);
+
+ skb->offload_fwd_mark = 1;
+
+ return skb;
+}
+
+static int rtl4a_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto,
+ int *offset)
+{
+ *offset = RTL4_A_HDR_LEN;
+ /* Skip past the tag and fetch the encapsulated Ethertype */
+ *proto = ((__be16 *)skb->data)[1];
+
+ return 0;
+}
+
+static const struct dsa_device_ops rtl4a_netdev_ops = {
+ .name = "rtl4a",
+ .proto = DSA_TAG_PROTO_RTL4_A,
+ .xmit = rtl4a_tag_xmit,
+ .rcv = rtl4a_tag_rcv,
+ .flow_dissect = rtl4a_tag_flow_dissect,
+ .overhead = RTL4_A_HDR_LEN,
+};
+module_dsa_tag_driver(rtl4a_netdev_ops);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL4_A);
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 0c2b94f20499..7a849ff22dad 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -6,4 +6,5 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o
ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
linkstate.o debug.o wol.o features.o privflags.o rings.o \
- channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o
+ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
+ tunnels.o
diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index 7194956aa09e..888f6e101f34 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -58,6 +58,7 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *tb[ETHTOOL_A_CABLE_TEST_MAX + 1];
struct ethnl_req_info req_info = {};
+ const struct ethtool_phy_ops *ops;
struct net_device *dev;
int ret;
@@ -81,11 +82,17 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
}
rtnl_lock();
+ ops = ethtool_phy_ops;
+ if (!ops || !ops->start_cable_test) {
+ ret = -EOPNOTSUPP;
+ goto out_rtnl;
+ }
+
ret = ethnl_ops_begin(dev);
if (ret < 0)
goto out_rtnl;
- ret = phy_start_cable_test(dev->phydev, info->extack);
+ ret = ops->start_cable_test(dev->phydev, info->extack);
ethnl_ops_complete(dev);
@@ -308,6 +315,7 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *tb[ETHTOOL_A_CABLE_TEST_TDR_MAX + 1];
struct ethnl_req_info req_info = {};
+ const struct ethtool_phy_ops *ops;
struct phy_tdr_config cfg;
struct net_device *dev;
int ret;
@@ -337,11 +345,17 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
goto out_dev_put;
rtnl_lock();
+ ops = ethtool_phy_ops;
+ if (!ops || !ops->start_cable_test_tdr) {
+ ret = -EOPNOTSUPP;
+ goto out_rtnl;
+ }
+
ret = ethnl_ops_begin(dev);
if (ret < 0)
goto out_rtnl;
- ret = phy_start_cable_test_tdr(dev->phydev, info->extack, &cfg);
+ ret = ops->start_cable_test_tdr(dev->phydev, info->extack, &cfg);
ethnl_ops_complete(dev);
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index aaecfc916a4d..ed19573fccd7 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/ethtool_netlink.h>
#include <linux/net_tstamp.h>
#include <linux/phy.h>
+#include <linux/rtnetlink.h>
#include "common.h"
@@ -175,6 +177,21 @@ const char link_mode_names[][ETH_GSTRING_LEN] = {
__DEFINE_LINK_MODE_NAME(400000, DR8, Full),
__DEFINE_LINK_MODE_NAME(400000, CR8, Full),
__DEFINE_SPECIAL_MODE_NAME(FEC_LLRS, "LLRS"),
+ __DEFINE_LINK_MODE_NAME(100000, KR, Full),
+ __DEFINE_LINK_MODE_NAME(100000, SR, Full),
+ __DEFINE_LINK_MODE_NAME(100000, LR_ER_FR, Full),
+ __DEFINE_LINK_MODE_NAME(100000, DR, Full),
+ __DEFINE_LINK_MODE_NAME(100000, CR, Full),
+ __DEFINE_LINK_MODE_NAME(200000, KR2, Full),
+ __DEFINE_LINK_MODE_NAME(200000, SR2, Full),
+ __DEFINE_LINK_MODE_NAME(200000, LR2_ER2_FR2, Full),
+ __DEFINE_LINK_MODE_NAME(200000, DR2, Full),
+ __DEFINE_LINK_MODE_NAME(200000, CR2, Full),
+ __DEFINE_LINK_MODE_NAME(400000, KR4, Full),
+ __DEFINE_LINK_MODE_NAME(400000, SR4, Full),
+ __DEFINE_LINK_MODE_NAME(400000, LR4_ER4_FR4, Full),
+ __DEFINE_LINK_MODE_NAME(400000, DR4, Full),
+ __DEFINE_LINK_MODE_NAME(400000, CR4, Full),
};
static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -256,6 +273,14 @@ const char ts_rx_filter_names[][ETH_GSTRING_LEN] = {
};
static_assert(ARRAY_SIZE(ts_rx_filter_names) == __HWTSTAMP_FILTER_CNT);
+const char udp_tunnel_type_names[][ETH_GSTRING_LEN] = {
+ [ETHTOOL_UDP_TUNNEL_TYPE_VXLAN] = "vxlan",
+ [ETHTOOL_UDP_TUNNEL_TYPE_GENEVE] = "geneve",
+ [ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE] = "vxlan-gpe",
+};
+static_assert(ARRAY_SIZE(udp_tunnel_type_names) ==
+ __ETHTOOL_UDP_TUNNEL_TYPE_CNT);
+
/* return false if legacy contained non-0 deprecated fields
* maxtxpkt/maxrxpkt. rest of ksettings always updated
*/
@@ -373,3 +398,13 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
return 0;
}
+
+const struct ethtool_phy_ops *ethtool_phy_ops;
+
+void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops)
+{
+ rtnl_lock();
+ ethtool_phy_ops = ops;
+ rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(ethtool_set_ethtool_phy_ops);
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index a62f68ccc43a..3d9251c95a8b 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -28,6 +28,7 @@ extern const char wol_mode_names[][ETH_GSTRING_LEN];
extern const char sof_timestamping_names[][ETH_GSTRING_LEN];
extern const char ts_tx_type_names[][ETH_GSTRING_LEN];
extern const char ts_rx_filter_names[][ETH_GSTRING_LEN];
+extern const char udp_tunnel_type_names[][ETH_GSTRING_LEN];
int __ethtool_get_link(struct net_device *dev);
@@ -37,4 +38,6 @@ bool convert_legacy_settings_to_link_ksettings(
int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max);
int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info);
+extern const struct ethtool_phy_ops *ethtool_phy_ops;
+
#endif /* _ETHTOOL_COMMON_H */
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 21d5fc0f6bb3..441794e0034f 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -135,6 +135,7 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
static int __ethtool_get_sset_count(struct net_device *dev, int sset)
{
+ const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops;
const struct ethtool_ops *ops = dev->ethtool_ops;
if (sset == ETH_SS_FEATURES)
@@ -150,8 +151,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
return ARRAY_SIZE(phy_tunable_strings);
if (sset == ETH_SS_PHY_STATS && dev->phydev &&
- !ops->get_ethtool_phy_stats)
- return phy_ethtool_get_sset_count(dev->phydev);
+ !ops->get_ethtool_phy_stats &&
+ phy_ops && phy_ops->get_sset_count)
+ return phy_ops->get_sset_count(dev->phydev);
if (sset == ETH_SS_LINK_MODES)
return __ETHTOOL_LINK_MODE_MASK_NBITS;
@@ -165,6 +167,7 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
static void __ethtool_get_strings(struct net_device *dev,
u32 stringset, u8 *data)
{
+ const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops;
const struct ethtool_ops *ops = dev->ethtool_ops;
if (stringset == ETH_SS_FEATURES)
@@ -178,8 +181,9 @@ static void __ethtool_get_strings(struct net_device *dev,
else if (stringset == ETH_SS_PHY_TUNABLES)
memcpy(data, phy_tunable_strings, sizeof(phy_tunable_strings));
else if (stringset == ETH_SS_PHY_STATS && dev->phydev &&
- !ops->get_ethtool_phy_stats)
- phy_ethtool_get_strings(dev->phydev, data);
+ !ops->get_ethtool_phy_stats && phy_ops &&
+ phy_ops->get_strings)
+ phy_ops->get_strings(dev->phydev, data);
else if (stringset == ETH_SS_LINK_MODES)
memcpy(data, link_mode_names,
__ETHTOOL_LINK_MODE_MASK_NBITS * ETH_GSTRING_LEN);
@@ -1918,7 +1922,7 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
if (copy_to_user(useraddr, &stats, sizeof(stats)))
goto out;
useraddr += sizeof(stats);
- if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64)))
+ if (n_stats && copy_to_user(useraddr, data, array_size(n_stats, sizeof(u64))))
goto out;
ret = 0;
@@ -1929,6 +1933,7 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
{
+ const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops;
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
struct ethtool_stats stats;
@@ -1938,8 +1943,9 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
if (!phydev && (!ops->get_ethtool_phy_stats || !ops->get_sset_count))
return -EOPNOTSUPP;
- if (dev->phydev && !ops->get_ethtool_phy_stats)
- n_stats = phy_ethtool_get_sset_count(dev->phydev);
+ if (dev->phydev && !ops->get_ethtool_phy_stats &&
+ phy_ops && phy_ops->get_sset_count)
+ n_stats = phy_ops->get_sset_count(dev->phydev);
else
n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
if (n_stats < 0)
@@ -1958,8 +1964,9 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
if (!data)
return -ENOMEM;
- if (dev->phydev && !ops->get_ethtool_phy_stats) {
- ret = phy_ethtool_get_stats(dev->phydev, &stats, data);
+ if (dev->phydev && !ops->get_ethtool_phy_stats &&
+ phy_ops && phy_ops->get_stats) {
+ ret = phy_ops->get_stats(dev->phydev, &stats, data);
if (ret < 0)
goto out;
} else {
@@ -1973,7 +1980,7 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
if (copy_to_user(useraddr, &stats, sizeof(stats)))
goto out;
useraddr += sizeof(stats);
- if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64)))
+ if (n_stats && copy_to_user(useraddr, data, array_size(n_stats, sizeof(u64))))
goto out;
ret = 0;
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index fd4f3e58c6f6..7044a2853886 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -257,6 +257,21 @@ static const struct link_mode_info link_mode_params[] = {
__DEFINE_LINK_MODE_PARAMS(400000, DR8, Full),
__DEFINE_LINK_MODE_PARAMS(400000, CR8, Full),
__DEFINE_SPECIAL_MODE_PARAMS(FEC_LLRS),
+ __DEFINE_LINK_MODE_PARAMS(100000, KR, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, SR, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, LR_ER_FR, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, DR, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, CR, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, KR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, SR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, LR2_ER2_FR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, DR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, CR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, KR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, SR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, LR4_ER4_FR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, DR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, CR4, Full),
};
static const struct nla_policy
@@ -406,8 +421,7 @@ int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info)
ret = __ethtool_get_link_ksettings(dev, &ksettings);
if (ret < 0) {
- if (info)
- GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
+ GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
goto out_ops;
}
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
index afe5ac8a0f00..4834091ec24c 100644
--- a/net/ethtool/linkstate.c
+++ b/net/ethtool/linkstate.c
@@ -9,10 +9,12 @@ struct linkstate_req_info {
};
struct linkstate_reply_data {
- struct ethnl_reply_data base;
- int link;
- int sqi;
- int sqi_max;
+ struct ethnl_reply_data base;
+ int link;
+ int sqi;
+ int sqi_max;
+ bool link_ext_state_provided;
+ struct ethtool_link_ext_state_info ethtool_link_ext_state_info;
};
#define LINKSTATE_REPDATA(__reply_base) \
@@ -25,6 +27,8 @@ linkstate_get_policy[ETHTOOL_A_LINKSTATE_MAX + 1] = {
[ETHTOOL_A_LINKSTATE_LINK] = { .type = NLA_REJECT },
[ETHTOOL_A_LINKSTATE_SQI] = { .type = NLA_REJECT },
[ETHTOOL_A_LINKSTATE_SQI_MAX] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKSTATE_EXT_STATE] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKSTATE_EXT_SUBSTATE] = { .type = NLA_REJECT },
};
static int linkstate_get_sqi(struct net_device *dev)
@@ -61,6 +65,23 @@ static int linkstate_get_sqi_max(struct net_device *dev)
mutex_unlock(&phydev->lock);
return ret;
+};
+
+static int linkstate_get_link_ext_state(struct net_device *dev,
+ struct linkstate_reply_data *data)
+{
+ int err;
+
+ if (!dev->ethtool_ops->get_link_ext_state)
+ return -EOPNOTSUPP;
+
+ err = dev->ethtool_ops->get_link_ext_state(dev, &data->ethtool_link_ext_state_info);
+ if (err)
+ return err;
+
+ data->link_ext_state_provided = true;
+
+ return 0;
}
static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
@@ -86,6 +107,12 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
goto out;
data->sqi_max = ret;
+ if (dev->flags & IFF_UP) {
+ ret = linkstate_get_link_ext_state(dev, data);
+ if (ret < 0 && ret != -EOPNOTSUPP && ret != -ENODATA)
+ goto out;
+ }
+
ret = 0;
out:
ethnl_ops_complete(dev);
@@ -107,6 +134,12 @@ static int linkstate_reply_size(const struct ethnl_req_info *req_base,
if (data->sqi_max != -EOPNOTSUPP)
len += nla_total_size(sizeof(u32));
+ if (data->link_ext_state_provided)
+ len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_STATE */
+
+ if (data->ethtool_link_ext_state_info.__link_ext_substate)
+ len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_SUBSTATE */
+
return len;
}
@@ -128,6 +161,17 @@ static int linkstate_fill_reply(struct sk_buff *skb,
nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max))
return -EMSGSIZE;
+ if (data->link_ext_state_provided) {
+ if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_STATE,
+ data->ethtool_link_ext_state_info.link_ext_state))
+ return -EMSGSIZE;
+
+ if (data->ethtool_link_ext_state_info.__link_ext_substate &&
+ nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_SUBSTATE,
+ data->ethtool_link_ext_state_info.__link_ext_substate))
+ return -EMSGSIZE;
+ }
+
return 0;
}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index dd8a1c1dc07d..5c2072765be7 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -181,6 +181,12 @@ err:
return NULL;
}
+void *ethnl_dump_put(struct sk_buff *skb, struct netlink_callback *cb, u8 cmd)
+{
+ return genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &ethtool_genl_family, 0, cmd);
+}
+
void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd)
{
return genlmsg_put(skb, 0, ++ethnl_bcast_seq, &ethtool_genl_family, 0,
@@ -848,6 +854,12 @@ static const struct genl_ops ethtool_genl_ops[] = {
.flags = GENL_UNS_ADMIN_PERM,
.doit = ethnl_act_cable_test_tdr,
},
+ {
+ .cmd = ETHTOOL_MSG_TUNNEL_INFO_GET,
+ .doit = ethnl_tunnel_info_doit,
+ .start = ethnl_tunnel_info_start,
+ .dumpit = ethnl_tunnel_info_dumpit,
+ },
};
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 9a96b6e90dc2..e2085005caac 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -19,6 +19,7 @@ int ethnl_fill_reply_header(struct sk_buff *skb, struct net_device *dev,
struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd,
u16 hdr_attrtype, struct genl_info *info,
void **ehdrp);
+void *ethnl_dump_put(struct sk_buff *skb, struct netlink_callback *cb, u8 cmd);
void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd);
int ethnl_multicast(struct sk_buff *skb, struct net_device *dev);
@@ -361,5 +362,8 @@ int ethnl_set_pause(struct sk_buff *skb, struct genl_info *info);
int ethnl_set_eee(struct sk_buff *skb, struct genl_info *info);
int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info);
int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info);
+int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info);
+int ethnl_tunnel_info_start(struct netlink_callback *cb);
+int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
#endif /* _NET_ETHTOOL_NETLINK_H */
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index 0eed4e4909ab..82707b662fe4 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -75,6 +75,11 @@ static const struct strset_info info_template[] = {
.count = __HWTSTAMP_FILTER_CNT,
.strings = ts_rx_filter_names,
},
+ [ETH_SS_UDP_TUNNEL_TYPES] = {
+ .per_dev = false,
+ .count = __ETHTOOL_UDP_TUNNEL_TYPE_CNT,
+ .strings = udp_tunnel_type_names,
+ },
};
struct strset_req_info {
@@ -209,13 +214,15 @@ static void strset_cleanup_data(struct ethnl_reply_data *reply_base)
static int strset_prepare_set(struct strset_info *info, struct net_device *dev,
unsigned int id, bool counts_only)
{
+ const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops;
const struct ethtool_ops *ops = dev->ethtool_ops;
void *strings;
int count, ret;
if (id == ETH_SS_PHY_STATS && dev->phydev &&
- !ops->get_ethtool_phy_stats)
- ret = phy_ethtool_get_sset_count(dev->phydev);
+ !ops->get_ethtool_phy_stats && phy_ops &&
+ phy_ops->get_sset_count)
+ ret = phy_ops->get_sset_count(dev->phydev);
else if (ops->get_sset_count && ops->get_strings)
ret = ops->get_sset_count(dev, id);
else
@@ -231,8 +238,9 @@ static int strset_prepare_set(struct strset_info *info, struct net_device *dev,
if (!strings)
return -ENOMEM;
if (id == ETH_SS_PHY_STATS && dev->phydev &&
- !ops->get_ethtool_phy_stats)
- phy_ethtool_get_strings(dev->phydev, strings);
+ !ops->get_ethtool_phy_stats && phy_ops &&
+ phy_ops->get_strings)
+ phy_ops->get_strings(dev->phydev, strings);
else
ops->get_strings(dev, id, strings);
info->strings = strings;
diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c
new file mode 100644
index 000000000000..84f23289475b
--- /dev/null
+++ b/net/ethtool/tunnels.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/ethtool_netlink.h>
+#include <net/udp_tunnel.h>
+#include <net/vxlan.h>
+
+#include "bitset.h"
+#include "common.h"
+#include "netlink.h"
+
+static const struct nla_policy
+ethtool_tunnel_info_policy[ETHTOOL_A_TUNNEL_INFO_MAX + 1] = {
+ [ETHTOOL_A_TUNNEL_INFO_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_TUNNEL_INFO_HEADER] = { .type = NLA_NESTED },
+};
+
+static_assert(ETHTOOL_UDP_TUNNEL_TYPE_VXLAN == ilog2(UDP_TUNNEL_TYPE_VXLAN));
+static_assert(ETHTOOL_UDP_TUNNEL_TYPE_GENEVE == ilog2(UDP_TUNNEL_TYPE_GENEVE));
+static_assert(ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE ==
+ ilog2(UDP_TUNNEL_TYPE_VXLAN_GPE));
+
+static ssize_t ethnl_udp_table_reply_size(unsigned int types, bool compact)
+{
+ ssize_t size;
+
+ size = ethnl_bitset32_size(&types, NULL, __ETHTOOL_UDP_TUNNEL_TYPE_CNT,
+ udp_tunnel_type_names, compact);
+ if (size < 0)
+ return size;
+
+ return size +
+ nla_total_size(0) + /* _UDP_TABLE */
+ nla_total_size(sizeof(u32)); /* _UDP_TABLE_SIZE */
+}
+
+static ssize_t
+ethnl_tunnel_info_reply_size(const struct ethnl_req_info *req_base,
+ struct netlink_ext_ack *extack)
+{
+ bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
+ const struct udp_tunnel_nic_info *info;
+ unsigned int i;
+ ssize_t ret;
+ size_t size;
+
+ info = req_base->dev->udp_tunnel_nic_info;
+ if (!info) {
+ NL_SET_ERR_MSG(extack,
+ "device does not report tunnel offload info");
+ return -EOPNOTSUPP;
+ }
+
+ size = nla_total_size(0); /* _INFO_UDP_PORTS */
+
+ for (i = 0; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) {
+ if (!info->tables[i].n_entries)
+ break;
+
+ ret = ethnl_udp_table_reply_size(info->tables[i].tunnel_types,
+ compact);
+ if (ret < 0)
+ return ret;
+ size += ret;
+
+ size += udp_tunnel_nic_dump_size(req_base->dev, i);
+ }
+
+ if (info->flags & UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN) {
+ ret = ethnl_udp_table_reply_size(0, compact);
+ if (ret < 0)
+ return ret;
+ size += ret;
+
+ size += nla_total_size(0) + /* _TABLE_ENTRY */
+ nla_total_size(sizeof(__be16)) + /* _ENTRY_PORT */
+ nla_total_size(sizeof(u32)); /* _ENTRY_TYPE */
+ }
+
+ return size;
+}
+
+static int
+ethnl_tunnel_info_fill_reply(const struct ethnl_req_info *req_base,
+ struct sk_buff *skb)
+{
+ bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
+ const struct udp_tunnel_nic_info *info;
+ struct nlattr *ports, *table, *entry;
+ unsigned int i;
+
+ info = req_base->dev->udp_tunnel_nic_info;
+ if (!info)
+ return -EOPNOTSUPP;
+
+ ports = nla_nest_start(skb, ETHTOOL_A_TUNNEL_INFO_UDP_PORTS);
+ if (!ports)
+ return -EMSGSIZE;
+
+ for (i = 0; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) {
+ if (!info->tables[i].n_entries)
+ break;
+
+ table = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE);
+ if (!table)
+ goto err_cancel_ports;
+
+ if (nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE,
+ info->tables[i].n_entries))
+ goto err_cancel_table;
+
+ if (ethnl_put_bitset32(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES,
+ &info->tables[i].tunnel_types, NULL,
+ __ETHTOOL_UDP_TUNNEL_TYPE_CNT,
+ udp_tunnel_type_names, compact))
+ goto err_cancel_table;
+
+ if (udp_tunnel_nic_dump_write(req_base->dev, i, skb))
+ goto err_cancel_table;
+
+ nla_nest_end(skb, table);
+ }
+
+ if (info->flags & UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN) {
+ u32 zero = 0;
+
+ table = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE);
+ if (!table)
+ goto err_cancel_ports;
+
+ if (nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, 1))
+ goto err_cancel_table;
+
+ if (ethnl_put_bitset32(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES,
+ &zero, NULL,
+ __ETHTOOL_UDP_TUNNEL_TYPE_CNT,
+ udp_tunnel_type_names, compact))
+ goto err_cancel_table;
+
+ entry = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY);
+
+ if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT,
+ htons(IANA_VXLAN_UDP_PORT)) ||
+ nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE,
+ ilog2(UDP_TUNNEL_TYPE_VXLAN)))
+ goto err_cancel_entry;
+
+ nla_nest_end(skb, entry);
+ nla_nest_end(skb, table);
+ }
+
+ nla_nest_end(skb, ports);
+
+ return 0;
+
+err_cancel_entry:
+ nla_nest_cancel(skb, entry);
+err_cancel_table:
+ nla_nest_cancel(skb, table);
+err_cancel_ports:
+ nla_nest_cancel(skb, ports);
+ return -EMSGSIZE;
+}
+
+static int
+ethnl_tunnel_info_req_parse(struct ethnl_req_info *req_info,
+ const struct nlmsghdr *nlhdr, struct net *net,
+ struct netlink_ext_ack *extack, bool require_dev)
+{
+ struct nlattr *tb[ETHTOOL_A_TUNNEL_INFO_MAX + 1];
+ int ret;
+
+ ret = nlmsg_parse(nlhdr, GENL_HDRLEN, tb, ETHTOOL_A_TUNNEL_INFO_MAX,
+ ethtool_tunnel_info_policy, extack);
+ if (ret < 0)
+ return ret;
+
+ return ethnl_parse_header_dev_get(req_info,
+ tb[ETHTOOL_A_TUNNEL_INFO_HEADER],
+ net, extack, require_dev);
+}
+
+int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ethnl_req_info req_info = {};
+ struct sk_buff *rskb;
+ void *reply_payload;
+ int reply_len;
+ int ret;
+
+ ret = ethnl_tunnel_info_req_parse(&req_info, info->nlhdr,
+ genl_info_net(info), info->extack,
+ true);
+ if (ret < 0)
+ return ret;
+
+ rtnl_lock();
+ ret = ethnl_tunnel_info_reply_size(&req_info, info->extack);
+ if (ret < 0)
+ goto err_unlock_rtnl;
+ reply_len = ret + ethnl_reply_header_size();
+
+ rskb = ethnl_reply_init(reply_len, req_info.dev,
+ ETHTOOL_MSG_TUNNEL_INFO_GET,
+ ETHTOOL_A_TUNNEL_INFO_HEADER,
+ info, &reply_payload);
+ if (!rskb) {
+ ret = -ENOMEM;
+ goto err_unlock_rtnl;
+ }
+
+ ret = ethnl_tunnel_info_fill_reply(&req_info, rskb);
+ if (ret)
+ goto err_free_msg;
+ rtnl_unlock();
+ dev_put(req_info.dev);
+ genlmsg_end(rskb, reply_payload);
+
+ return genlmsg_reply(rskb, info);
+
+err_free_msg:
+ nlmsg_free(rskb);
+err_unlock_rtnl:
+ rtnl_unlock();
+ dev_put(req_info.dev);
+ return ret;
+}
+
+struct ethnl_tunnel_info_dump_ctx {
+ struct ethnl_req_info req_info;
+ int pos_hash;
+ int pos_idx;
+};
+
+int ethnl_tunnel_info_start(struct netlink_callback *cb)
+{
+ struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx;
+ int ret;
+
+ BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ ret = ethnl_tunnel_info_req_parse(&ctx->req_info, cb->nlh,
+ sock_net(cb->skb->sk), cb->extack,
+ false);
+ if (ctx->req_info.dev) {
+ dev_put(ctx->req_info.dev);
+ ctx->req_info.dev = NULL;
+ }
+
+ return ret;
+}
+
+int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx;
+ struct net *net = sock_net(skb->sk);
+ int s_idx = ctx->pos_idx;
+ int h, idx = 0;
+ int ret = 0;
+ void *ehdr;
+
+ rtnl_lock();
+ cb->seq = net->dev_base_seq;
+ for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+ struct hlist_head *head;
+ struct net_device *dev;
+
+ head = &net->dev_index_head[h];
+ idx = 0;
+ hlist_for_each_entry(dev, head, index_hlist) {
+ if (idx < s_idx)
+ goto cont;
+
+ ehdr = ethnl_dump_put(skb, cb,
+ ETHTOOL_MSG_TUNNEL_INFO_GET);
+ if (!ehdr) {
+ ret = -EMSGSIZE;
+ goto out;
+ }
+
+ ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_TUNNEL_INFO_HEADER);
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ goto out;
+ }
+
+ ctx->req_info.dev = dev;
+ ret = ethnl_tunnel_info_fill_reply(&ctx->req_info, skb);
+ ctx->req_info.dev = NULL;
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ if (ret == -EOPNOTSUPP)
+ goto cont;
+ goto out;
+ }
+ genlmsg_end(skb, ehdr);
+cont:
+ idx++;
+ }
+ }
+out:
+ rtnl_unlock();
+
+ ctx->pos_hash = h;
+ ctx->pos_idx = idx;
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+
+ if (ret == -EMSGSIZE && skb->len)
+ return skb->len;
+ return ret;
+}
diff --git a/net/hsr/Kconfig b/net/hsr/Kconfig
index 8095b034e76e..1b048c17b6c8 100644
--- a/net/hsr/Kconfig
+++ b/net/hsr/Kconfig
@@ -4,24 +4,35 @@
#
config HSR
- tristate "High-availability Seamless Redundancy (HSR)"
+ tristate "High-availability Seamless Redundancy (HSR & PRP)"
help
+ This enables IEC 62439 defined High-availability Seamless
+ Redundancy (HSR) and Parallel Redundancy Protocol (PRP).
+
If you say Y here, then your Linux box will be able to act as a
- DANH ("Doubly attached node implementing HSR"). For this to work,
- your Linux box needs (at least) two physical Ethernet interfaces,
- and it must be connected as a node in a ring network together with
- other HSR capable nodes.
+ DANH ("Doubly attached node implementing HSR") or DANP ("Doubly
+ attached node implementing PRP"). For this to work, your Linux box
+ needs (at least) two physical Ethernet interfaces.
+
+ For DANH, it must be connected as a node in a ring network together
+ with other HSR capable nodes. All Ethernet frames sent over the HSR
+ device will be sent in both directions on the ring (over both slave
+ ports), giving a redundant, instant fail-over network. Each HSR node
+ in the ring acts like a bridge for HSR frames, but filters frames
+ that have been forwarded earlier.
- All Ethernet frames sent over the hsr device will be sent in both
- directions on the ring (over both slave ports), giving a redundant,
- instant fail-over network. Each HSR node in the ring acts like a
- bridge for HSR frames, but filters frames that have been forwarded
- earlier.
+ For DANP, it must be connected as a node connecting to two
+ separate networks over the two slave interfaces. Like HSR, Ethernet
+ frames sent over the PRP device will be sent to both networks giving
+ a redundant, instant fail-over network. Unlike HSR, PRP networks
+ can have Singly Attached Nodes (SAN) such as PC, printer, bridges
+ etc and will be able to communicate with DANP nodes.
This code is a "best effort" to comply with the HSR standard as
described in IEC 62439-3:2010 (HSRv0) and IEC 62439-3:2012 (HSRv1),
- but no compliancy tests have been made. Use iproute2 to select
- the version you desire.
+ and PRP standard described in IEC 62439-4:2012 (PRP), but no
+ compliancy tests have been made. Use iproute2 to select the protocol
+ you would like to use.
You need to perform any and all necessary tests yourself before
relying on this code in a safety critical system!
diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
index 9787ef11ca71..7e11a6c35bc3 100644
--- a/net/hsr/hsr_debugfs.c
+++ b/net/hsr/hsr_debugfs.c
@@ -1,5 +1,5 @@
/*
- * hsr_debugfs code
+ * debugfs code for HSR & PRP
* Copyright (C) 2019 Texas Instruments Incorporated
*
* Author(s):
@@ -22,12 +22,6 @@
static struct dentry *hsr_debugfs_root_dir;
-static void print_mac_address(struct seq_file *sfp, unsigned char *mac)
-{
- seq_printf(sfp, "%02x:%02x:%02x:%02x:%02x:%02x:",
- mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
-}
-
/* hsr_node_table_show - Formats and prints node_table entries */
static int
hsr_node_table_show(struct seq_file *sfp, void *data)
@@ -35,20 +29,32 @@ hsr_node_table_show(struct seq_file *sfp, void *data)
struct hsr_priv *priv = (struct hsr_priv *)sfp->private;
struct hsr_node *node;
- seq_puts(sfp, "Node Table entries\n");
- seq_puts(sfp, "MAC-Address-A, MAC-Address-B, time_in[A], ");
- seq_puts(sfp, "time_in[B], Address-B port\n");
+ seq_printf(sfp, "Node Table entries for (%s) device\n",
+ (priv->prot_version == PRP_V1 ? "PRP" : "HSR"));
+ seq_puts(sfp, "MAC-Address-A, MAC-Address-B, time_in[A], ");
+ seq_puts(sfp, "time_in[B], Address-B port, ");
+ if (priv->prot_version == PRP_V1)
+ seq_puts(sfp, "SAN-A, SAN-B, DAN-P\n");
+ else
+ seq_puts(sfp, "DAN-H\n");
+
rcu_read_lock();
list_for_each_entry_rcu(node, &priv->node_db, mac_list) {
/* skip self node */
if (hsr_addr_is_self(priv, node->macaddress_A))
continue;
- print_mac_address(sfp, &node->macaddress_A[0]);
- seq_puts(sfp, " ");
- print_mac_address(sfp, &node->macaddress_B[0]);
- seq_printf(sfp, "0x%lx, ", node->time_in[HSR_PT_SLAVE_A]);
- seq_printf(sfp, "0x%lx ", node->time_in[HSR_PT_SLAVE_B]);
- seq_printf(sfp, "0x%x\n", node->addr_B_port);
+ seq_printf(sfp, "%pM ", &node->macaddress_A[0]);
+ seq_printf(sfp, "%pM ", &node->macaddress_B[0]);
+ seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_A]);
+ seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_B]);
+ seq_printf(sfp, "%14x, ", node->addr_B_port);
+
+ if (priv->prot_version == PRP_V1)
+ seq_printf(sfp, "%5x, %5x, %5x\n",
+ node->san_a, node->san_b,
+ (node->san_a == 0 && node->san_b == 0));
+ else
+ seq_printf(sfp, "%5x\n", 1);
}
rcu_read_unlock();
return 0;
@@ -57,7 +63,8 @@ hsr_node_table_show(struct seq_file *sfp, void *data)
/* hsr_node_table_open - Open the node_table file
*
* Description:
- * This routine opens a debugfs file node_table of specific hsr device
+ * This routine opens a debugfs file node_table of specific hsr
+ * or prp device
*/
static int
hsr_node_table_open(struct inode *inode, struct file *filp)
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index a6f4e9f65b14..ab953a1a0d6c 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -3,9 +3,8 @@
*
* Author(s):
* 2011-2014 Arvid Brodin, arvid.brodin@alten.se
- *
* This file contains device methods for creating, using and destroying
- * virtual HSR devices.
+ * virtual HSR or PRP devices.
*/
#include <linux/netdevice.h>
@@ -210,7 +209,7 @@ static netdev_features_t hsr_fix_features(struct net_device *dev,
return hsr_features_recompute(hsr, features);
}
-static int hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct hsr_priv *hsr = netdev_priv(dev);
struct hsr_port *master;
@@ -231,66 +230,103 @@ static const struct header_ops hsr_header_ops = {
.parse = eth_header_parse,
};
-static void send_hsr_supervision_frame(struct hsr_port *master,
- u8 type, u8 hsr_ver)
+static struct sk_buff *hsr_init_skb(struct hsr_port *master, u16 proto)
{
+ struct hsr_priv *hsr = master->hsr;
struct sk_buff *skb;
int hlen, tlen;
- struct hsr_tag *hsr_tag;
- struct hsr_sup_tag *hsr_stag;
- struct hsr_sup_payload *hsr_sp;
- unsigned long irqflags;
hlen = LL_RESERVED_SPACE(master->dev);
tlen = master->dev->needed_tailroom;
+ /* skb size is same for PRP/HSR frames, only difference
+ * being, for PRP it is a trailer and for HSR it is a
+ * header
+ */
skb = dev_alloc_skb(sizeof(struct hsr_tag) +
sizeof(struct hsr_sup_tag) +
sizeof(struct hsr_sup_payload) + hlen + tlen);
if (!skb)
- return;
+ return skb;
skb_reserve(skb, hlen);
-
skb->dev = master->dev;
- skb->protocol = htons(hsr_ver ? ETH_P_HSR : ETH_P_PRP);
+ skb->protocol = htons(proto);
skb->priority = TC_PRIO_CONTROL;
- if (dev_hard_header(skb, skb->dev, (hsr_ver ? ETH_P_HSR : ETH_P_PRP),
- master->hsr->sup_multicast_addr,
+ if (dev_hard_header(skb, skb->dev, proto,
+ hsr->sup_multicast_addr,
skb->dev->dev_addr, skb->len) <= 0)
goto out;
+
skb_reset_mac_header(skb);
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
- if (hsr_ver > 0) {
+ return skb;
+out:
+ kfree_skb(skb);
+
+ return NULL;
+}
+
+static void send_hsr_supervision_frame(struct hsr_port *master,
+ unsigned long *interval)
+{
+ struct hsr_priv *hsr = master->hsr;
+ __u8 type = HSR_TLV_LIFE_CHECK;
+ struct hsr_tag *hsr_tag = NULL;
+ struct hsr_sup_payload *hsr_sp;
+ struct hsr_sup_tag *hsr_stag;
+ unsigned long irqflags;
+ struct sk_buff *skb;
+ u16 proto;
+
+ *interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
+ if (hsr->announce_count < 3 && hsr->prot_version == 0) {
+ type = HSR_TLV_ANNOUNCE;
+ *interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
+ hsr->announce_count++;
+ }
+
+ if (!hsr->prot_version)
+ proto = ETH_P_PRP;
+ else
+ proto = ETH_P_HSR;
+
+ skb = hsr_init_skb(master, proto);
+ if (!skb) {
+ WARN_ONCE(1, "HSR: Could not send supervision frame\n");
+ return;
+ }
+
+ if (hsr->prot_version > 0) {
hsr_tag = skb_put(skb, sizeof(struct hsr_tag));
hsr_tag->encap_proto = htons(ETH_P_PRP);
set_hsr_tag_LSDU_size(hsr_tag, HSR_V1_SUP_LSDUSIZE);
}
hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag));
- set_hsr_stag_path(hsr_stag, (hsr_ver ? 0x0 : 0xf));
- set_hsr_stag_HSR_ver(hsr_stag, hsr_ver);
+ set_hsr_stag_path(hsr_stag, (hsr->prot_version ? 0x0 : 0xf));
+ set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version);
/* From HSRv1 on we have separate supervision sequence numbers. */
spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags);
- if (hsr_ver > 0) {
- hsr_stag->sequence_nr = htons(master->hsr->sup_sequence_nr);
- hsr_tag->sequence_nr = htons(master->hsr->sequence_nr);
- master->hsr->sup_sequence_nr++;
- master->hsr->sequence_nr++;
+ if (hsr->prot_version > 0) {
+ hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr);
+ hsr->sup_sequence_nr++;
+ hsr_tag->sequence_nr = htons(hsr->sequence_nr);
+ hsr->sequence_nr++;
} else {
- hsr_stag->sequence_nr = htons(master->hsr->sequence_nr);
- master->hsr->sequence_nr++;
+ hsr_stag->sequence_nr = htons(hsr->sequence_nr);
+ hsr->sequence_nr++;
}
spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags);
hsr_stag->HSR_TLV_type = type;
/* TODO: Why 12 in HSRv0? */
- hsr_stag->HSR_TLV_length =
- hsr_ver ? sizeof(struct hsr_sup_payload) : 12;
+ hsr_stag->HSR_TLV_length = hsr->prot_version ?
+ sizeof(struct hsr_sup_payload) : 12;
/* Payload: MacAddressA */
hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
@@ -300,11 +336,57 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
return;
hsr_forward_skb(skb, master);
+
return;
+}
-out:
- WARN_ONCE(1, "HSR: Could not send supervision frame\n");
- kfree_skb(skb);
+static void send_prp_supervision_frame(struct hsr_port *master,
+ unsigned long *interval)
+{
+ struct hsr_priv *hsr = master->hsr;
+ struct hsr_sup_payload *hsr_sp;
+ struct hsr_sup_tag *hsr_stag;
+ unsigned long irqflags;
+ struct sk_buff *skb;
+ struct prp_rct *rct;
+ u8 *tail;
+
+ skb = hsr_init_skb(master, ETH_P_PRP);
+ if (!skb) {
+ WARN_ONCE(1, "PRP: Could not send supervision frame\n");
+ return;
+ }
+
+ *interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
+ hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag));
+ set_hsr_stag_path(hsr_stag, (hsr->prot_version ? 0x0 : 0xf));
+ set_hsr_stag_HSR_ver(hsr_stag, (hsr->prot_version ? 1 : 0));
+
+ /* From HSRv1 on we have separate supervision sequence numbers. */
+ spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags);
+ hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr);
+ hsr->sup_sequence_nr++;
+ hsr_stag->HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD;
+ hsr_stag->HSR_TLV_length = sizeof(struct hsr_sup_payload);
+
+ /* Payload: MacAddressA */
+ hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
+ ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr);
+
+ if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN)) {
+ spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags);
+ return;
+ }
+
+ tail = skb_tail_pointer(skb) - HSR_HLEN;
+ rct = (struct prp_rct *)tail;
+ rct->PRP_suffix = htons(ETH_P_PRP);
+ set_prp_LSDU_size(rct, HSR_V1_SUP_LSDUSIZE);
+ rct->sequence_nr = htons(hsr->sequence_nr);
+ hsr->sequence_nr++;
+ spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags);
+
+ hsr_forward_skb(skb, master);
}
/* Announce (supervision frame) timer function
@@ -319,19 +401,7 @@ static void hsr_announce(struct timer_list *t)
rcu_read_lock();
master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
-
- if (hsr->announce_count < 3 && hsr->prot_version == 0) {
- send_hsr_supervision_frame(master, HSR_TLV_ANNOUNCE,
- hsr->prot_version);
- hsr->announce_count++;
-
- interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
- } else {
- send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK,
- hsr->prot_version);
-
- interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
- }
+ hsr->proto_ops->send_sv_frame(master, &interval);
if (is_admin_up(master->dev))
mod_timer(&hsr->announce_timer, jiffies + interval);
@@ -368,6 +438,24 @@ static struct device_type hsr_type = {
.name = "hsr",
};
+static struct hsr_proto_ops hsr_ops = {
+ .send_sv_frame = send_hsr_supervision_frame,
+ .create_tagged_frame = hsr_create_tagged_frame,
+ .get_untagged_frame = hsr_get_untagged_frame,
+ .fill_frame_info = hsr_fill_frame_info,
+ .invalid_dan_ingress_frame = hsr_invalid_dan_ingress_frame,
+};
+
+static struct hsr_proto_ops prp_ops = {
+ .send_sv_frame = send_prp_supervision_frame,
+ .create_tagged_frame = prp_create_tagged_frame,
+ .get_untagged_frame = prp_get_untagged_frame,
+ .drop_frame = prp_drop_frame,
+ .fill_frame_info = prp_fill_frame_info,
+ .handle_san_frame = prp_handle_san_frame,
+ .update_san_info = prp_update_san_info,
+};
+
void hsr_dev_setup(struct net_device *dev)
{
eth_hw_addr_random(dev);
@@ -427,6 +515,17 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
ether_addr_copy(hsr_dev->dev_addr, slave[0]->dev_addr);
+ /* initialize protocol specific functions */
+ if (protocol_version == PRP_V1) {
+ /* For PRP, lan_id has most significant 3 bits holding
+ * the net_id of PRP_LAN_ID
+ */
+ hsr->net_id = PRP_LAN_ID << 1;
+ hsr->proto_ops = &prp_ops;
+ } else {
+ hsr->proto_ops = &hsr_ops;
+ }
+
/* Make sure we recognize frames from ourselves in hsr_rcv() */
res = hsr_create_self_node(hsr, hsr_dev->dev_addr,
slave[1]->dev_addr);
diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h
index b8f9262ed101..868373822ee4 100644
--- a/net/hsr/hsr_device.h
+++ b/net/hsr/hsr_device.h
@@ -3,6 +3,8 @@
*
* Author(s):
* 2011-2014 Arvid Brodin, arvid.brodin@alten.se
+ *
+ * include file for HSR and PRP.
*/
#ifndef __HSR_DEVICE_H
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 1ea17752fffc..cadfccd7876e 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -3,6 +3,8 @@
*
* Author(s):
* 2011-2014 Arvid Brodin, arvid.brodin@alten.se
+ *
+ * Frame router for HSR and PRP.
*/
#include "hsr_forward.h"
@@ -15,18 +17,6 @@
struct hsr_node;
-struct hsr_frame_info {
- struct sk_buff *skb_std;
- struct sk_buff *skb_hsr;
- struct hsr_port *port_rcv;
- struct hsr_node *node_src;
- u16 sequence_nr;
- bool is_supervision;
- bool is_vlan;
- bool is_local_dest;
- bool is_local_exclusive;
-};
-
/* The uses I can see for these HSR supervision frames are:
* 1) Use the frames that are sent after node initialization ("HSR_TLV.Type =
* 22") to reset any sequence_nr counters belonging to that node. Useful if
@@ -74,7 +64,9 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
}
if (hsr_sup_tag->HSR_TLV_type != HSR_TLV_ANNOUNCE &&
- hsr_sup_tag->HSR_TLV_type != HSR_TLV_LIFE_CHECK)
+ hsr_sup_tag->HSR_TLV_type != HSR_TLV_LIFE_CHECK &&
+ hsr_sup_tag->HSR_TLV_type != PRP_TLV_LIFE_CHECK_DD &&
+ hsr_sup_tag->HSR_TLV_type != PRP_TLV_LIFE_CHECK_DA)
return false;
if (hsr_sup_tag->HSR_TLV_length != 12 &&
hsr_sup_tag->HSR_TLV_length != sizeof(struct hsr_sup_payload))
@@ -83,8 +75,8 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
return true;
}
-static struct sk_buff *create_stripped_skb(struct sk_buff *skb_in,
- struct hsr_frame_info *frame)
+static struct sk_buff *create_stripped_skb_hsr(struct sk_buff *skb_in,
+ struct hsr_frame_info *frame)
{
struct sk_buff *skb;
int copylen;
@@ -112,38 +104,123 @@ static struct sk_buff *create_stripped_skb(struct sk_buff *skb_in,
return skb;
}
-static struct sk_buff *frame_get_stripped_skb(struct hsr_frame_info *frame,
- struct hsr_port *port)
+struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame,
+ struct hsr_port *port)
{
- if (!frame->skb_std)
- frame->skb_std = create_stripped_skb(frame->skb_hsr, frame);
+ if (!frame->skb_std) {
+ if (frame->skb_hsr) {
+ frame->skb_std =
+ create_stripped_skb_hsr(frame->skb_hsr, frame);
+ } else {
+ /* Unexpected */
+ WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n",
+ __FILE__, __LINE__, port->dev->name);
+ return NULL;
+ }
+ }
+
return skb_clone(frame->skb_std, GFP_ATOMIC);
}
+struct sk_buff *prp_get_untagged_frame(struct hsr_frame_info *frame,
+ struct hsr_port *port)
+{
+ if (!frame->skb_std) {
+ if (frame->skb_prp) {
+ /* trim the skb by len - HSR_HLEN to exclude RCT */
+ skb_trim(frame->skb_prp,
+ frame->skb_prp->len - HSR_HLEN);
+ frame->skb_std =
+ __pskb_copy(frame->skb_prp,
+ skb_headroom(frame->skb_prp),
+ GFP_ATOMIC);
+ } else {
+ /* Unexpected */
+ WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n",
+ __FILE__, __LINE__, port->dev->name);
+ return NULL;
+ }
+ }
+
+ return skb_clone(frame->skb_std, GFP_ATOMIC);
+}
+
+static void prp_set_lan_id(struct prp_rct *trailer,
+ struct hsr_port *port)
+{
+ int lane_id;
+
+ if (port->type == HSR_PT_SLAVE_A)
+ lane_id = 0;
+ else
+ lane_id = 1;
+
+ /* Add net_id in the upper 3 bits of lane_id */
+ lane_id |= port->hsr->net_id;
+ set_prp_lan_id(trailer, lane_id);
+}
+
+/* Tailroom for PRP rct should have been created before calling this */
+static struct sk_buff *prp_fill_rct(struct sk_buff *skb,
+ struct hsr_frame_info *frame,
+ struct hsr_port *port)
+{
+ struct prp_rct *trailer;
+ int min_size = ETH_ZLEN;
+ int lsdu_size;
+
+ if (!skb)
+ return skb;
+
+ if (frame->is_vlan)
+ min_size = VLAN_ETH_ZLEN;
+
+ if (skb_put_padto(skb, min_size))
+ return NULL;
+
+ trailer = (struct prp_rct *)skb_put(skb, HSR_HLEN);
+ lsdu_size = skb->len - 14;
+ if (frame->is_vlan)
+ lsdu_size -= 4;
+ prp_set_lan_id(trailer, port);
+ set_prp_LSDU_size(trailer, lsdu_size);
+ trailer->sequence_nr = htons(frame->sequence_nr);
+ trailer->PRP_suffix = htons(ETH_P_PRP);
+
+ return skb;
+}
+
+static void hsr_set_path_id(struct hsr_ethhdr *hsr_ethhdr,
+ struct hsr_port *port)
+{
+ int path_id;
+
+ if (port->type == HSR_PT_SLAVE_A)
+ path_id = 0;
+ else
+ path_id = 1;
+
+ set_hsr_tag_path(&hsr_ethhdr->hsr_tag, path_id);
+}
+
static struct sk_buff *hsr_fill_tag(struct sk_buff *skb,
struct hsr_frame_info *frame,
struct hsr_port *port, u8 proto_version)
{
struct hsr_ethhdr *hsr_ethhdr;
- int lane_id;
int lsdu_size;
/* pad to minimum packet size which is 60 + 6 (HSR tag) */
if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN))
return NULL;
- if (port->type == HSR_PT_SLAVE_A)
- lane_id = 0;
- else
- lane_id = 1;
-
lsdu_size = skb->len - 14;
if (frame->is_vlan)
lsdu_size -= 4;
hsr_ethhdr = (struct hsr_ethhdr *)skb_mac_header(skb);
- set_hsr_tag_path(&hsr_ethhdr->hsr_tag, lane_id);
+ hsr_set_path_id(hsr_ethhdr, port);
set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size);
hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr);
hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto;
@@ -153,16 +230,28 @@ static struct sk_buff *hsr_fill_tag(struct sk_buff *skb,
return skb;
}
-static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o,
- struct hsr_frame_info *frame,
- struct hsr_port *port)
+/* If the original frame was an HSR tagged frame, just clone it to be sent
+ * unchanged. Otherwise, create a private frame especially tagged for 'port'.
+ */
+struct sk_buff *hsr_create_tagged_frame(struct hsr_frame_info *frame,
+ struct hsr_port *port)
{
- int movelen;
unsigned char *dst, *src;
struct sk_buff *skb;
+ int movelen;
+
+ if (frame->skb_hsr) {
+ struct hsr_ethhdr *hsr_ethhdr =
+ (struct hsr_ethhdr *)skb_mac_header(frame->skb_hsr);
+
+ /* set the lane id properly */
+ hsr_set_path_id(hsr_ethhdr, port);
+ return skb_clone(frame->skb_hsr, GFP_ATOMIC);
+ }
/* Create the new skb with enough headroom to fit the HSR tag */
- skb = __pskb_copy(skb_o, skb_headroom(skb_o) + HSR_HLEN, GFP_ATOMIC);
+ skb = __pskb_copy(frame->skb_std,
+ skb_headroom(frame->skb_std) + HSR_HLEN, GFP_ATOMIC);
if (!skb)
return NULL;
skb_reset_mac_header(skb);
@@ -185,21 +274,29 @@ static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o,
return hsr_fill_tag(skb, frame, port, port->hsr->prot_version);
}
-/* If the original frame was an HSR tagged frame, just clone it to be sent
- * unchanged. Otherwise, create a private frame especially tagged for 'port'.
- */
-static struct sk_buff *frame_get_tagged_skb(struct hsr_frame_info *frame,
- struct hsr_port *port)
+struct sk_buff *prp_create_tagged_frame(struct hsr_frame_info *frame,
+ struct hsr_port *port)
{
- if (frame->skb_hsr)
- return skb_clone(frame->skb_hsr, GFP_ATOMIC);
+ struct sk_buff *skb;
- if (port->type != HSR_PT_SLAVE_A && port->type != HSR_PT_SLAVE_B) {
- WARN_ONCE(1, "HSR: Bug: trying to create a tagged frame for a non-ring port");
- return NULL;
+ if (frame->skb_prp) {
+ struct prp_rct *trailer = skb_get_PRP_rct(frame->skb_prp);
+
+ if (trailer) {
+ prp_set_lan_id(trailer, port);
+ } else {
+ WARN_ONCE(!trailer, "errored PRP skb");
+ return NULL;
+ }
+ return skb_clone(frame->skb_prp, GFP_ATOMIC);
}
- return create_tagged_skb(frame->skb_std, frame, port);
+ skb = skb_copy_expand(frame->skb_std, 0,
+ skb_tailroom(frame->skb_std) + HSR_HLEN,
+ GFP_ATOMIC);
+ prp_fill_rct(skb, frame, port);
+
+ return skb;
}
static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev,
@@ -236,9 +333,18 @@ static int hsr_xmit(struct sk_buff *skb, struct hsr_port *port,
return dev_queue_xmit(skb);
}
+bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port)
+{
+ return ((frame->port_rcv->type == HSR_PT_SLAVE_A &&
+ port->type == HSR_PT_SLAVE_B) ||
+ (frame->port_rcv->type == HSR_PT_SLAVE_B &&
+ port->type == HSR_PT_SLAVE_A));
+}
+
/* Forward the frame through all devices except:
* - Back through the receiving device
* - If it's a HSR frame: through a device where it has passed before
+ * - if it's a PRP frame: through another PRP slave device (no bridge)
* - To the local HSR master only if the frame is directly addressed to it, or
* a non-supervision multicast or broadcast frame.
*
@@ -253,6 +359,7 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
struct sk_buff *skb;
hsr_for_each_port(frame->port_rcv->hsr, port) {
+ struct hsr_priv *hsr = port->hsr;
/* Don't send frame back the way it came */
if (port == frame->port_rcv)
continue;
@@ -265,24 +372,33 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
if (port->type != HSR_PT_MASTER && frame->is_local_exclusive)
continue;
- /* Don't send frame over port where it has been sent before */
- if (hsr_register_frame_out(port, frame->node_src,
+ /* Don't send frame over port where it has been sent before.
+ * Also fro SAN, this shouldn't be done.
+ */
+ if (!frame->is_from_san &&
+ hsr_register_frame_out(port, frame->node_src,
frame->sequence_nr))
continue;
if (frame->is_supervision && port->type == HSR_PT_MASTER) {
- hsr_handle_sup_frame(frame->skb_hsr,
- frame->node_src,
- frame->port_rcv);
+ hsr_handle_sup_frame(frame);
continue;
}
+ /* Check if frame is to be dropped. Eg. for PRP no forward
+ * between ports.
+ */
+ if (hsr->proto_ops->drop_frame &&
+ hsr->proto_ops->drop_frame(frame, port))
+ continue;
+
if (port->type != HSR_PT_MASTER)
- skb = frame_get_tagged_skb(frame, port);
+ skb = hsr->proto_ops->create_tagged_frame(frame, port);
else
- skb = frame_get_stripped_skb(frame, port);
+ skb = hsr->proto_ops->get_untagged_frame(frame, port);
+
if (!skb) {
- /* FIXME: Record the dropped frame? */
+ frame->port_rcv->dev->stats.rx_dropped++;
continue;
}
@@ -313,40 +429,95 @@ static void check_local_dest(struct hsr_priv *hsr, struct sk_buff *skb,
}
}
-static int hsr_fill_frame_info(struct hsr_frame_info *frame,
- struct sk_buff *skb, struct hsr_port *port)
+static void handle_std_frame(struct sk_buff *skb,
+ struct hsr_frame_info *frame)
{
- struct ethhdr *ethhdr;
+ struct hsr_port *port = frame->port_rcv;
+ struct hsr_priv *hsr = port->hsr;
unsigned long irqflags;
+ frame->skb_hsr = NULL;
+ frame->skb_prp = NULL;
+ frame->skb_std = skb;
+
+ if (port->type != HSR_PT_MASTER) {
+ frame->is_from_san = true;
+ } else {
+ /* Sequence nr for the master node */
+ spin_lock_irqsave(&hsr->seqnr_lock, irqflags);
+ frame->sequence_nr = hsr->sequence_nr;
+ hsr->sequence_nr++;
+ spin_unlock_irqrestore(&hsr->seqnr_lock, irqflags);
+ }
+}
+
+void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
+ struct hsr_frame_info *frame)
+{
+ if (proto == htons(ETH_P_PRP) ||
+ proto == htons(ETH_P_HSR)) {
+ /* HSR tagged frame :- Data or Supervision */
+ frame->skb_std = NULL;
+ frame->skb_prp = NULL;
+ frame->skb_hsr = skb;
+ frame->sequence_nr = hsr_get_skb_sequence_nr(skb);
+ return;
+ }
+
+ /* Standard frame or PRP from master port */
+ handle_std_frame(skb, frame);
+}
+
+void prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
+ struct hsr_frame_info *frame)
+{
+ /* Supervision frame */
+ struct prp_rct *rct = skb_get_PRP_rct(skb);
+
+ if (rct &&
+ prp_check_lsdu_size(skb, rct, frame->is_supervision)) {
+ frame->skb_hsr = NULL;
+ frame->skb_std = NULL;
+ frame->skb_prp = skb;
+ frame->sequence_nr = prp_get_skb_sequence_nr(rct);
+ return;
+ }
+ handle_std_frame(skb, frame);
+}
+
+static int fill_frame_info(struct hsr_frame_info *frame,
+ struct sk_buff *skb, struct hsr_port *port)
+{
+ struct hsr_priv *hsr = port->hsr;
+ struct hsr_vlan_ethhdr *vlan_hdr;
+ struct ethhdr *ethhdr;
+ __be16 proto;
+
+ memset(frame, 0, sizeof(*frame));
frame->is_supervision = is_supervision_frame(port->hsr, skb);
- frame->node_src = hsr_get_node(port, skb, frame->is_supervision);
+ frame->node_src = hsr_get_node(port, &hsr->node_db, skb,
+ frame->is_supervision,
+ port->type);
if (!frame->node_src)
return -1; /* Unknown node and !is_supervision, or no mem */
ethhdr = (struct ethhdr *)skb_mac_header(skb);
frame->is_vlan = false;
- if (ethhdr->h_proto == htons(ETH_P_8021Q)) {
+ proto = ethhdr->h_proto;
+
+ if (proto == htons(ETH_P_8021Q))
frame->is_vlan = true;
+
+ if (frame->is_vlan) {
+ vlan_hdr = (struct hsr_vlan_ethhdr *)ethhdr;
+ proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto;
/* FIXME: */
netdev_warn_once(skb->dev, "VLAN not yet supported");
}
- if (ethhdr->h_proto == htons(ETH_P_PRP) ||
- ethhdr->h_proto == htons(ETH_P_HSR)) {
- frame->skb_std = NULL;
- frame->skb_hsr = skb;
- frame->sequence_nr = hsr_get_skb_sequence_nr(skb);
- } else {
- frame->skb_std = skb;
- frame->skb_hsr = NULL;
- /* Sequence nr for the master node */
- spin_lock_irqsave(&port->hsr->seqnr_lock, irqflags);
- frame->sequence_nr = port->hsr->sequence_nr;
- port->hsr->sequence_nr++;
- spin_unlock_irqrestore(&port->hsr->seqnr_lock, irqflags);
- }
+ frame->is_from_san = false;
frame->port_rcv = port;
+ hsr->proto_ops->fill_frame_info(proto, skb, frame);
check_local_dest(port->hsr, skb, frame);
return 0;
@@ -363,8 +534,9 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port)
goto out_drop;
}
- if (hsr_fill_frame_info(&frame, skb, port) < 0)
+ if (fill_frame_info(&frame, skb, port) < 0)
goto out_drop;
+
hsr_register_frame_in(frame.node_src, port, frame.sequence_nr);
hsr_forward_do(&frame);
/* Gets called for ingress frames as well as egress from master port.
@@ -375,10 +547,9 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port)
port->dev->stats.tx_bytes += skb->len;
}
- if (frame.skb_hsr)
- kfree_skb(frame.skb_hsr);
- if (frame.skb_std)
- kfree_skb(frame.skb_std);
+ kfree_skb(frame.skb_hsr);
+ kfree_skb(frame.skb_prp);
+ kfree_skb(frame.skb_std);
return;
out_drop:
diff --git a/net/hsr/hsr_forward.h b/net/hsr/hsr_forward.h
index 51a69295566c..618140d484ad 100644
--- a/net/hsr/hsr_forward.h
+++ b/net/hsr/hsr_forward.h
@@ -3,6 +3,8 @@
*
* Author(s):
* 2011-2014 Arvid Brodin, arvid.brodin@alten.se
+ *
+ * include file for HSR and PRP.
*/
#ifndef __HSR_FORWARD_H
@@ -12,5 +14,17 @@
#include "hsr_main.h"
void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port);
-
+struct sk_buff *prp_create_tagged_frame(struct hsr_frame_info *frame,
+ struct hsr_port *port);
+struct sk_buff *hsr_create_tagged_frame(struct hsr_frame_info *frame,
+ struct hsr_port *port);
+struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame,
+ struct hsr_port *port);
+struct sk_buff *prp_get_untagged_frame(struct hsr_frame_info *frame,
+ struct hsr_port *port);
+bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port);
+void prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
+ struct hsr_frame_info *frame);
+void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
+ struct hsr_frame_info *frame);
#endif /* __HSR_FORWARD_H */
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 530de24b1fb5..5c97de459905 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -8,6 +8,7 @@
* interface. A frame is identified by its source MAC address and its HSR
* sequence number. This code keeps track of senders and their sequence numbers
* to allow filtering of duplicate frames, and to detect HSR ring errors.
+ * Same code handles filtering of duplicates for PRP as well.
*/
#include <linux/if_ether.h>
@@ -126,6 +127,19 @@ void hsr_del_nodes(struct list_head *node_db)
kfree(node);
}
+void prp_handle_san_frame(bool san, enum hsr_port_type port,
+ struct hsr_node *node)
+{
+ /* Mark if the SAN node is over LAN_A or LAN_B */
+ if (port == HSR_PT_SLAVE_A) {
+ node->san_a = true;
+ return;
+ }
+
+ if (port == HSR_PT_SLAVE_B)
+ node->san_b = true;
+}
+
/* Allocate an hsr_node and add it to node_db. 'addr' is the node's address_A;
* seq_out is used to initialize filtering of outgoing duplicate frames
* originating from the newly added node.
@@ -133,7 +147,8 @@ void hsr_del_nodes(struct list_head *node_db)
static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
struct list_head *node_db,
unsigned char addr[],
- u16 seq_out)
+ u16 seq_out, bool san,
+ enum hsr_port_type rx_port)
{
struct hsr_node *new_node, *node;
unsigned long now;
@@ -154,6 +169,9 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
for (i = 0; i < HSR_PT_PORTS; i++)
new_node->seq_out[i] = seq_out;
+ if (san && hsr->proto_ops->handle_san_frame)
+ hsr->proto_ops->handle_san_frame(san, rx_port, new_node);
+
spin_lock_bh(&hsr->list_lock);
list_for_each_entry_rcu(node, node_db, mac_list,
lockdep_is_held(&hsr->list_lock)) {
@@ -171,15 +189,26 @@ out:
return node;
}
+void prp_update_san_info(struct hsr_node *node, bool is_sup)
+{
+ if (!is_sup)
+ return;
+
+ node->san_a = false;
+ node->san_b = false;
+}
+
/* Get the hsr_node from which 'skb' was sent.
*/
-struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
- bool is_sup)
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
+ struct sk_buff *skb, bool is_sup,
+ enum hsr_port_type rx_port)
{
- struct list_head *node_db = &port->hsr->node_db;
struct hsr_priv *hsr = port->hsr;
struct hsr_node *node;
struct ethhdr *ethhdr;
+ struct prp_rct *rct;
+ bool san = false;
u16 seq_out;
if (!skb_mac_header_was_set(skb))
@@ -188,14 +217,21 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
ethhdr = (struct ethhdr *)skb_mac_header(skb);
list_for_each_entry_rcu(node, node_db, mac_list) {
- if (ether_addr_equal(node->macaddress_A, ethhdr->h_source))
+ if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) {
+ if (hsr->proto_ops->update_san_info)
+ hsr->proto_ops->update_san_info(node, is_sup);
return node;
- if (ether_addr_equal(node->macaddress_B, ethhdr->h_source))
+ }
+ if (ether_addr_equal(node->macaddress_B, ethhdr->h_source)) {
+ if (hsr->proto_ops->update_san_info)
+ hsr->proto_ops->update_san_info(node, is_sup);
return node;
+ }
}
- /* Everyone may create a node entry, connected node to a HSR device. */
-
+ /* Everyone may create a node entry, connected node to a HSR/PRP
+ * device.
+ */
if (ethhdr->h_proto == htons(ETH_P_PRP) ||
ethhdr->h_proto == htons(ETH_P_HSR)) {
/* Use the existing sequence_nr from the tag as starting point
@@ -203,31 +239,47 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
*/
seq_out = hsr_get_skb_sequence_nr(skb) - 1;
} else {
- /* this is called also for frames from master port and
- * so warn only for non master ports
- */
- if (port->type != HSR_PT_MASTER)
- WARN_ONCE(1, "%s: Non-HSR frame\n", __func__);
- seq_out = HSR_SEQNR_START;
+ rct = skb_get_PRP_rct(skb);
+ if (rct && prp_check_lsdu_size(skb, rct, is_sup)) {
+ seq_out = prp_get_skb_sequence_nr(rct);
+ } else {
+ if (rx_port != HSR_PT_MASTER)
+ san = true;
+ seq_out = HSR_SEQNR_START;
+ }
}
- return hsr_add_node(hsr, node_db, ethhdr->h_source, seq_out);
+ return hsr_add_node(hsr, node_db, ethhdr->h_source, seq_out,
+ san, rx_port);
}
/* Use the Supervision frame's info about an eventual macaddress_B for merging
* nodes that has previously had their macaddress_B registered as a separate
* node.
*/
-void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
- struct hsr_port *port_rcv)
+void hsr_handle_sup_frame(struct hsr_frame_info *frame)
{
+ struct hsr_node *node_curr = frame->node_src;
+ struct hsr_port *port_rcv = frame->port_rcv;
struct hsr_priv *hsr = port_rcv->hsr;
struct hsr_sup_payload *hsr_sp;
struct hsr_node *node_real;
+ struct sk_buff *skb = NULL;
struct list_head *node_db;
struct ethhdr *ethhdr;
int i;
+ /* Here either frame->skb_hsr or frame->skb_prp should be
+ * valid as supervision frame always will have protocol
+ * header info.
+ */
+ if (frame->skb_hsr)
+ skb = frame->skb_hsr;
+ else if (frame->skb_prp)
+ skb = frame->skb_prp;
+ if (!skb)
+ return;
+
ethhdr = (struct ethhdr *)skb_mac_header(skb);
/* Leave the ethernet header. */
@@ -248,7 +300,8 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
if (!node_real)
/* No frame received from AddrA of this node yet */
node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A,
- HSR_SEQNR_START - 1);
+ HSR_SEQNR_START - 1, true,
+ port_rcv->type);
if (!node_real)
goto done; /* No mem */
if (node_real == node_curr)
@@ -274,7 +327,11 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
kfree_rcu(node_curr, rcu_head);
done:
- skb_push(skb, sizeof(struct hsrv1_ethhdr_sp));
+ /* PRP uses v0 header */
+ if (ethhdr->h_proto == htons(ETH_P_HSR))
+ skb_push(skb, sizeof(struct hsrv1_ethhdr_sp));
+ else
+ skb_push(skb, sizeof(struct hsrv0_ethhdr_sp));
}
/* 'skb' is a frame meant for this host, that is to be passed to upper layers.
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 0f0fa12b4329..86b43f539f2c 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -3,6 +3,8 @@
*
* Author(s):
* 2011-2014 Arvid Brodin, arvid.brodin@alten.se
+ *
+ * include file for HSR and PRP.
*/
#ifndef __HSR_FRAMEREG_H
@@ -12,12 +14,26 @@
struct hsr_node;
+struct hsr_frame_info {
+ struct sk_buff *skb_std;
+ struct sk_buff *skb_hsr;
+ struct sk_buff *skb_prp;
+ struct hsr_port *port_rcv;
+ struct hsr_node *node_src;
+ u16 sequence_nr;
+ bool is_supervision;
+ bool is_vlan;
+ bool is_local_dest;
+ bool is_local_exclusive;
+ bool is_from_san;
+};
+
void hsr_del_self_node(struct hsr_priv *hsr);
void hsr_del_nodes(struct list_head *node_db);
-struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
- bool is_sup);
-void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
- struct hsr_port *port);
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
+ struct sk_buff *skb, bool is_sup,
+ enum hsr_port_type rx_port);
+void hsr_handle_sup_frame(struct hsr_frame_info *frame);
bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr);
void hsr_addr_subst_source(struct hsr_node *node, struct sk_buff *skb);
@@ -47,6 +63,10 @@ int hsr_get_node_data(struct hsr_priv *hsr,
int *if2_age,
u16 *if2_seq);
+void prp_handle_san_frame(bool san, enum hsr_port_type port,
+ struct hsr_node *node);
+void prp_update_san_info(struct hsr_node *node, bool is_sup);
+
struct hsr_node {
struct list_head mac_list;
unsigned char macaddress_A[ETH_ALEN];
@@ -55,6 +75,9 @@ struct hsr_node {
enum hsr_port_type addr_B_port;
unsigned long time_in[HSR_PT_PORTS];
bool time_in_stale[HSR_PT_PORTS];
+ /* if the node is a SAN */
+ bool san_a;
+ bool san_b;
u16 seq_out[HSR_PT_PORTS];
struct rcu_head rcu_head;
};
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index 144da15f0a81..2fd1976e5b1c 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -3,6 +3,8 @@
*
* Author(s):
* 2011-2014 Arvid Brodin, arvid.brodin@alten.se
+ *
+ * Event handling for HSR and PRP devices.
*/
#include <linux/netdevice.h>
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index f74193465bf5..7dc92ce5a134 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -3,6 +3,8 @@
*
* Author(s):
* 2011-2014 Arvid Brodin, arvid.brodin@alten.se
+ *
+ * include file for HSR and PRP.
*/
#ifndef __HSR_PRIVATE_H
@@ -10,6 +12,7 @@
#include <linux/netdevice.h>
#include <linux/list.h>
+#include <linux/if_vlan.h>
/* Time constants as specified in the HSR specification (IEC-62439-3 2010)
* Table 8.
@@ -33,6 +36,10 @@
#define HSR_TLV_ANNOUNCE 22
#define HSR_TLV_LIFE_CHECK 23
+/* PRP V1 life check for Duplicate discard */
+#define PRP_TLV_LIFE_CHECK_DD 20
+/* PRP V1 life check for Duplicate Accept */
+#define PRP_TLV_LIFE_CHECK_DA 21
/* HSR Tag.
* As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB,
@@ -80,7 +87,12 @@ struct hsr_ethhdr {
struct hsr_tag hsr_tag;
} __packed;
-/* HSR Supervision Frame data types.
+struct hsr_vlan_ethhdr {
+ struct vlan_ethhdr vlanhdr;
+ struct hsr_tag hsr_tag;
+} __packed;
+
+/* HSR/PRP Supervision Frame data types.
* Field names as defined in the IEC:2010 standard for HSR.
*/
struct hsr_sup_tag {
@@ -124,6 +136,34 @@ enum hsr_port_type {
HSR_PT_PORTS, /* This must be the last item in the enum */
};
+/* PRP Redunancy Control Trailor (RCT).
+ * As defined in IEC-62439-4:2012, the PRP RCT is really { sequence Nr,
+ * Lan indentifier (LanId), LSDU_size and PRP_suffix = 0x88FB }.
+ *
+ * Field names as defined in the IEC:2012 standard for PRP.
+ */
+struct prp_rct {
+ __be16 sequence_nr;
+ __be16 lan_id_and_LSDU_size;
+ __be16 PRP_suffix;
+} __packed;
+
+static inline u16 get_prp_LSDU_size(struct prp_rct *rct)
+{
+ return ntohs(rct->lan_id_and_LSDU_size) & 0x0FFF;
+}
+
+static inline void set_prp_lan_id(struct prp_rct *rct, u16 lan_id)
+{
+ rct->lan_id_and_LSDU_size = htons((ntohs(rct->lan_id_and_LSDU_size) &
+ 0x0FFF) | (lan_id << 12));
+}
+static inline void set_prp_LSDU_size(struct prp_rct *rct, u16 LSDU_size)
+{
+ rct->lan_id_and_LSDU_size = htons((ntohs(rct->lan_id_and_LSDU_size) &
+ 0xF000) | (LSDU_size & 0x0FFF));
+}
+
struct hsr_port {
struct list_head port_list;
struct net_device *dev;
@@ -131,6 +171,32 @@ struct hsr_port {
enum hsr_port_type type;
};
+/* used by driver internally to differentiate various protocols */
+enum hsr_version {
+ HSR_V0 = 0,
+ HSR_V1,
+ PRP_V1,
+};
+
+struct hsr_frame_info;
+struct hsr_node;
+
+struct hsr_proto_ops {
+ /* format and send supervision frame */
+ void (*send_sv_frame)(struct hsr_port *port, unsigned long *interval);
+ void (*handle_san_frame)(bool san, enum hsr_port_type port,
+ struct hsr_node *node);
+ bool (*drop_frame)(struct hsr_frame_info *frame, struct hsr_port *port);
+ struct sk_buff * (*get_untagged_frame)(struct hsr_frame_info *frame,
+ struct hsr_port *port);
+ struct sk_buff * (*create_tagged_frame)(struct hsr_frame_info *frame,
+ struct hsr_port *port);
+ void (*fill_frame_info)(__be16 proto, struct sk_buff *skb,
+ struct hsr_frame_info *frame);
+ bool (*invalid_dan_ingress_frame)(__be16 protocol);
+ void (*update_san_info)(struct hsr_node *node, bool is_sup);
+};
+
struct hsr_priv {
struct rcu_head rcu_head;
struct list_head ports;
@@ -141,9 +207,16 @@ struct hsr_priv {
int announce_count;
u16 sequence_nr;
u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */
- u8 prot_version; /* Indicate if HSRv0 or HSRv1. */
+ enum hsr_version prot_version; /* Indicate if HSRv0, HSRv1 or PRPv1 */
spinlock_t seqnr_lock; /* locking for sequence_nr */
spinlock_t list_lock; /* locking for node list */
+ struct hsr_proto_ops *proto_ops;
+#define PRP_LAN_ID 0x5 /* 0x1010 for A and 0x1011 for B. Bit 0 is set
+ * based on SLAVE_A or SLAVE_B
+ */
+ u8 net_id; /* for PRP, it occupies most significant 3 bits
+ * of lan_id
+ */
unsigned char sup_multicast_addr[ETH_ALEN];
#ifdef CONFIG_DEBUG_FS
struct dentry *node_tbl_root;
@@ -164,6 +237,49 @@ static inline u16 hsr_get_skb_sequence_nr(struct sk_buff *skb)
return ntohs(hsr_ethhdr->hsr_tag.sequence_nr);
}
+static inline struct prp_rct *skb_get_PRP_rct(struct sk_buff *skb)
+{
+ unsigned char *tail = skb_tail_pointer(skb) - HSR_HLEN;
+
+ struct prp_rct *rct = (struct prp_rct *)tail;
+
+ if (rct->PRP_suffix == htons(ETH_P_PRP))
+ return rct;
+
+ return NULL;
+}
+
+/* Assume caller has confirmed this skb is PRP suffixed */
+static inline u16 prp_get_skb_sequence_nr(struct prp_rct *rct)
+{
+ return ntohs(rct->sequence_nr);
+}
+
+static inline u16 get_prp_lan_id(struct prp_rct *rct)
+{
+ return ntohs(rct->lan_id_and_LSDU_size) >> 12;
+}
+
+/* assume there is a valid rct */
+static inline bool prp_check_lsdu_size(struct sk_buff *skb,
+ struct prp_rct *rct,
+ bool is_sup)
+{
+ struct ethhdr *ethhdr;
+ int expected_lsdu_size;
+
+ if (is_sup) {
+ expected_lsdu_size = HSR_V1_SUP_LSDUSIZE;
+ } else {
+ ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ expected_lsdu_size = skb->len - 14;
+ if (ethhdr->h_proto == htons(ETH_P_8021Q))
+ expected_lsdu_size -= 4;
+ }
+
+ return (expected_lsdu_size == get_prp_LSDU_size(rct));
+}
+
#if IS_ENABLED(CONFIG_DEBUG_FS)
void hsr_debugfs_rename(struct net_device *dev);
void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev);
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 6e14b7d22639..06c3cd988760 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -4,7 +4,7 @@
* Author(s):
* 2011-2014 Arvid Brodin, arvid.brodin@alten.se
*
- * Routines for handling Netlink messages for HSR.
+ * Routines for handling Netlink messages for HSR and PRP.
*/
#include "hsr_netlink.h"
@@ -22,6 +22,7 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = {
[IFLA_HSR_VERSION] = { .type = NLA_U8 },
[IFLA_HSR_SUPERVISION_ADDR] = { .len = ETH_ALEN },
[IFLA_HSR_SEQ_NR] = { .type = NLA_U16 },
+ [IFLA_HSR_PROTOCOL] = { .type = NLA_U8 },
};
/* Here, it seems a netdevice has already been allocated for us, and the
@@ -31,8 +32,10 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
+ enum hsr_version proto_version;
+ unsigned char multicast_spec;
+ u8 proto = HSR_PROTOCOL_HSR;
struct net_device *link[2];
- unsigned char multicast_spec, hsr_version;
if (!data) {
NL_SET_ERR_MSG_MOD(extack, "No slave devices specified");
@@ -69,18 +72,34 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
else
multicast_spec = nla_get_u8(data[IFLA_HSR_MULTICAST_SPEC]);
+ if (data[IFLA_HSR_PROTOCOL])
+ proto = nla_get_u8(data[IFLA_HSR_PROTOCOL]);
+
+ if (proto >= HSR_PROTOCOL_MAX) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol\n");
+ return -EINVAL;
+ }
+
if (!data[IFLA_HSR_VERSION]) {
- hsr_version = 0;
+ proto_version = HSR_V0;
} else {
- hsr_version = nla_get_u8(data[IFLA_HSR_VERSION]);
- if (hsr_version > 1) {
+ if (proto == HSR_PROTOCOL_PRP) {
+ NL_SET_ERR_MSG_MOD(extack, "PRP version unsupported\n");
+ return -EINVAL;
+ }
+
+ proto_version = nla_get_u8(data[IFLA_HSR_VERSION]);
+ if (proto_version > HSR_V1) {
NL_SET_ERR_MSG_MOD(extack,
- "Only versions 0..1 are supported");
+ "Only HSR version 0/1 supported\n");
return -EINVAL;
}
}
- return hsr_dev_finalize(dev, link, multicast_spec, hsr_version, extack);
+ if (proto == HSR_PROTOCOL_PRP)
+ proto_version = PRP_V1;
+
+ return hsr_dev_finalize(dev, link, multicast_spec, proto_version, extack);
}
static void hsr_dellink(struct net_device *dev, struct list_head *head)
@@ -102,6 +121,7 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head)
static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct hsr_priv *hsr = netdev_priv(dev);
+ u8 proto = HSR_PROTOCOL_HSR;
struct hsr_port *port;
port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A);
@@ -120,6 +140,10 @@ static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev)
hsr->sup_multicast_addr) ||
nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr->sequence_nr))
goto nla_put_failure;
+ if (hsr->prot_version == PRP_V1)
+ proto = HSR_PROTOCOL_PRP;
+ if (nla_put_u8(skb, IFLA_HSR_PROTOCOL, proto))
+ goto nla_put_failure;
return 0;
diff --git a/net/hsr/hsr_netlink.h b/net/hsr/hsr_netlink.h
index 1121bb192a18..501552d9753b 100644
--- a/net/hsr/hsr_netlink.h
+++ b/net/hsr/hsr_netlink.h
@@ -3,6 +3,8 @@
*
* Author(s):
* 2011-2014 Arvid Brodin, arvid.brodin@alten.se
+ *
+ * include file for HSR and PRP.
*/
#ifndef __HSR_NETLINK_H
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index 25b6ffba26cd..36d5fcf09c61 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -3,6 +3,8 @@
*
* Author(s):
* 2011-2014 Arvid Brodin, arvid.brodin@alten.se
+ *
+ * Frame handler other utility functions for HSR and PRP.
*/
#include "hsr_slave.h"
@@ -14,12 +16,22 @@
#include "hsr_forward.h"
#include "hsr_framereg.h"
+bool hsr_invalid_dan_ingress_frame(__be16 protocol)
+{
+ return (protocol != htons(ETH_P_PRP) && protocol != htons(ETH_P_HSR));
+}
+
static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
{
struct sk_buff *skb = *pskb;
struct hsr_port *port;
+ struct hsr_priv *hsr;
__be16 protocol;
+ /* Packets from dev_loopback_xmit() do not have L2 header, bail out */
+ if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
+ return RX_HANDLER_PASS;
+
if (!skb_mac_header_was_set(skb)) {
WARN_ONCE(1, "%s: skb invalid", __func__);
return RX_HANDLER_PASS;
@@ -28,6 +40,7 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
port = hsr_port_get_rcu(skb->dev);
if (!port)
goto finish_pass;
+ hsr = port->hsr;
if (hsr_addr_is_self(port->hsr, eth_hdr(skb)->h_source)) {
/* Directly kill frames sent by ourselves */
@@ -35,12 +48,23 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
goto finish_consume;
}
+ /* For HSR, only tagged frames are expected, but for PRP
+ * there could be non tagged frames as well from Single
+ * attached nodes (SANs).
+ */
protocol = eth_hdr(skb)->h_proto;
- if (protocol != htons(ETH_P_PRP) && protocol != htons(ETH_P_HSR))
+ if (hsr->proto_ops->invalid_dan_ingress_frame &&
+ hsr->proto_ops->invalid_dan_ingress_frame(protocol))
goto finish_pass;
skb_push(skb, ETH_HLEN);
+ if (skb_mac_header(skb) != skb->data) {
+ WARN_ONCE(1, "%s:%d: Malformed frame at source port %s)\n",
+ __func__, __LINE__, port->dev->name);
+ goto finish_consume;
+ }
+
hsr_forward_skb(skb, port);
finish_consume:
diff --git a/net/hsr/hsr_slave.h b/net/hsr/hsr_slave.h
index 8953ea279ce9..edc4612bb009 100644
--- a/net/hsr/hsr_slave.h
+++ b/net/hsr/hsr_slave.h
@@ -2,6 +2,8 @@
/* Copyright 2011-2014 Autronica Fire and Security AS
*
* 2011-2014 Arvid Brodin, arvid.brodin@alten.se
+ *
+ * include file for HSR and PRP.
*/
#ifndef __HSR_SLAVE_H
@@ -30,4 +32,6 @@ static inline struct hsr_port *hsr_port_get_rcu(const struct net_device *dev)
rcu_dereference(dev->rx_handler_data) : NULL;
}
+bool hsr_invalid_dan_ingress_frame(__be16 protocol);
+
#endif /* __HSR_SLAVE_H */
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index d93d4531aa9b..a45a0401adc5 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -382,7 +382,7 @@ static int raw_getsockopt(struct sock *sk, int level, int optname,
}
static int raw_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
return -EOPNOTSUPP;
}
@@ -423,10 +423,6 @@ static const struct proto_ops ieee802154_raw_ops = {
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
-#endif
};
/* DGRAM Sockets (802.15.4 dataframes) */
@@ -876,7 +872,7 @@ static int dgram_getsockopt(struct sock *sk, int level, int optname,
}
static int dgram_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct dgram_sock *ro = dgram_sk(sk);
struct net *net = sock_net(sk);
@@ -886,7 +882,7 @@ static int dgram_setsockopt(struct sock *sk, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
return -EFAULT;
lock_sock(sk);
@@ -986,10 +982,6 @@ static const struct proto_ops ieee802154_dgram_ops = {
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
-#endif
};
/* Create a socket. Initialise the socket, blank the addresses
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e64e59b536d3..60db5a6487cc 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -10,7 +10,7 @@ config IP_MULTICAST
intend to participate in the MBONE, a high bandwidth network on top
of the Internet which carries audio and video broadcasts. More
information about the MBONE is on the WWW at
- <http://www.savetz.com/mbone/>. For most people, it's safe to say N.
+ <https://www.savetz.com/mbone/>. For most people, it's safe to say N.
config IP_ADVANCED_ROUTER
bool "IP: advanced router"
@@ -73,7 +73,7 @@ config IP_MULTIPLE_TABLES
If you need more information, see the Linux Advanced
Routing and Traffic Control documentation at
- <http://lartc.org/howto/lartc.rpdb.html>
+ <https://lartc.org/howto/lartc.rpdb.html>
If unsure, say N.
@@ -280,7 +280,7 @@ config SYN_COOKIES
continue to connect, even when your machine is under attack. There
is no need for the legitimate users to change their TCP/IP software;
SYN cookies work transparently to them. For technical information
- about SYN cookies, check out <http://cr.yp.to/syncookies.html>.
+ about SYN cookies, check out <https://cr.yp.to/syncookies.html>.
If you are SYN flooded, the source address reported by the kernel is
likely to have been forged by the attacker; it is only reported as
@@ -525,7 +525,7 @@ config TCP_CONG_HSTCP
A modification to TCP's congestion control mechanism for use
with large congestion windows. A table indicates how much to
increase the congestion window by when an ACK is received.
- For more detail see http://www.icir.org/floyd/hstcp.html
+ For more detail see https://www.icir.org/floyd/hstcp.html
config TCP_CONG_HYBLA
tristate "TCP-Hybla congestion control algorithm"
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 9e1a186a3671..5b77a46885b9 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -14,7 +14,7 @@ obj-y := route.o inetpeer.o protocol.o \
udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
- metrics.o netlink.o nexthop.o
+ metrics.o netlink.o nexthop.o udp_tunnel_stub.o
obj-$(CONFIG_BPFILTER) += bpfilter/
@@ -29,6 +29,7 @@ gre-y := gre_demux.o
obj-$(CONFIG_NET_FOU) += fou.o
obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
obj-$(CONFIG_NET_IPGRE) += ip_gre.o
+udp_tunnel-y := udp_tunnel_core.o udp_tunnel_nic.o
obj-$(CONFIG_NET_UDP_TUNNEL) += udp_tunnel.o
obj-$(CONFIG_NET_IPVTI) += ip_vti.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 02aa5cb3a4fd..4307503a6f0b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -411,6 +411,9 @@ int inet_release(struct socket *sock)
if (sk) {
long timeout;
+ if (!sk->sk_kern_sock)
+ BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk);
+
/* Applications forget to leave groups before exiting */
ip_mc_drop_socket(sk);
@@ -1040,8 +1043,6 @@ const struct proto_ops inet_stream_ops = {
.sendpage_locked = tcp_sendpage_locked,
.peek_len = tcp_peek_len,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
.compat_ioctl = inet_compat_ioctl,
#endif
.set_rcvlowat = tcp_set_rcvlowat,
@@ -1070,8 +1071,6 @@ const struct proto_ops inet_dgram_ops = {
.sendpage = inet_sendpage,
.set_peek_off = sk_set_peek_off,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
.compat_ioctl = inet_compat_ioctl,
#endif
};
@@ -1102,8 +1101,6 @@ static const struct proto_ops inet_sockraw_ops = {
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
.compat_ioctl = inet_compat_ioctl,
#endif
};
@@ -1432,10 +1429,6 @@ static struct sk_buff *ipip_gso_segment(struct sk_buff *skb,
return inet_gso_segment(skb, features);
}
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *,
- struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
- struct sk_buff *));
struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
{
const struct net_offload *ops;
@@ -1608,8 +1601,6 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
return -EINVAL;
}
-INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *, int));
-INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));
int inet_gro_complete(struct sk_buff *skb, int nhoff)
{
__be16 newlen = htons(skb->len - nhoff);
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
index 0480918bfc7c..545b2640f019 100644
--- a/net/ipv4/bpfilter/sockopt.c
+++ b/net/ipv4/bpfilter/sockopt.c
@@ -12,18 +12,16 @@
struct bpfilter_umh_ops bpfilter_ops;
EXPORT_SYMBOL_GPL(bpfilter_ops);
-static void bpfilter_umh_cleanup(struct umh_info *info)
+void bpfilter_umh_cleanup(struct umd_info *info)
{
- mutex_lock(&bpfilter_ops.lock);
- bpfilter_ops.stop = true;
fput(info->pipe_to_umh);
fput(info->pipe_from_umh);
- info->pid = 0;
- mutex_unlock(&bpfilter_ops.lock);
+ put_pid(info->tgid);
+ info->tgid = NULL;
}
+EXPORT_SYMBOL_GPL(bpfilter_umh_cleanup);
-static int bpfilter_mbox_request(struct sock *sk, int optname,
- char __user *optval,
+static int bpfilter_mbox_request(struct sock *sk, int optname, sockptr_t optval,
unsigned int optlen, bool is_set)
{
int err;
@@ -38,7 +36,11 @@ static int bpfilter_mbox_request(struct sock *sk, int optname,
goto out;
}
}
- if (bpfilter_ops.stop) {
+ if (bpfilter_ops.info.tgid &&
+ thread_group_exited(bpfilter_ops.info.tgid))
+ bpfilter_umh_cleanup(&bpfilter_ops.info);
+
+ if (!bpfilter_ops.info.tgid) {
err = bpfilter_ops.start();
if (err)
goto out;
@@ -49,29 +51,31 @@ out:
return err;
}
-int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
+int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval,
unsigned int optlen)
{
return bpfilter_mbox_request(sk, optname, optval, optlen, true);
}
-int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
- int __user *optlen)
+int bpfilter_ip_get_sockopt(struct sock *sk, int optname,
+ char __user *user_optval, int __user *optlen)
{
- int len;
+ sockptr_t optval;
+ int err, len;
if (get_user(len, optlen))
return -EFAULT;
-
+ err = init_user_sockptr(&optval, user_optval, len);
+ if (err)
+ return err;
return bpfilter_mbox_request(sk, optname, optval, len, false);
}
static int __init bpfilter_sockopt_init(void)
{
mutex_init(&bpfilter_ops.lock);
- bpfilter_ops.stop = true;
- bpfilter_ops.info.cmdline = "bpfilter_umh";
- bpfilter_ops.info.cleanup = &bpfilter_umh_cleanup;
+ bpfilter_ops.info.tgid = NULL;
+ bpfilter_ops.info.driver_name = "bpfilter_umh";
return 0;
}
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index a23094b050f8..2eb71579f4d2 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -10,9 +10,9 @@
*
* The CIPSO draft specification can be found in the kernel's Documentation
* directory as well as the following URL:
- * http://tools.ietf.org/id/draft-ietf-cipso-ipsecurity-01.txt
+ * https://tools.ietf.org/id/draft-ietf-cipso-ipsecurity-01.txt
* The FIPS-188 specification can be found at the following URL:
- * http://www.itl.nist.gov/fipspubs/fip188.htm
+ * https://www.itl.nist.gov/fipspubs/fip188.htm
*
* Author: Paul Moore <paul.moore@hp.com>
*/
@@ -283,7 +283,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
/**
* cipso_v4_cache_add - Add an entry to the CIPSO cache
- * @skb: the packet
+ * @cipso_ptr: pointer to CIPSO IP option
* @secattr: the packet's security attributes
*
* Description:
@@ -1535,6 +1535,7 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb)
/**
* cipso_v4_validate - Validate a CIPSO option
+ * @skb: the packet
* @option: the start of the option, on error it is set to point to the error
*
* Description:
@@ -2066,7 +2067,7 @@ void cipso_v4_sock_delattr(struct sock *sk)
/**
* cipso_v4_req_delattr - Delete the CIPSO option from a request socket
- * @reg: the request socket
+ * @req: the request socket
*
* Description:
* Removes the CIPSO option from a request socket, if present.
@@ -2158,6 +2159,7 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
/**
* cipso_v4_skbuff_setattr - Set the CIPSO option on a packet
* @skb: the packet
+ * @doi_def: the DOI structure
* @secattr: the security attributes
*
* Description:
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index f99e3bac5cab..ce54a30c2ef1 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -29,6 +29,7 @@
#include <net/ip_fib.h>
#include <net/nexthop.h>
#include <net/fib_rules.h>
+#include <linux/indirect_call_wrapper.h>
struct fib4_rule {
struct fib_rule common;
@@ -103,8 +104,9 @@ int __fib_lookup(struct net *net, struct flowi4 *flp,
}
EXPORT_SYMBOL_GPL(__fib_lookup);
-static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
- int flags, struct fib_lookup_arg *arg)
+INDIRECT_CALLABLE_SCOPE int fib4_rule_action(struct fib_rule *rule,
+ struct flowi *flp, int flags,
+ struct fib_lookup_arg *arg)
{
int err = -EAGAIN;
struct fib_table *tbl;
@@ -138,7 +140,8 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
return err;
}
-static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
+INDIRECT_CALLABLE_SCOPE bool fib4_rule_suppress(struct fib_rule *rule,
+ struct fib_lookup_arg *arg)
{
struct fib_result *result = (struct fib_result *) arg->result;
struct net_device *dev = NULL;
@@ -169,7 +172,8 @@ suppress_route:
return true;
}
-static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
+INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
+ struct flowi *fl, int flags)
{
struct fib4_rule *r = (struct fib4_rule *) rule;
struct flowi4 *fl4 = &fl->u.ip4;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3c65f71d0e82..c89b46fec153 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -13,7 +13,7 @@
*
* An experimental study of compression methods for dynamic tries
* Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
- * http://www.csc.kth.se/~snilsson/software/dyntrie2/
+ * https://www.csc.kth.se/~snilsson/software/dyntrie2/
*
* IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
* IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 2e6d1b7a7bc9..e0a246575887 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -15,12 +15,12 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
+ bool need_csum, need_recompute_csum, gso_partial;
struct sk_buff *segs = ERR_PTR(-EINVAL);
u16 mac_offset = skb->mac_header;
__be16 protocol = skb->protocol;
u16 mac_len = skb->mac_len;
int gre_offset, outer_hlen;
- bool need_csum, gso_partial;
if (!skb->encapsulation)
goto out;
@@ -41,6 +41,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
skb->protocol = skb->inner_protocol;
need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM);
+ need_recompute_csum = skb->csum_not_inet;
skb->encap_hdr_csum = need_csum;
features &= skb->dev->hw_enc_features;
@@ -98,7 +99,15 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
}
*(pcsum + 1) = 0;
- *pcsum = gso_make_checksum(skb, 0);
+ if (need_recompute_csum && !skb_is_gso(skb)) {
+ __wsum csum;
+
+ csum = skb_checksum(skb, gre_offset,
+ skb->len - gre_offset, 0);
+ *pcsum = csum_fold(csum);
+ } else {
+ *pcsum = gso_make_checksum(skb, 0);
+ }
} while ((skb = skb->next));
out:
return segs;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e30515f89802..cf36f955bfe6 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1116,6 +1116,65 @@ error:
goto drop;
}
+static bool ip_icmp_error_rfc4884_validate(const struct sk_buff *skb, int off)
+{
+ struct icmp_extobj_hdr *objh, _objh;
+ struct icmp_ext_hdr *exth, _exth;
+ u16 olen;
+
+ exth = skb_header_pointer(skb, off, sizeof(_exth), &_exth);
+ if (!exth)
+ return false;
+ if (exth->version != 2)
+ return true;
+
+ if (exth->checksum &&
+ csum_fold(skb_checksum(skb, off, skb->len - off, 0)))
+ return false;
+
+ off += sizeof(_exth);
+ while (off < skb->len) {
+ objh = skb_header_pointer(skb, off, sizeof(_objh), &_objh);
+ if (!objh)
+ return false;
+
+ olen = ntohs(objh->length);
+ if (olen < sizeof(_objh))
+ return false;
+
+ off += olen;
+ if (off > skb->len)
+ return false;
+ }
+
+ return true;
+}
+
+void ip_icmp_error_rfc4884(const struct sk_buff *skb,
+ struct sock_ee_data_rfc4884 *out,
+ int thlen, int off)
+{
+ int hlen;
+
+ /* original datagram headers: end of icmph to payload (skb->data) */
+ hlen = -skb_transport_offset(skb) - thlen;
+
+ /* per rfc 4884: minimal datagram length of 128 bytes */
+ if (off < 128 || off < hlen)
+ return;
+
+ /* kernel has stripped headers: return payload offset in bytes */
+ off -= hlen;
+ if (off + sizeof(struct icmp_ext_hdr) > skb->len)
+ return;
+
+ out->len = off;
+
+ if (!ip_icmp_error_rfc4884_validate(skb, off))
+ out->flags |= SO_EE_RFC4884_FLAG_INVALID;
+}
+EXPORT_SYMBOL_GPL(ip_icmp_error_rfc4884);
+
int icmp_err(struct sk_buff *skb, u32 info)
{
struct iphdr *iph = (struct iphdr *)skb->data;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index afaf582a5aa9..d1a3913eebe0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -648,20 +648,19 @@ no_route:
EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
/* Decide when to expire the request and when to resend SYN-ACK */
-static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
- const int max_retries,
- const u8 rskq_defer_accept,
- int *expire, int *resend)
+static void syn_ack_recalc(struct request_sock *req,
+ const int max_syn_ack_retries,
+ const u8 rskq_defer_accept,
+ int *expire, int *resend)
{
if (!rskq_defer_accept) {
- *expire = req->num_timeout >= thresh;
+ *expire = req->num_timeout >= max_syn_ack_retries;
*resend = 1;
return;
}
- *expire = req->num_timeout >= thresh &&
- (!inet_rsk(req)->acked || req->num_timeout >= max_retries);
- /*
- * Do not resend while waiting for data after ACK,
+ *expire = req->num_timeout >= max_syn_ack_retries &&
+ (!inet_rsk(req)->acked || req->num_timeout >= rskq_defer_accept);
+ /* Do not resend while waiting for data after ACK,
* start to resend on end of deferring period to give
* last chance for data or ACK to create established socket.
*/
@@ -720,15 +719,12 @@ static void reqsk_timer_handler(struct timer_list *t)
struct net *net = sock_net(sk_listener);
struct inet_connection_sock *icsk = inet_csk(sk_listener);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
- int qlen, expire = 0, resend = 0;
- int max_retries, thresh;
- u8 defer_accept;
+ int max_syn_ack_retries, qlen, expire = 0, resend = 0;
if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
goto drop;
- max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
- thresh = max_retries;
+ max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
* If synack was not acknowledged for 1 second, it means
@@ -750,17 +746,14 @@ static void reqsk_timer_handler(struct timer_list *t)
if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
int young = reqsk_queue_len_young(queue) << 1;
- while (thresh > 2) {
+ while (max_syn_ack_retries > 2) {
if (qlen < young)
break;
- thresh--;
+ max_syn_ack_retries--;
young <<= 1;
}
}
- defer_accept = READ_ONCE(queue->rskq_defer_accept);
- if (defer_accept)
- max_retries = defer_accept;
- syn_ack_recalc(req, thresh, max_retries, defer_accept,
+ syn_ack_recalc(req, max_syn_ack_retries, READ_ONCE(queue->rskq_defer_accept),
&expire, &resend);
req->rsk_ops->syn_ack_timeout(req);
if (!expire &&
@@ -1064,34 +1057,6 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
}
EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
-#ifdef CONFIG_COMPAT
-int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- const struct inet_connection_sock *icsk = inet_csk(sk);
-
- if (icsk->icsk_af_ops->compat_getsockopt)
- return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname,
- optval, optlen);
- return icsk->icsk_af_ops->getsockopt(sk, level, optname,
- optval, optlen);
-}
-EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt);
-
-int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- const struct inet_connection_sock *icsk = inet_csk(sk);
-
- if (icsk->icsk_af_ops->compat_setsockopt)
- return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname,
- optval, optlen);
- return icsk->icsk_af_ops->setsockopt(sk, level, optname,
- optval, optlen);
-}
-EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt);
-#endif
-
static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl)
{
const struct inet_sock *inet = inet_sk(sk);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 125f4f8a36b4..4a98dd736270 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -52,6 +52,11 @@ static DEFINE_MUTEX(inet_diag_table_mutex);
static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
{
+ if (proto < 0 || proto >= IPPROTO_MAX) {
+ mutex_lock(&inet_diag_table_mutex);
+ return ERR_PTR(-ENOENT);
+ }
+
if (!inet_diag_table[proto])
sock_load_diag_module(AF_INET, proto);
@@ -181,6 +186,28 @@ errout:
}
EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill);
+static void inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen,
+ struct nlattr **req_nlas)
+{
+ struct nlattr *nla;
+ int remaining;
+
+ nlmsg_for_each_attr(nla, nlh, hdrlen, remaining) {
+ int type = nla_type(nla);
+
+ if (type < __INET_DIAG_REQ_MAX)
+ req_nlas[type] = nla;
+ }
+}
+
+static int inet_diag_get_protocol(const struct inet_diag_req_v2 *req,
+ const struct inet_diag_dump_data *data)
+{
+ if (data->req_nlas[INET_DIAG_REQ_PROTOCOL])
+ return nla_get_u32(data->req_nlas[INET_DIAG_REQ_PROTOCOL]);
+ return req->sdiag_protocol;
+}
+
#define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
@@ -198,7 +225,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
void *info = NULL;
cb_data = cb->data;
- handler = inet_diag_table[req->sdiag_protocol];
+ handler = inet_diag_table[inet_diag_get_protocol(req, cb_data)];
BUG_ON(!handler);
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
@@ -539,20 +566,25 @@ EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
const struct nlmsghdr *nlh,
+ int hdrlen,
const struct inet_diag_req_v2 *req)
{
const struct inet_diag_handler *handler;
- int err;
+ struct inet_diag_dump_data dump_data;
+ int err, protocol;
- handler = inet_diag_lock_handler(req->sdiag_protocol);
+ memset(&dump_data, 0, sizeof(dump_data));
+ inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas);
+ protocol = inet_diag_get_protocol(req, &dump_data);
+
+ handler = inet_diag_lock_handler(protocol);
if (IS_ERR(handler)) {
err = PTR_ERR(handler);
} else if (cmd == SOCK_DIAG_BY_FAMILY) {
- struct inet_diag_dump_data empty_dump_data = {};
struct netlink_callback cb = {
.nlh = nlh,
.skb = in_skb,
- .data = &empty_dump_data,
+ .data = &dump_data,
};
err = handler->dump_one(&cb, req);
} else if (cmd == SOCK_DESTROY && handler->destroy) {
@@ -1103,13 +1135,16 @@ EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
const struct inet_diag_req_v2 *r)
{
+ struct inet_diag_dump_data *cb_data = cb->data;
const struct inet_diag_handler *handler;
u32 prev_min_dump_alloc;
- int err = 0;
+ int protocol, err = 0;
+
+ protocol = inet_diag_get_protocol(r, cb_data);
again:
prev_min_dump_alloc = cb->min_dump_alloc;
- handler = inet_diag_lock_handler(r->sdiag_protocol);
+ handler = inet_diag_lock_handler(protocol);
if (!IS_ERR(handler))
handler->dump(skb, cb, r);
else
@@ -1139,19 +1174,13 @@ static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen)
struct inet_diag_dump_data *cb_data;
struct sk_buff *skb = cb->skb;
struct nlattr *nla;
- int rem, err;
+ int err;
cb_data = kzalloc(sizeof(*cb_data), GFP_KERNEL);
if (!cb_data)
return -ENOMEM;
- nla_for_each_attr(nla, nlmsg_attrdata(nlh, hdrlen),
- nlmsg_attrlen(nlh, hdrlen), rem) {
- int type = nla_type(nla);
-
- if (type < __INET_DIAG_REQ_MAX)
- cb_data->req_nlas[type] = nla;
- }
+ inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas);
nla = cb_data->inet_diag_nla_bc;
if (nla) {
@@ -1237,7 +1266,8 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
req.idiag_states = rc->idiag_states;
req.id = rc->id;
- return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req);
+ return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh,
+ sizeof(struct inet_diag_req), &req);
}
static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -1279,7 +1309,8 @@ static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
return netlink_dump_start(net->diag_nlsk, skb, h, &c);
}
- return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h));
+ return inet_diag_cmd_exact(h->nlmsg_type, skb, h, hdrlen,
+ nlmsg_data(h));
}
static
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2bbaaf0c7176..4eb4cd8d20dd 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -246,6 +246,21 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned short hnum)
+{
+ struct sock *reuse_sk = NULL;
+ u32 phash;
+
+ if (sk->sk_reuseport) {
+ phash = inet_ehashfn(net, daddr, hnum, saddr, sport);
+ reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
+ }
+ return reuse_sk;
+}
+
/*
* Here are some nice properties to exploit here. The BSD API
* does not allow a listening sock to specify the remote port nor the
@@ -265,21 +280,17 @@ static struct sock *inet_lhash2_lookup(struct net *net,
struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
int score, hiscore = 0;
- u32 phash = 0;
inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
sk = (struct sock *)icsk;
score = compute_score(sk, net, hnum, daddr,
dif, sdif, exact_dif);
if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- return result;
- }
+ result = lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum);
+ if (result)
+ return result;
+
result = sk;
hiscore = score;
}
@@ -288,6 +299,29 @@ static struct sock *inet_lhash2_lookup(struct net *net,
return result;
}
+static inline struct sock *inet_lookup_run_bpf(struct net *net,
+ struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, u16 hnum)
+{
+ struct sock *sk, *reuse_sk;
+ bool no_reuseport;
+
+ if (hashinfo != &tcp_hashinfo)
+ return NULL; /* only TCP is supported */
+
+ no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP,
+ saddr, sport, daddr, hnum, &sk);
+ if (no_reuseport || IS_ERR_OR_NULL(sk))
+ return sk;
+
+ reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+ if (reuse_sk)
+ sk = reuse_sk;
+ return sk;
+}
+
struct sock *__inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -299,6 +333,14 @@ struct sock *__inet_lookup_listener(struct net *net,
struct sock *result = NULL;
unsigned int hash2;
+ /* Lookup redirect from BPF */
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+ result = inet_lookup_run_bpf(net, hashinfo, skb, doff,
+ saddr, sport, daddr, hnum);
+ if (result)
+ goto done;
+ }
+
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ddaa01ec2bce..948747aac4e2 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -519,15 +519,20 @@ void ip_options_undo(struct ip_options *opt)
}
}
-static struct ip_options_rcu *ip_options_get_alloc(const int optlen)
+int ip_options_get(struct net *net, struct ip_options_rcu **optp,
+ sockptr_t data, int optlen)
{
- return kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3),
+ struct ip_options_rcu *opt;
+
+ opt = kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3),
GFP_KERNEL);
-}
+ if (!opt)
+ return -ENOMEM;
+ if (optlen && copy_from_sockptr(opt->opt.__data, data, optlen)) {
+ kfree(opt);
+ return -EFAULT;
+ }
-static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp,
- struct ip_options_rcu *opt, int optlen)
-{
while (optlen & 3)
opt->opt.__data[optlen++] = IPOPT_END;
opt->opt.optlen = optlen;
@@ -540,32 +545,6 @@ static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp,
return 0;
}
-int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
- unsigned char __user *data, int optlen)
-{
- struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
-
- if (!opt)
- return -ENOMEM;
- if (optlen && copy_from_user(opt->opt.__data, data, optlen)) {
- kfree(opt);
- return -EFAULT;
- }
- return ip_options_get_finish(net, optp, opt, optlen);
-}
-
-int ip_options_get(struct net *net, struct ip_options_rcu **optp,
- unsigned char *data, int optlen)
-{
- struct ip_options_rcu *opt = ip_options_get_alloc(optlen);
-
- if (!opt)
- return -ENOMEM;
- if (optlen)
- memcpy(opt->opt.__data, data, optlen);
- return ip_options_get_finish(net, optp, opt, optlen);
-}
-
void ip_forward_options(struct sk_buff *skb)
{
struct ip_options *opt = &(IPCB(skb)->opt);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 17206677d503..61f802d5350c 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -539,6 +539,12 @@ no_route:
}
EXPORT_SYMBOL(__ip_queue_xmit);
+int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
+{
+ return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos);
+}
+EXPORT_SYMBOL(ip_queue_xmit);
+
static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
to->pkt_type = from->pkt_type;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 84ec3703c909..d2c223554ff7 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -280,7 +280,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
err = cmsg->cmsg_len - sizeof(struct cmsghdr);
/* Our caller is responsible for freeing ipc->opt */
- err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
+ err = ip_options_get(net, &ipc->opt,
+ KERNEL_SOCKPTR(CMSG_DATA(cmsg)),
err < 40 ? err : 40);
if (err)
return err;
@@ -389,6 +390,18 @@ int ip_ra_control(struct sock *sk, unsigned char on,
return 0;
}
+static void ipv4_icmp_error_rfc4884(const struct sk_buff *skb,
+ struct sock_ee_data_rfc4884 *out)
+{
+ switch (icmp_hdr(skb)->type) {
+ case ICMP_DEST_UNREACH:
+ case ICMP_TIME_EXCEEDED:
+ case ICMP_PARAMETERPROB:
+ ip_icmp_error_rfc4884(skb, out, sizeof(struct icmphdr),
+ icmp_hdr(skb)->un.reserved[1] * 4);
+ }
+}
+
void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload)
{
@@ -411,6 +424,9 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
serr->port = port;
if (skb_pull(skb, payload - skb->data)) {
+ if (inet_sk(sk)->recverr_rfc4884)
+ ipv4_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);
+
skb_reset_transport_header(skb);
if (sock_queue_err_skb(sk, skb) == 0)
return;
@@ -679,20 +695,48 @@ Eaddrnotavail:
return -EADDRNOTAVAIL;
}
+static int copy_group_source_from_sockptr(struct group_source_req *greqs,
+ sockptr_t optval, int optlen)
+{
+ if (in_compat_syscall()) {
+ struct compat_group_source_req gr32;
+
+ if (optlen != sizeof(gr32))
+ return -EINVAL;
+ if (copy_from_sockptr(&gr32, optval, sizeof(gr32)))
+ return -EFAULT;
+ greqs->gsr_interface = gr32.gsr_interface;
+ greqs->gsr_group = gr32.gsr_group;
+ greqs->gsr_source = gr32.gsr_source;
+ } else {
+ if (optlen != sizeof(*greqs))
+ return -EINVAL;
+ if (copy_from_sockptr(greqs, optval, sizeof(*greqs)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
static int do_mcast_group_source(struct sock *sk, int optname,
- struct group_source_req *greqs)
+ sockptr_t optval, int optlen)
{
+ struct group_source_req greqs;
struct ip_mreq_source mreqs;
struct sockaddr_in *psin;
int omode, add, err;
- if (greqs->gsr_group.ss_family != AF_INET ||
- greqs->gsr_source.ss_family != AF_INET)
+ err = copy_group_source_from_sockptr(&greqs, optval, optlen);
+ if (err)
+ return err;
+
+ if (greqs.gsr_group.ss_family != AF_INET ||
+ greqs.gsr_source.ss_family != AF_INET)
return -EADDRNOTAVAIL;
- psin = (struct sockaddr_in *)&greqs->gsr_group;
+ psin = (struct sockaddr_in *)&greqs.gsr_group;
mreqs.imr_multiaddr = psin->sin_addr.s_addr;
- psin = (struct sockaddr_in *)&greqs->gsr_source;
+ psin = (struct sockaddr_in *)&greqs.gsr_source;
mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
mreqs.imr_interface = 0; /* use index for mc_source */
@@ -705,25 +749,145 @@ static int do_mcast_group_source(struct sock *sk, int optname,
} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
struct ip_mreqn mreq;
- psin = (struct sockaddr_in *)&greqs->gsr_group;
+ psin = (struct sockaddr_in *)&greqs.gsr_group;
mreq.imr_multiaddr = psin->sin_addr;
mreq.imr_address.s_addr = 0;
- mreq.imr_ifindex = greqs->gsr_interface;
+ mreq.imr_ifindex = greqs.gsr_interface;
err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
if (err && err != -EADDRINUSE)
return err;
- greqs->gsr_interface = mreq.imr_ifindex;
+ greqs.gsr_interface = mreq.imr_ifindex;
omode = MCAST_INCLUDE;
add = 1;
} else /* MCAST_LEAVE_SOURCE_GROUP */ {
omode = MCAST_INCLUDE;
add = 0;
}
- return ip_mc_source(add, omode, sk, &mreqs, greqs->gsr_interface);
+ return ip_mc_source(add, omode, sk, &mreqs, greqs.gsr_interface);
+}
+
+static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
+{
+ struct group_filter *gsf = NULL;
+ int err;
+
+ if (optlen < GROUP_FILTER_SIZE(0))
+ return -EINVAL;
+ if (optlen > sysctl_optmem_max)
+ return -ENOBUFS;
+
+ gsf = memdup_sockptr(optval, optlen);
+ if (IS_ERR(gsf))
+ return PTR_ERR(gsf);
+
+ /* numsrc >= (4G-140)/128 overflow in 32 bits */
+ err = -ENOBUFS;
+ if (gsf->gf_numsrc >= 0x1ffffff ||
+ gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+ goto out_free_gsf;
+
+ err = -EINVAL;
+ if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen)
+ goto out_free_gsf;
+
+ err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc,
+ gsf->gf_fmode, &gsf->gf_group, gsf->gf_slist);
+out_free_gsf:
+ kfree(gsf);
+ return err;
+}
+
+static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
+ int optlen)
+{
+ const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ struct compat_group_filter *gf32;
+ unsigned int n;
+ void *p;
+ int err;
+
+ if (optlen < size0)
+ return -EINVAL;
+ if (optlen > sysctl_optmem_max - 4)
+ return -ENOBUFS;
+
+ p = kmalloc(optlen + 4, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+
+ err = -EFAULT;
+ if (copy_from_sockptr(gf32, optval, optlen))
+ goto out_free_gsf;
+
+ /* numsrc >= (4G-140)/128 overflow in 32 bits */
+ n = gf32->gf_numsrc;
+ err = -ENOBUFS;
+ if (n >= 0x1ffffff)
+ goto out_free_gsf;
+
+ err = -EINVAL;
+ if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+ goto out_free_gsf;
+
+ /* numsrc >= (4G-140)/128 overflow in 32 bits */
+ err = -ENOBUFS;
+ if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+ goto out_free_gsf;
+ err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
+ &gf32->gf_group, gf32->gf_slist);
+out_free_gsf:
+ kfree(p);
+ return err;
}
-static int do_ip_setsockopt(struct sock *sk, int level,
- int optname, char __user *optval, unsigned int optlen)
+static int ip_mcast_join_leave(struct sock *sk, int optname,
+ sockptr_t optval, int optlen)
+{
+ struct ip_mreqn mreq = { };
+ struct sockaddr_in *psin;
+ struct group_req greq;
+
+ if (optlen < sizeof(struct group_req))
+ return -EINVAL;
+ if (copy_from_sockptr(&greq, optval, sizeof(greq)))
+ return -EFAULT;
+
+ psin = (struct sockaddr_in *)&greq.gr_group;
+ if (psin->sin_family != AF_INET)
+ return -EINVAL;
+ mreq.imr_multiaddr = psin->sin_addr;
+ mreq.imr_ifindex = greq.gr_interface;
+ if (optname == MCAST_JOIN_GROUP)
+ return ip_mc_join_group(sk, &mreq);
+ return ip_mc_leave_group(sk, &mreq);
+}
+
+static int compat_ip_mcast_join_leave(struct sock *sk, int optname,
+ sockptr_t optval, int optlen)
+{
+ struct compat_group_req greq;
+ struct ip_mreqn mreq = { };
+ struct sockaddr_in *psin;
+
+ if (optlen < sizeof(struct compat_group_req))
+ return -EINVAL;
+ if (copy_from_sockptr(&greq, optval, sizeof(greq)))
+ return -EFAULT;
+
+ psin = (struct sockaddr_in *)&greq.gr_group;
+ if (psin->sin_family != AF_INET)
+ return -EINVAL;
+ mreq.imr_multiaddr = psin->sin_addr;
+ mreq.imr_ifindex = greq.gr_interface;
+
+ if (optname == MCAST_JOIN_GROUP)
+ return ip_mc_join_group(sk, &mreq);
+ return ip_mc_leave_group(sk, &mreq);
+}
+
+static int do_ip_setsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
@@ -755,13 +919,14 @@ static int do_ip_setsockopt(struct sock *sk, int level,
case IP_RECVORIGDSTADDR:
case IP_CHECKSUM:
case IP_RECVFRAGSIZE:
+ case IP_RECVERR_RFC4884:
if (optlen >= sizeof(int)) {
- if (get_user(val, (int __user *) optval))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
} else if (optlen >= sizeof(char)) {
unsigned char ucval;
- if (get_user(ucval, (unsigned char __user *) optval))
+ if (copy_from_sockptr(&ucval, optval, sizeof(ucval)))
return -EFAULT;
val = (int) ucval;
}
@@ -786,8 +951,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
if (optlen > 40)
goto e_inval;
- err = ip_options_get_from_user(sock_net(sk), &opt,
- optval, optlen);
+ err = ip_options_get(sock_net(sk), &opt, optval, optlen);
if (err)
break;
old = rcu_dereference_protected(inet->inet_opt,
@@ -914,6 +1078,11 @@ static int do_ip_setsockopt(struct sock *sk, int level,
if (!val)
skb_queue_purge(&sk->sk_error_queue);
break;
+ case IP_RECVERR_RFC4884:
+ if (val < 0 || val > 1)
+ goto e_inval;
+ inet->recverr_rfc4884 = !!val;
+ break;
case IP_MULTICAST_TTL:
if (sk->sk_type == SOCK_STREAM)
goto e_inval;
@@ -980,17 +1149,17 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = -EFAULT;
if (optlen >= sizeof(struct ip_mreqn)) {
- if (copy_from_user(&mreq, optval, sizeof(mreq)))
+ if (copy_from_sockptr(&mreq, optval, sizeof(mreq)))
break;
} else {
memset(&mreq, 0, sizeof(mreq));
if (optlen >= sizeof(struct ip_mreq)) {
- if (copy_from_user(&mreq, optval,
- sizeof(struct ip_mreq)))
+ if (copy_from_sockptr(&mreq, optval,
+ sizeof(struct ip_mreq)))
break;
} else if (optlen >= sizeof(struct in_addr)) {
- if (copy_from_user(&mreq.imr_address, optval,
- sizeof(struct in_addr)))
+ if (copy_from_sockptr(&mreq.imr_address, optval,
+ sizeof(struct in_addr)))
break;
}
}
@@ -1042,11 +1211,12 @@ static int do_ip_setsockopt(struct sock *sk, int level,
goto e_inval;
err = -EFAULT;
if (optlen >= sizeof(struct ip_mreqn)) {
- if (copy_from_user(&mreq, optval, sizeof(mreq)))
+ if (copy_from_sockptr(&mreq, optval, sizeof(mreq)))
break;
} else {
memset(&mreq, 0, sizeof(mreq));
- if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq)))
+ if (copy_from_sockptr(&mreq, optval,
+ sizeof(struct ip_mreq)))
break;
}
@@ -1066,7 +1236,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = -ENOBUFS;
break;
}
- msf = memdup_user(optval, optlen);
+ msf = memdup_sockptr(optval, optlen);
if (IS_ERR(msf)) {
err = PTR_ERR(msf);
break;
@@ -1097,7 +1267,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
if (optlen != sizeof(struct ip_mreq_source))
goto e_inval;
- if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
+ if (copy_from_sockptr(&mreqs, optval, sizeof(mreqs))) {
err = -EFAULT;
break;
}
@@ -1127,77 +1297,24 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
case MCAST_JOIN_GROUP:
case MCAST_LEAVE_GROUP:
- {
- struct group_req greq;
- struct sockaddr_in *psin;
- struct ip_mreqn mreq;
-
- if (optlen < sizeof(struct group_req))
- goto e_inval;
- err = -EFAULT;
- if (copy_from_user(&greq, optval, sizeof(greq)))
- break;
- psin = (struct sockaddr_in *)&greq.gr_group;
- if (psin->sin_family != AF_INET)
- goto e_inval;
- memset(&mreq, 0, sizeof(mreq));
- mreq.imr_multiaddr = psin->sin_addr;
- mreq.imr_ifindex = greq.gr_interface;
-
- if (optname == MCAST_JOIN_GROUP)
- err = ip_mc_join_group(sk, &mreq);
+ if (in_compat_syscall())
+ err = compat_ip_mcast_join_leave(sk, optname, optval,
+ optlen);
else
- err = ip_mc_leave_group(sk, &mreq);
+ err = ip_mcast_join_leave(sk, optname, optval, optlen);
break;
- }
case MCAST_JOIN_SOURCE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_BLOCK_SOURCE:
case MCAST_UNBLOCK_SOURCE:
- {
- struct group_source_req greqs;
-
- if (optlen != sizeof(struct group_source_req))
- goto e_inval;
- if (copy_from_user(&greqs, optval, sizeof(greqs))) {
- err = -EFAULT;
- break;
- }
- err = do_mcast_group_source(sk, optname, &greqs);
+ err = do_mcast_group_source(sk, optname, optval, optlen);
break;
- }
case MCAST_MSFILTER:
- {
- struct group_filter *gsf = NULL;
-
- if (optlen < GROUP_FILTER_SIZE(0))
- goto e_inval;
- if (optlen > sysctl_optmem_max) {
- err = -ENOBUFS;
- break;
- }
- gsf = memdup_user(optval, optlen);
- if (IS_ERR(gsf)) {
- err = PTR_ERR(gsf);
- break;
- }
- /* numsrc >= (4G-140)/128 overflow in 32 bits */
- if (gsf->gf_numsrc >= 0x1ffffff ||
- gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
- err = -ENOBUFS;
- goto mc_msf_out;
- }
- if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
- err = -EINVAL;
- goto mc_msf_out;
- }
- err = set_mcast_msfilter(sk, gsf->gf_interface,
- gsf->gf_numsrc, gsf->gf_fmode,
- &gsf->gf_group, gsf->gf_slist);
-mc_msf_out:
- kfree(gsf);
+ if (in_compat_syscall())
+ err = compat_ip_set_mcast_msfilter(sk, optval, optlen);
+ else
+ err = ip_set_mcast_msfilter(sk, optval, optlen);
break;
- }
case IP_MULTICAST_ALL:
if (optlen < 1)
goto e_inval;
@@ -1296,8 +1413,8 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
skb_dst_drop(skb);
}
-int ip_setsockopt(struct sock *sk, int level,
- int optname, char __user *optval, unsigned int optlen)
+int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen)
{
int err;
@@ -1322,138 +1439,6 @@ int ip_setsockopt(struct sock *sk, int level,
}
EXPORT_SYMBOL(ip_setsockopt);
-#ifdef CONFIG_COMPAT
-int compat_ip_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- int err;
-
- if (level != SOL_IP)
- return -ENOPROTOOPT;
-
- switch (optname) {
- case MCAST_JOIN_GROUP:
- case MCAST_LEAVE_GROUP:
- {
- struct compat_group_req __user *gr32 = (void __user *)optval;
- struct group_req greq;
- struct sockaddr_in *psin = (struct sockaddr_in *)&greq.gr_group;
- struct ip_mreqn mreq;
-
- if (optlen < sizeof(struct compat_group_req))
- return -EINVAL;
-
- if (get_user(greq.gr_interface, &gr32->gr_interface) ||
- copy_from_user(&greq.gr_group, &gr32->gr_group,
- sizeof(greq.gr_group)))
- return -EFAULT;
-
- if (psin->sin_family != AF_INET)
- return -EINVAL;
-
- memset(&mreq, 0, sizeof(mreq));
- mreq.imr_multiaddr = psin->sin_addr;
- mreq.imr_ifindex = greq.gr_interface;
-
- rtnl_lock();
- lock_sock(sk);
- if (optname == MCAST_JOIN_GROUP)
- err = ip_mc_join_group(sk, &mreq);
- else
- err = ip_mc_leave_group(sk, &mreq);
- release_sock(sk);
- rtnl_unlock();
- return err;
- }
- case MCAST_JOIN_SOURCE_GROUP:
- case MCAST_LEAVE_SOURCE_GROUP:
- case MCAST_BLOCK_SOURCE:
- case MCAST_UNBLOCK_SOURCE:
- {
- struct compat_group_source_req __user *gsr32 = (void __user *)optval;
- struct group_source_req greqs;
-
- if (optlen != sizeof(struct compat_group_source_req))
- return -EINVAL;
-
- if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) ||
- copy_from_user(&greqs.gsr_group, &gsr32->gsr_group,
- sizeof(greqs.gsr_group)) ||
- copy_from_user(&greqs.gsr_source, &gsr32->gsr_source,
- sizeof(greqs.gsr_source)))
- return -EFAULT;
-
- rtnl_lock();
- lock_sock(sk);
- err = do_mcast_group_source(sk, optname, &greqs);
- release_sock(sk);
- rtnl_unlock();
- return err;
- }
- case MCAST_MSFILTER:
- {
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
- struct compat_group_filter *gf32;
- unsigned int n;
- void *p;
-
- if (optlen < size0)
- return -EINVAL;
- if (optlen > sysctl_optmem_max - 4)
- return -ENOBUFS;
-
- p = kmalloc(optlen + 4, GFP_KERNEL);
- if (!p)
- return -ENOMEM;
- gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
- if (copy_from_user(gf32, optval, optlen)) {
- err = -EFAULT;
- goto mc_msf_out;
- }
-
- n = gf32->gf_numsrc;
- /* numsrc >= (4G-140)/128 overflow in 32 bits */
- if (n >= 0x1ffffff) {
- err = -ENOBUFS;
- goto mc_msf_out;
- }
- if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) {
- err = -EINVAL;
- goto mc_msf_out;
- }
-
- rtnl_lock();
- lock_sock(sk);
- /* numsrc >= (4G-140)/128 overflow in 32 bits */
- if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
- err = -ENOBUFS;
- else
- err = set_mcast_msfilter(sk, gf32->gf_interface,
- n, gf32->gf_fmode,
- &gf32->gf_group, gf32->gf_slist);
- release_sock(sk);
- rtnl_unlock();
-mc_msf_out:
- kfree(p);
- return err;
- }
- }
-
- err = do_ip_setsockopt(sk, level, optname, optval, optlen);
-#ifdef CONFIG_NETFILTER
- /* we need to exclude all possible ENOPROTOOPTs except default case */
- if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
- optname != IP_IPSEC_POLICY &&
- optname != IP_XFRM_POLICY &&
- !ip_mroute_opt(optname))
- err = compat_nf_setsockopt(sk, PF_INET, optname, optval,
- optlen);
-#endif
- return err;
-}
-EXPORT_SYMBOL(compat_ip_setsockopt);
-#endif
-
/*
* Get the options. Note for future reference. The GET of IP options gets
* the _received_ ones. The set sets the _sent_ ones.
@@ -1469,8 +1454,67 @@ static bool getsockopt_needs_rtnl(int optname)
return false;
}
+static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
+ int __user *optlen, int len)
+{
+ const int size0 = offsetof(struct group_filter, gf_slist);
+ struct group_filter __user *p = optval;
+ struct group_filter gsf;
+ int num;
+ int err;
+
+ if (len < size0)
+ return -EINVAL;
+ if (copy_from_user(&gsf, p, size0))
+ return -EFAULT;
+
+ num = gsf.gf_numsrc;
+ err = ip_mc_gsfget(sk, &gsf, p->gf_slist);
+ if (err)
+ return err;
+ if (gsf.gf_numsrc < num)
+ num = gsf.gf_numsrc;
+ if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
+ copy_to_user(p, &gsf, size0))
+ return -EFAULT;
+ return 0;
+}
+
+static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
+ int __user *optlen, int len)
+{
+ const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ struct compat_group_filter __user *p = optval;
+ struct compat_group_filter gf32;
+ struct group_filter gf;
+ int num;
+ int err;
+
+ if (len < size0)
+ return -EINVAL;
+ if (copy_from_user(&gf32, p, size0))
+ return -EFAULT;
+
+ gf.gf_interface = gf32.gf_interface;
+ gf.gf_fmode = gf32.gf_fmode;
+ num = gf.gf_numsrc = gf32.gf_numsrc;
+ gf.gf_group = gf32.gf_group;
+
+ err = ip_mc_gsfget(sk, &gf, p->gf_slist);
+ if (err)
+ return err;
+ if (gf.gf_numsrc < num)
+ num = gf.gf_numsrc;
+ len = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32));
+ if (put_user(len, optlen) ||
+ put_user(gf.gf_fmode, &p->gf_fmode) ||
+ put_user(gf.gf_numsrc, &p->gf_numsrc))
+ return -EFAULT;
+ return 0;
+}
+
static int do_ip_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen, unsigned int flags)
+ char __user *optval, int __user *optlen)
{
struct inet_sock *inet = inet_sk(sk);
bool needs_rtnl = getsockopt_needs_rtnl(optname);
@@ -1588,6 +1632,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_RECVERR:
val = inet->recverr;
break;
+ case IP_RECVERR_RFC4884:
+ val = inet->recverr_rfc4884;
+ break;
case IP_MULTICAST_TTL:
val = inet->mc_ttl;
break;
@@ -1627,31 +1674,12 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
goto out;
}
case MCAST_MSFILTER:
- {
- struct group_filter __user *p = (void __user *)optval;
- struct group_filter gsf;
- const int size0 = offsetof(struct group_filter, gf_slist);
- int num;
-
- if (len < size0) {
- err = -EINVAL;
- goto out;
- }
- if (copy_from_user(&gsf, p, size0)) {
- err = -EFAULT;
- goto out;
- }
- num = gsf.gf_numsrc;
- err = ip_mc_gsfget(sk, &gsf, p->gf_slist);
- if (err)
- goto out;
- if (gsf.gf_numsrc < num)
- num = gsf.gf_numsrc;
- if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
- copy_to_user(p, &gsf, size0))
- err = -EFAULT;
+ if (in_compat_syscall())
+ err = compat_ip_get_mcast_msfilter(sk, optval, optlen,
+ len);
+ else
+ err = ip_get_mcast_msfilter(sk, optval, optlen, len);
goto out;
- }
case IP_MULTICAST_ALL:
val = inet->mc_all;
break;
@@ -1667,7 +1695,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
msg.msg_control_is_user = true;
msg.msg_control_user = optval;
msg.msg_controllen = len;
- msg.msg_flags = flags;
+ msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0;
if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
struct in_pktinfo info;
@@ -1731,7 +1759,8 @@ int ip_getsockopt(struct sock *sk, int level,
{
int err;
- err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
+ err = do_ip_getsockopt(sk, level, optname, optval, optlen);
+
#if IS_ENABLED(CONFIG_BPFILTER_UMH)
if (optname >= BPFILTER_IPT_SO_GET_INFO &&
optname < BPFILTER_IPT_GET_MAX)
@@ -1755,79 +1784,3 @@ int ip_getsockopt(struct sock *sk, int level,
return err;
}
EXPORT_SYMBOL(ip_getsockopt);
-
-#ifdef CONFIG_COMPAT
-int compat_ip_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- int err;
-
- if (optname == MCAST_MSFILTER) {
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
- struct compat_group_filter __user *p = (void __user *)optval;
- struct compat_group_filter gf32;
- struct group_filter gf;
- int ulen, err;
- int num;
-
- if (level != SOL_IP)
- return -EOPNOTSUPP;
-
- if (get_user(ulen, optlen))
- return -EFAULT;
-
- if (ulen < size0)
- return -EINVAL;
-
- if (copy_from_user(&gf32, p, size0))
- return -EFAULT;
-
- gf.gf_interface = gf32.gf_interface;
- gf.gf_fmode = gf32.gf_fmode;
- num = gf.gf_numsrc = gf32.gf_numsrc;
- gf.gf_group = gf32.gf_group;
-
- rtnl_lock();
- lock_sock(sk);
- err = ip_mc_gsfget(sk, &gf, p->gf_slist);
- release_sock(sk);
- rtnl_unlock();
- if (err)
- return err;
- if (gf.gf_numsrc < num)
- num = gf.gf_numsrc;
- ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32));
- if (put_user(ulen, optlen) ||
- put_user(gf.gf_fmode, &p->gf_fmode) ||
- put_user(gf.gf_numsrc, &p->gf_numsrc))
- return -EFAULT;
- return 0;
- }
-
- err = do_ip_getsockopt(sk, level, optname, optval, optlen,
- MSG_CMSG_COMPAT);
-
-#if IS_ENABLED(CONFIG_BPFILTER_UMH)
- if (optname >= BPFILTER_IPT_SO_GET_INFO &&
- optname < BPFILTER_IPT_GET_MAX)
- err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
-#endif
-#ifdef CONFIG_NETFILTER
- /* we need to exclude all possible ENOPROTOOPTs except default case */
- if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
- !ip_mroute_opt(optname)) {
- int len;
-
- if (get_user(len, optlen))
- return -EFAULT;
-
- err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len);
- if (err >= 0)
- err = put_user(len, optlen);
- return err;
- }
-#endif
- return err;
-}
-EXPORT_SYMBOL(compat_ip_getsockopt);
-#endif
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index f8b419e2475c..9ddee2a0c66d 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -184,6 +184,250 @@ int iptunnel_handle_offloads(struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(iptunnel_handle_offloads);
+/**
+ * iptunnel_pmtud_build_icmp() - Build ICMP error message for PMTUD
+ * @skb: Original packet with L2 header
+ * @mtu: MTU value for ICMP error
+ *
+ * Return: length on success, negative error code if message couldn't be built.
+ */
+static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct icmphdr *icmph;
+ struct iphdr *niph;
+ struct ethhdr eh;
+ int len, err;
+
+ if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr)))
+ return -EINVAL;
+
+ skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN);
+ pskb_pull(skb, ETH_HLEN);
+ skb_reset_network_header(skb);
+
+ err = pskb_trim(skb, 576 - sizeof(*niph) - sizeof(*icmph));
+ if (err)
+ return err;
+
+ len = skb->len + sizeof(*icmph);
+ err = skb_cow(skb, sizeof(*niph) + sizeof(*icmph) + ETH_HLEN);
+ if (err)
+ return err;
+
+ icmph = skb_push(skb, sizeof(*icmph));
+ *icmph = (struct icmphdr) {
+ .type = ICMP_DEST_UNREACH,
+ .code = ICMP_FRAG_NEEDED,
+ .checksum = 0,
+ .un.frag.__unused = 0,
+ .un.frag.mtu = ntohs(mtu),
+ };
+ icmph->checksum = ip_compute_csum(icmph, len);
+ skb_reset_transport_header(skb);
+
+ niph = skb_push(skb, sizeof(*niph));
+ *niph = (struct iphdr) {
+ .ihl = sizeof(*niph) / 4u,
+ .version = 4,
+ .tos = 0,
+ .tot_len = htons(len + sizeof(*niph)),
+ .id = 0,
+ .frag_off = htons(IP_DF),
+ .ttl = iph->ttl,
+ .protocol = IPPROTO_ICMP,
+ .saddr = iph->daddr,
+ .daddr = iph->saddr,
+ };
+ ip_send_check(niph);
+ skb_reset_network_header(skb);
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ eth_header(skb, skb->dev, htons(eh.h_proto), eh.h_source, eh.h_dest, 0);
+ skb_reset_mac_header(skb);
+
+ return skb->len;
+}
+
+/**
+ * iptunnel_pmtud_check_icmp() - Trigger ICMP reply if needed and allowed
+ * @skb: Buffer being sent by encapsulation, L2 headers expected
+ * @mtu: Network MTU for path
+ *
+ * Return: 0 for no ICMP reply, length if built, negative value on error.
+ */
+static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu)
+{
+ const struct icmphdr *icmph = icmp_hdr(skb);
+ const struct iphdr *iph = ip_hdr(skb);
+
+ if (mtu <= 576 || iph->frag_off != htons(IP_DF))
+ return 0;
+
+ if (ipv4_is_lbcast(iph->daddr) || ipv4_is_multicast(iph->daddr) ||
+ ipv4_is_zeronet(iph->saddr) || ipv4_is_loopback(iph->saddr) ||
+ ipv4_is_lbcast(iph->saddr) || ipv4_is_multicast(iph->saddr))
+ return 0;
+
+ if (iph->protocol == IPPROTO_ICMP && icmp_is_err(icmph->type))
+ return 0;
+
+ return iptunnel_pmtud_build_icmp(skb, mtu);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+/**
+ * iptunnel_pmtud_build_icmpv6() - Build ICMPv6 error message for PMTUD
+ * @skb: Original packet with L2 header
+ * @mtu: MTU value for ICMPv6 error
+ *
+ * Return: length on success, negative error code if message couldn't be built.
+ */
+static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct icmp6hdr *icmp6h;
+ struct ipv6hdr *nip6h;
+ struct ethhdr eh;
+ int len, err;
+ __wsum csum;
+
+ if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr)))
+ return -EINVAL;
+
+ skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN);
+ pskb_pull(skb, ETH_HLEN);
+ skb_reset_network_header(skb);
+
+ err = pskb_trim(skb, IPV6_MIN_MTU - sizeof(*nip6h) - sizeof(*icmp6h));
+ if (err)
+ return err;
+
+ len = skb->len + sizeof(*icmp6h);
+ err = skb_cow(skb, sizeof(*nip6h) + sizeof(*icmp6h) + ETH_HLEN);
+ if (err)
+ return err;
+
+ icmp6h = skb_push(skb, sizeof(*icmp6h));
+ *icmp6h = (struct icmp6hdr) {
+ .icmp6_type = ICMPV6_PKT_TOOBIG,
+ .icmp6_code = 0,
+ .icmp6_cksum = 0,
+ .icmp6_mtu = htonl(mtu),
+ };
+ skb_reset_transport_header(skb);
+
+ nip6h = skb_push(skb, sizeof(*nip6h));
+ *nip6h = (struct ipv6hdr) {
+ .priority = 0,
+ .version = 6,
+ .flow_lbl = { 0 },
+ .payload_len = htons(len),
+ .nexthdr = IPPROTO_ICMPV6,
+ .hop_limit = ip6h->hop_limit,
+ .saddr = ip6h->daddr,
+ .daddr = ip6h->saddr,
+ };
+ skb_reset_network_header(skb);
+
+ csum = csum_partial(icmp6h, len, 0);
+ icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len,
+ IPPROTO_ICMPV6, csum);
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ eth_header(skb, skb->dev, htons(eh.h_proto), eh.h_source, eh.h_dest, 0);
+ skb_reset_mac_header(skb);
+
+ return skb->len;
+}
+
+/**
+ * iptunnel_pmtud_check_icmpv6() - Trigger ICMPv6 reply if needed and allowed
+ * @skb: Buffer being sent by encapsulation, L2 headers expected
+ * @mtu: Network MTU for path
+ *
+ * Return: 0 for no ICMPv6 reply, length if built, negative value on error.
+ */
+static int iptunnel_pmtud_check_icmpv6(struct sk_buff *skb, int mtu)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ int stype = ipv6_addr_type(&ip6h->saddr);
+ u8 proto = ip6h->nexthdr;
+ __be16 frag_off;
+ int offset;
+
+ if (mtu <= IPV6_MIN_MTU)
+ return 0;
+
+ if (stype == IPV6_ADDR_ANY || stype == IPV6_ADDR_MULTICAST ||
+ stype == IPV6_ADDR_LOOPBACK)
+ return 0;
+
+ offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &proto,
+ &frag_off);
+ if (offset < 0 || (frag_off & htons(~0x7)))
+ return 0;
+
+ if (proto == IPPROTO_ICMPV6) {
+ struct icmp6hdr *icmp6h;
+
+ if (!pskb_may_pull(skb, skb_network_header(skb) +
+ offset + 1 - skb->data))
+ return 0;
+
+ icmp6h = (struct icmp6hdr *)(skb_network_header(skb) + offset);
+ if (icmpv6_is_err(icmp6h->icmp6_type) ||
+ icmp6h->icmp6_type == NDISC_REDIRECT)
+ return 0;
+ }
+
+ return iptunnel_pmtud_build_icmpv6(skb, mtu);
+}
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+
+/**
+ * skb_tunnel_check_pmtu() - Check, update PMTU and trigger ICMP reply as needed
+ * @skb: Buffer being sent by encapsulation, L2 headers expected
+ * @encap_dst: Destination for tunnel encapsulation (outer IP)
+ * @headroom: Encapsulation header size, bytes
+ * @reply: Build matching ICMP or ICMPv6 message as a result
+ *
+ * L2 tunnel implementations that can carry IP and can be directly bridged
+ * (currently UDP tunnels) can't always rely on IP forwarding paths to handle
+ * PMTU discovery. In the bridged case, ICMP or ICMPv6 messages need to be built
+ * based on payload and sent back by the encapsulation itself.
+ *
+ * For routable interfaces, we just need to update the PMTU for the destination.
+ *
+ * Return: 0 if ICMP error not needed, length if built, negative value on error
+ */
+int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst,
+ int headroom, bool reply)
+{
+ u32 mtu = dst_mtu(encap_dst) - headroom;
+
+ if ((skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) ||
+ (!skb_is_gso(skb) && (skb->len - skb_mac_header_len(skb)) <= mtu))
+ return 0;
+
+ skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+ if (!reply || skb->pkt_type == PACKET_HOST)
+ return 0;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ return iptunnel_pmtud_check_icmp(skb, mtu);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (skb->protocol == htons(ETH_P_IPV6))
+ return iptunnel_pmtud_check_icmpv6(skb, mtu);
+#endif
+ return 0;
+}
+EXPORT_SYMBOL(skb_tunnel_check_pmtu);
+
/* Often modified stats are per cpu, other are shared (netdev->stats) */
void ip_tunnel_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *tot)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 460ca1099e8a..49daaed89764 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -91,32 +91,6 @@ static int vti_rcv_proto(struct sk_buff *skb)
return vti_rcv(skb, 0, false);
}
-static int vti_rcv_tunnel(struct sk_buff *skb)
-{
- struct ip_tunnel_net *itn = net_generic(dev_net(skb->dev), vti_net_id);
- const struct iphdr *iph = ip_hdr(skb);
- struct ip_tunnel *tunnel;
-
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
- iph->saddr, iph->daddr, 0);
- if (tunnel) {
- struct tnl_ptk_info tpi = {
- .proto = htons(ETH_P_IP),
- };
-
- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
- goto drop;
- if (iptunnel_pull_header(skb, 0, tpi.proto, false))
- goto drop;
- return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, false);
- }
-
- return -EINVAL;
-drop:
- kfree_skb(skb);
- return 0;
-}
-
static int vti_rcv_cb(struct sk_buff *skb, int err)
{
unsigned short family;
@@ -244,12 +218,15 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
}
dst_hold(dst);
- dst = xfrm_lookup(tunnel->net, dst, fl, NULL, 0);
+ dst = xfrm_lookup_route(tunnel->net, dst, fl, NULL, 0);
if (IS_ERR(dst)) {
dev->stats.tx_carrier_errors++;
goto tx_error_icmp;
}
+ if (dst->flags & DST_XFRM_QUEUE)
+ goto queued;
+
if (!vti_state_check(dst->xfrm, parms->iph.daddr, parms->iph.saddr)) {
dev->stats.tx_carrier_errors++;
dst_release(dst);
@@ -281,6 +258,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
+queued:
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
skb_dst_set(skb, dst);
skb->dev = skb_dst(skb)->dev;
@@ -496,11 +474,29 @@ static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
.priority = 100,
};
-static struct xfrm_tunnel ipip_handler __read_mostly = {
+#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL)
+static int vti_rcv_tunnel(struct sk_buff *skb)
+{
+ XFRM_SPI_SKB_CB(skb)->family = AF_INET;
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+
+ return vti_input(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr, 0, false);
+}
+
+static struct xfrm_tunnel vti_ipip_handler __read_mostly = {
+ .handler = vti_rcv_tunnel,
+ .cb_handler = vti_rcv_cb,
+ .err_handler = vti4_err,
+ .priority = 0,
+};
+
+static struct xfrm_tunnel vti_ipip6_handler __read_mostly = {
.handler = vti_rcv_tunnel,
+ .cb_handler = vti_rcv_cb,
.err_handler = vti4_err,
.priority = 0,
};
+#endif
static int __net_init vti_init_net(struct net *net)
{
@@ -670,10 +666,17 @@ static int __init vti_init(void)
if (err < 0)
goto xfrm_proto_comp_failed;
+#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL)
msg = "ipip tunnel";
- err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
+ err = xfrm4_tunnel_register(&vti_ipip_handler, AF_INET);
+ if (err < 0)
+ goto xfrm_tunnel_ipip_failed;
+#if IS_ENABLED(CONFIG_IPV6)
+ err = xfrm4_tunnel_register(&vti_ipip6_handler, AF_INET6);
if (err < 0)
- goto xfrm_tunnel_failed;
+ goto xfrm_tunnel_ipip6_failed;
+#endif
+#endif
msg = "netlink interface";
err = rtnl_link_register(&vti_link_ops);
@@ -683,8 +686,14 @@ static int __init vti_init(void)
return err;
rtnl_link_failed:
- xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
-xfrm_tunnel_failed:
+#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL)
+#if IS_ENABLED(CONFIG_IPV6)
+ xfrm4_tunnel_deregister(&vti_ipip6_handler, AF_INET6);
+xfrm_tunnel_ipip6_failed:
+#endif
+ xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET);
+xfrm_tunnel_ipip_failed:
+#endif
xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
xfrm_proto_comp_failed:
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
@@ -700,7 +709,12 @@ pernet_dev_failed:
static void __exit vti_fini(void)
{
rtnl_link_unregister(&vti_link_ops);
- xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL)
+#if IS_ENABLED(CONFIG_IPV6)
+ xfrm4_tunnel_deregister(&vti_ipip6_handler, AF_INET6);
+#endif
+ xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET);
+#endif
xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 59bfa3825810..b42683212c65 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -72,6 +72,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
t->props.flags = x->props.flags;
t->props.extra_flags = x->props.extra_flags;
memcpy(&t->mark, &x->mark, sizeof(t->mark));
+ t->if_id = x->if_id;
if (xfrm_init_state(t))
goto error;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index f5c7a58844a4..876fd6ff1ff9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -636,7 +636,10 @@ static int call_ipmr_mfc_entry_notifiers(struct net *net,
/**
* vif_delete - Delete a VIF entry
+ * @mrt: Table to delete from
+ * @vifi: VIF identifier to delete
* @notify: Set to 1, if the caller is a notifier_call
+ * @head: if unregistering the VIF, place it on this queue
*/
static int vif_delete(struct mr_table *mrt, int vifi, int notify,
struct list_head *head)
@@ -1338,7 +1341,7 @@ static void mrtsock_destruct(struct sock *sk)
* MOSPF/PIM router set up we can clean this up.
*/
-int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
+int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
unsigned int optlen)
{
struct net *net = sock_net(sk);
@@ -1410,7 +1413,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
ret = -EINVAL;
break;
}
- if (copy_from_user(&vif, optval, sizeof(vif))) {
+ if (copy_from_sockptr(&vif, optval, sizeof(vif))) {
ret = -EFAULT;
break;
}
@@ -1438,7 +1441,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
ret = -EINVAL;
break;
}
- if (copy_from_user(&mfc, optval, sizeof(mfc))) {
+ if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) {
ret = -EFAULT;
break;
}
@@ -1456,7 +1459,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
ret = -EINVAL;
break;
}
- if (get_user(val, (int __user *)optval)) {
+ if (copy_from_sockptr(&val, optval, sizeof(val))) {
ret = -EFAULT;
break;
}
@@ -1468,7 +1471,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
ret = -EINVAL;
break;
}
- if (get_user(val, (int __user *)optval)) {
+ if (copy_from_sockptr(&val, optval, sizeof(val))) {
ret = -EFAULT;
break;
}
@@ -1483,7 +1486,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
ret = -EINVAL;
break;
}
- if (get_user(val, (int __user *)optval)) {
+ if (copy_from_sockptr(&val, optval, sizeof(val))) {
ret = -EFAULT;
break;
}
@@ -1505,7 +1508,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
ret = -EINVAL;
break;
}
- if (get_user(uval, (u32 __user *)optval)) {
+ if (copy_from_sockptr(&uval, optval, sizeof(uval))) {
ret = -EFAULT;
break;
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index b167f4a5b684..d1e04d2b5170 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -787,8 +787,7 @@ static int compat_table_info(const struct xt_table_info *info,
}
#endif
-static int get_info(struct net *net, void __user *user,
- const int *len, int compat)
+static int get_info(struct net *net, void __user *user, const int *len)
{
char name[XT_TABLE_MAXNAMELEN];
struct xt_table *t;
@@ -802,7 +801,7 @@ static int get_info(struct net *net, void __user *user,
name[XT_TABLE_MAXNAMELEN-1] = '\0';
#ifdef CONFIG_COMPAT
- if (compat)
+ if (in_compat_syscall())
xt_compat_lock(NFPROTO_ARP);
#endif
t = xt_request_find_table_lock(net, NFPROTO_ARP, name);
@@ -812,7 +811,7 @@ static int get_info(struct net *net, void __user *user,
#ifdef CONFIG_COMPAT
struct xt_table_info tmp;
- if (compat) {
+ if (in_compat_syscall()) {
ret = compat_table_info(private, &tmp);
xt_compat_flush_offsets(NFPROTO_ARP);
private = &tmp;
@@ -837,7 +836,7 @@ static int get_info(struct net *net, void __user *user,
} else
ret = PTR_ERR(t);
#ifdef CONFIG_COMPAT
- if (compat)
+ if (in_compat_syscall())
xt_compat_unlock(NFPROTO_ARP);
#endif
return ret;
@@ -948,8 +947,7 @@ static int __do_replace(struct net *net, const char *name,
return ret;
}
-static int do_replace(struct net *net, const void __user *user,
- unsigned int len)
+static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
int ret;
struct arpt_replace tmp;
@@ -957,7 +955,7 @@ static int do_replace(struct net *net, const void __user *user,
void *loc_cpu_entry;
struct arpt_entry *iter;
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
/* overflow check */
@@ -973,8 +971,8 @@ static int do_replace(struct net *net, const void __user *user,
return -ENOMEM;
loc_cpu_entry = newinfo->entries;
- if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
- tmp.size) != 0) {
+ if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp),
+ tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
}
@@ -997,8 +995,7 @@ static int do_replace(struct net *net, const void __user *user,
return ret;
}
-static int do_add_counters(struct net *net, const void __user *user,
- unsigned int len, int compat)
+static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
{
unsigned int i;
struct xt_counters_info tmp;
@@ -1009,7 +1006,7 @@ static int do_add_counters(struct net *net, const void __user *user,
struct arpt_entry *iter;
unsigned int addend;
- paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
+ paddc = xt_copy_counters(arg, len, &tmp);
if (IS_ERR(paddc))
return PTR_ERR(paddc);
@@ -1246,8 +1243,7 @@ out_unlock:
return ret;
}
-static int compat_do_replace(struct net *net, void __user *user,
- unsigned int len)
+static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
int ret;
struct compat_arpt_replace tmp;
@@ -1255,7 +1251,7 @@ static int compat_do_replace(struct net *net, void __user *user,
void *loc_cpu_entry;
struct arpt_entry *iter;
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
/* overflow check */
@@ -1271,7 +1267,8 @@ static int compat_do_replace(struct net *net, void __user *user,
return -ENOMEM;
loc_cpu_entry = newinfo->entries;
- if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) {
+ if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp),
+ tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
}
@@ -1294,30 +1291,6 @@ static int compat_do_replace(struct net *net, void __user *user,
return ret;
}
-static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
- unsigned int len)
-{
- int ret;
-
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
- switch (cmd) {
- case ARPT_SO_SET_REPLACE:
- ret = compat_do_replace(sock_net(sk), user, len);
- break;
-
- case ARPT_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(sock_net(sk), user, len, 1);
- break;
-
- default:
- ret = -EINVAL;
- }
-
- return ret;
-}
-
static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
compat_uint_t *size,
struct xt_counters *counters,
@@ -1425,32 +1398,10 @@ static int compat_get_entries(struct net *net,
xt_compat_unlock(NFPROTO_ARP);
return ret;
}
-
-static int do_arpt_get_ctl(struct sock *, int, void __user *, int *);
-
-static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user,
- int *len)
-{
- int ret;
-
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
- switch (cmd) {
- case ARPT_SO_GET_INFO:
- ret = get_info(sock_net(sk), user, len, 1);
- break;
- case ARPT_SO_GET_ENTRIES:
- ret = compat_get_entries(sock_net(sk), user, len);
- break;
- default:
- ret = do_arpt_get_ctl(sk, cmd, user, len);
- }
- return ret;
-}
#endif
-static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+static int do_arpt_set_ctl(struct sock *sk, int cmd, sockptr_t arg,
+ unsigned int len)
{
int ret;
@@ -1459,11 +1410,16 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
switch (cmd) {
case ARPT_SO_SET_REPLACE:
- ret = do_replace(sock_net(sk), user, len);
+#ifdef CONFIG_COMPAT
+ if (in_compat_syscall())
+ ret = compat_do_replace(sock_net(sk), arg, len);
+ else
+#endif
+ ret = do_replace(sock_net(sk), arg, len);
break;
case ARPT_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(sock_net(sk), user, len, 0);
+ ret = do_add_counters(sock_net(sk), arg, len);
break;
default:
@@ -1482,11 +1438,16 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
switch (cmd) {
case ARPT_SO_GET_INFO:
- ret = get_info(sock_net(sk), user, len, 0);
+ ret = get_info(sock_net(sk), user, len);
break;
case ARPT_SO_GET_ENTRIES:
- ret = get_entries(sock_net(sk), user, len);
+#ifdef CONFIG_COMPAT
+ if (in_compat_syscall())
+ ret = compat_get_entries(sock_net(sk), user, len);
+ else
+#endif
+ ret = get_entries(sock_net(sk), user, len);
break;
case ARPT_SO_GET_REVISION_TARGET: {
@@ -1610,15 +1571,9 @@ static struct nf_sockopt_ops arpt_sockopts = {
.set_optmin = ARPT_BASE_CTL,
.set_optmax = ARPT_SO_SET_MAX+1,
.set = do_arpt_set_ctl,
-#ifdef CONFIG_COMPAT
- .compat_set = compat_do_arpt_set_ctl,
-#endif
.get_optmin = ARPT_BASE_CTL,
.get_optmax = ARPT_SO_GET_MAX+1,
.get = do_arpt_get_ctl,
-#ifdef CONFIG_COMPAT
- .compat_get = compat_do_arpt_get_ctl,
-#endif
.owner = THIS_MODULE,
};
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 5bf9fa06aee0..f15bc21d7301 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -944,8 +944,7 @@ static int compat_table_info(const struct xt_table_info *info,
}
#endif
-static int get_info(struct net *net, void __user *user,
- const int *len, int compat)
+static int get_info(struct net *net, void __user *user, const int *len)
{
char name[XT_TABLE_MAXNAMELEN];
struct xt_table *t;
@@ -959,7 +958,7 @@ static int get_info(struct net *net, void __user *user,
name[XT_TABLE_MAXNAMELEN-1] = '\0';
#ifdef CONFIG_COMPAT
- if (compat)
+ if (in_compat_syscall())
xt_compat_lock(AF_INET);
#endif
t = xt_request_find_table_lock(net, AF_INET, name);
@@ -969,7 +968,7 @@ static int get_info(struct net *net, void __user *user,
#ifdef CONFIG_COMPAT
struct xt_table_info tmp;
- if (compat) {
+ if (in_compat_syscall()) {
ret = compat_table_info(private, &tmp);
xt_compat_flush_offsets(AF_INET);
private = &tmp;
@@ -995,7 +994,7 @@ static int get_info(struct net *net, void __user *user,
} else
ret = PTR_ERR(t);
#ifdef CONFIG_COMPAT
- if (compat)
+ if (in_compat_syscall())
xt_compat_unlock(AF_INET);
#endif
return ret;
@@ -1103,7 +1102,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
}
static int
-do_replace(struct net *net, const void __user *user, unsigned int len)
+do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
int ret;
struct ipt_replace tmp;
@@ -1111,7 +1110,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
void *loc_cpu_entry;
struct ipt_entry *iter;
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
/* overflow check */
@@ -1127,8 +1126,8 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
return -ENOMEM;
loc_cpu_entry = newinfo->entries;
- if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
- tmp.size) != 0) {
+ if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp),
+ tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
}
@@ -1152,8 +1151,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
}
static int
-do_add_counters(struct net *net, const void __user *user,
- unsigned int len, int compat)
+do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
{
unsigned int i;
struct xt_counters_info tmp;
@@ -1164,7 +1162,7 @@ do_add_counters(struct net *net, const void __user *user,
struct ipt_entry *iter;
unsigned int addend;
- paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
+ paddc = xt_copy_counters(arg, len, &tmp);
if (IS_ERR(paddc))
return PTR_ERR(paddc);
@@ -1486,7 +1484,7 @@ out_unlock:
}
static int
-compat_do_replace(struct net *net, void __user *user, unsigned int len)
+compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
int ret;
struct compat_ipt_replace tmp;
@@ -1494,7 +1492,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
void *loc_cpu_entry;
struct ipt_entry *iter;
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
/* overflow check */
@@ -1510,8 +1508,8 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
return -ENOMEM;
loc_cpu_entry = newinfo->entries;
- if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
- tmp.size) != 0) {
+ if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp),
+ tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
}
@@ -1534,31 +1532,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
return ret;
}
-static int
-compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
- unsigned int len)
-{
- int ret;
-
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
- switch (cmd) {
- case IPT_SO_SET_REPLACE:
- ret = compat_do_replace(sock_net(sk), user, len);
- break;
-
- case IPT_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(sock_net(sk), user, len, 1);
- break;
-
- default:
- ret = -EINVAL;
- }
-
- return ret;
-}
-
struct compat_ipt_get_entries {
char name[XT_TABLE_MAXNAMELEN];
compat_uint_t size;
@@ -1634,33 +1607,10 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
xt_compat_unlock(AF_INET);
return ret;
}
-
-static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
-
-static int
-compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
-{
- int ret;
-
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
- switch (cmd) {
- case IPT_SO_GET_INFO:
- ret = get_info(sock_net(sk), user, len, 1);
- break;
- case IPT_SO_GET_ENTRIES:
- ret = compat_get_entries(sock_net(sk), user, len);
- break;
- default:
- ret = do_ipt_get_ctl(sk, cmd, user, len);
- }
- return ret;
-}
#endif
static int
-do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+do_ipt_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len)
{
int ret;
@@ -1669,11 +1619,16 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
switch (cmd) {
case IPT_SO_SET_REPLACE:
- ret = do_replace(sock_net(sk), user, len);
+#ifdef CONFIG_COMPAT
+ if (in_compat_syscall())
+ ret = compat_do_replace(sock_net(sk), arg, len);
+ else
+#endif
+ ret = do_replace(sock_net(sk), arg, len);
break;
case IPT_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(sock_net(sk), user, len, 0);
+ ret = do_add_counters(sock_net(sk), arg, len);
break;
default:
@@ -1693,11 +1648,16 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
switch (cmd) {
case IPT_SO_GET_INFO:
- ret = get_info(sock_net(sk), user, len, 0);
+ ret = get_info(sock_net(sk), user, len);
break;
case IPT_SO_GET_ENTRIES:
- ret = get_entries(sock_net(sk), user, len);
+#ifdef CONFIG_COMPAT
+ if (in_compat_syscall())
+ ret = compat_get_entries(sock_net(sk), user, len);
+ else
+#endif
+ ret = get_entries(sock_net(sk), user, len);
break;
case IPT_SO_GET_REVISION_MATCH:
@@ -1886,15 +1846,9 @@ static struct nf_sockopt_ops ipt_sockopts = {
.set_optmin = IPT_BASE_CTL,
.set_optmax = IPT_SO_SET_MAX+1,
.set = do_ipt_set_ctl,
-#ifdef CONFIG_COMPAT
- .compat_set = compat_do_ipt_set_ctl,
-#endif
.get_optmin = IPT_BASE_CTL,
.get_optmax = IPT_SO_GET_MAX+1,
.get = do_ipt_get_ctl,
-#ifdef CONFIG_COMPAT
- .compat_get = compat_do_ipt_get_ctl,
-#endif
.owner = THIS_MODULE,
};
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index f8755a4ae9d4..a8b980ad11d4 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -3,7 +3,7 @@
* (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
* based on ideas of Fabio Olive Leite <olive@unixforge.org>
*
- * Development of this code funded by SuSE Linux AG, http://www.suse.com/
+ * Development of this code funded by SuSE Linux AG, https://www.suse.com/
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 2361fdac2c43..9dcfa4e461b6 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -96,6 +96,21 @@ void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb,
}
EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put);
+static int nf_reject_fill_skb_dst(struct sk_buff *skb_in)
+{
+ struct dst_entry *dst = NULL;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(struct flowi));
+ fl.u.ip4.daddr = ip_hdr(skb_in)->saddr;
+ nf_ip_route(dev_net(skb_in->dev), &dst, &fl, false);
+ if (!dst)
+ return -1;
+
+ skb_dst_set(skb_in, dst);
+ return 0;
+}
+
/* Send RST reply */
void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
{
@@ -109,6 +124,9 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
if (!oth)
return;
+ if (hook == NF_INET_PRE_ROUTING && nf_reject_fill_skb_dst(oldskb))
+ return;
+
if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
return;
@@ -175,6 +193,9 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
if (iph->frag_off & htons(IP_OFFSET))
return;
+ if (hook == NF_INET_PRE_ROUTING && nf_reject_fill_skb_dst(skb_in))
+ return;
+
if (skb_csum_unnecessary(skb_in) || !nf_reject_verify_csum(proto)) {
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
return;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 75545a829a2b..1074df726ec0 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -292,6 +292,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPFastOpenPassiveAltKey", LINUX_MIB_TCPFASTOPENPASSIVEALTKEY),
SNMP_MIB_ITEM("TcpTimeoutRehash", LINUX_MIB_TCPTIMEOUTREHASH),
SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH),
+ SNMP_MIB_ITEM("TCPDSACKRecvSegs", LINUX_MIB_TCPDSACKRECVSEGS),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 47665919048f..6fd4330287c2 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -809,11 +809,11 @@ static int raw_sk_init(struct sock *sk)
return 0;
}
-static int raw_seticmpfilter(struct sock *sk, char __user *optval, int optlen)
+static int raw_seticmpfilter(struct sock *sk, sockptr_t optval, int optlen)
{
if (optlen > sizeof(struct icmp_filter))
optlen = sizeof(struct icmp_filter);
- if (copy_from_user(&raw_sk(sk)->filter, optval, optlen))
+ if (copy_from_sockptr(&raw_sk(sk)->filter, optval, optlen))
return -EFAULT;
return 0;
}
@@ -838,7 +838,7 @@ out: return ret;
}
static int do_raw_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
if (optname == ICMP_FILTER) {
if (inet_sk(sk)->inet_num != IPPROTO_ICMP)
@@ -850,23 +850,13 @@ static int do_raw_setsockopt(struct sock *sk, int level, int optname,
}
static int raw_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
if (level != SOL_RAW)
return ip_setsockopt(sk, level, optname, optval, optlen);
return do_raw_setsockopt(sk, level, optname, optval, optlen);
}
-#ifdef CONFIG_COMPAT
-static int compat_raw_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- if (level != SOL_RAW)
- return compat_ip_setsockopt(sk, level, optname, optval, optlen);
- return do_raw_setsockopt(sk, level, optname, optval, optlen);
-}
-#endif
-
static int do_raw_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -887,16 +877,6 @@ static int raw_getsockopt(struct sock *sk, int level, int optname,
return do_raw_getsockopt(sk, level, optname, optval, optlen);
}
-#ifdef CONFIG_COMPAT
-static int compat_raw_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- if (level != SOL_RAW)
- return compat_ip_getsockopt(sk, level, optname, optval, optlen);
- return do_raw_getsockopt(sk, level, optname, optval, optlen);
-}
-#endif
-
static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
switch (cmd) {
@@ -980,8 +960,6 @@ struct proto raw_prot = {
.usersize = sizeof_field(struct raw_sock, filter),
.h.raw_hash = &raw_v4_hashinfo,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_raw_setsockopt,
- .compat_getsockopt = compat_raw_getsockopt,
.compat_ioctl = compat_raw_ioctl,
#endif
.diag_destroy = raw_abort,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a01efa062f6b..8ca6bcab7b03 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1050,6 +1050,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct flowi4 fl4;
ip_rt_build_flow_key(&fl4, sk, skb);
+
+ /* Don't make lookup fail for bridged encapsulations */
+ if (skb && netif_is_any_bridge_port(skb->dev))
+ fl4.flowi4_oif = 0;
+
__ip_rt_update_pmtu(rt, &fl4, mtu);
}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 9a4f6b16c9bc..f0794f0232ba 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -212,6 +212,12 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
refcount_set(&req->rsk_refcnt, 1);
tcp_sk(child)->tsoffset = tsoff;
sock_rps_save_rxhash(child, skb);
+
+ if (rsk_drop_req(req)) {
+ refcount_set(&req->rsk_refcnt, 2);
+ return child;
+ }
+
if (inet_csk_reqsk_queue_add(sk, req, child))
return child;
@@ -276,6 +282,40 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
}
EXPORT_SYMBOL(cookie_ecn_ok);
+struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
+ struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct request_sock *req;
+
+#ifdef CONFIG_MPTCP
+ struct tcp_request_sock *treq;
+
+ if (sk_is_mptcp(sk))
+ ops = &mptcp_subflow_request_sock_ops;
+#endif
+
+ req = inet_reqsk_alloc(ops, sk, false);
+ if (!req)
+ return NULL;
+
+#if IS_ENABLED(CONFIG_MPTCP)
+ treq = tcp_rsk(req);
+ treq->is_mptcp = sk_is_mptcp(sk);
+ if (treq->is_mptcp) {
+ int err = mptcp_subflow_init_cookie_req(req, sk, skb);
+
+ if (err) {
+ reqsk_free(req);
+ return NULL;
+ }
+ }
+#endif
+
+ return req;
+}
+EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc);
+
/* On input, sk is a listener.
* Output is listener if incoming packet would not create a child
* NULL if memory could not be allocated.
@@ -326,7 +366,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */
+ req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb);
if (!req)
goto out;
@@ -350,9 +390,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
treq->snt_synack = 0;
treq->tfo_listener = false;
- if (IS_ENABLED(CONFIG_MPTCP))
- treq->is_mptcp = 0;
-
if (IS_ENABLED(CONFIG_SMC))
ireq->smc_ok = 0;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6f0caf9a866d..c06d2bfd2ec4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2764,7 +2764,7 @@ static inline bool tcp_can_repair_sock(const struct sock *sk)
(sk->sk_state != TCP_LISTEN);
}
-static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len)
+static int tcp_repair_set_window(struct tcp_sock *tp, sockptr_t optbuf, int len)
{
struct tcp_repair_window opt;
@@ -2774,7 +2774,7 @@ static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int l
if (len != sizeof(opt))
return -EINVAL;
- if (copy_from_user(&opt, optbuf, sizeof(opt)))
+ if (copy_from_sockptr(&opt, optbuf, sizeof(opt)))
return -EFAULT;
if (opt.max_window < opt.snd_wnd)
@@ -2796,17 +2796,18 @@ static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int l
return 0;
}
-static int tcp_repair_options_est(struct sock *sk,
- struct tcp_repair_opt __user *optbuf, unsigned int len)
+static int tcp_repair_options_est(struct sock *sk, sockptr_t optbuf,
+ unsigned int len)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_repair_opt opt;
+ size_t offset = 0;
while (len >= sizeof(opt)) {
- if (copy_from_user(&opt, optbuf, sizeof(opt)))
+ if (copy_from_sockptr_offset(&opt, optbuf, offset, sizeof(opt)))
return -EFAULT;
- optbuf++;
+ offset += sizeof(opt);
len -= sizeof(opt);
switch (opt.opt_code) {
@@ -2960,7 +2961,7 @@ void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
}
EXPORT_SYMBOL(tcp_sock_set_user_timeout);
-static int __tcp_sock_set_keepidle(struct sock *sk, int val)
+int tcp_sock_set_keepidle_locked(struct sock *sk, int val)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2987,7 +2988,7 @@ int tcp_sock_set_keepidle(struct sock *sk, int val)
int err;
lock_sock(sk);
- err = __tcp_sock_set_keepidle(sk, val);
+ err = tcp_sock_set_keepidle_locked(sk, val);
release_sock(sk);
return err;
}
@@ -3020,8 +3021,8 @@ EXPORT_SYMBOL(tcp_sock_set_keepcnt);
/*
* Socket option code for TCP.
*/
-static int do_tcp_setsockopt(struct sock *sk, int level,
- int optname, char __user *optval, unsigned int optlen)
+static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3037,7 +3038,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
if (optlen < 1)
return -EINVAL;
- val = strncpy_from_user(name, optval,
+ val = strncpy_from_sockptr(name, optval,
min_t(long, TCP_CA_NAME_MAX-1, optlen));
if (val < 0)
return -EFAULT;
@@ -3056,7 +3057,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
if (optlen < 1)
return -EINVAL;
- val = strncpy_from_user(name, optval,
+ val = strncpy_from_sockptr(name, optval,
min_t(long, TCP_ULP_NAME_MAX - 1,
optlen));
if (val < 0)
@@ -3079,7 +3080,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
optlen != TCP_FASTOPEN_KEY_BUF_LENGTH)
return -EINVAL;
- if (copy_from_user(key, optval, optlen))
+ if (copy_from_sockptr(key, optval, optlen))
return -EFAULT;
if (optlen == TCP_FASTOPEN_KEY_BUF_LENGTH)
@@ -3095,7 +3096,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3174,9 +3175,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
if (!tp->repair)
err = -EINVAL;
else if (sk->sk_state == TCP_ESTABLISHED)
- err = tcp_repair_options_est(sk,
- (struct tcp_repair_opt __user *)optval,
- optlen);
+ err = tcp_repair_options_est(sk, optval, optlen);
else
err = -EPERM;
break;
@@ -3186,7 +3185,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_KEEPIDLE:
- err = __tcp_sock_set_keepidle(sk, val);
+ err = tcp_sock_set_keepidle_locked(sk, val);
break;
case TCP_KEEPINTVL:
if (val < 1 || val > MAX_TCP_KEEPINTVL)
@@ -3325,7 +3324,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
return err;
}
-int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
+int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
unsigned int optlen)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3337,18 +3336,6 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
}
EXPORT_SYMBOL(tcp_setsockopt);
-#ifdef CONFIG_COMPAT
-int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- if (level != SOL_TCP)
- return inet_csk_compat_setsockopt(sk, level, optname,
- optval, optlen);
- return do_tcp_setsockopt(sk, level, optname, optval, optlen);
-}
-EXPORT_SYMBOL(compat_tcp_setsockopt);
-#endif
-
static void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
struct tcp_info *info)
{
@@ -3514,10 +3501,12 @@ static size_t tcp_opt_stats_get_size(void)
nla_total_size(sizeof(u32)) + /* TCP_NLA_SRTT */
nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */
nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */
+ nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_EDT */
0;
}
-struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
+struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
+ const struct sk_buff *orig_skb)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *stats;
@@ -3571,6 +3560,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash);
nla_put_u32(stats, TCP_NLA_BYTES_NOTSENT,
max_t(int, 0, tp->write_seq - tp->snd_nxt));
+ nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns,
+ TCP_NLA_PAD);
return stats;
}
@@ -3896,18 +3887,6 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
}
EXPORT_SYMBOL(tcp_getsockopt);
-#ifdef CONFIG_COMPAT
-int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- if (level != SOL_TCP)
- return inet_csk_compat_getsockopt(sk, level, optname,
- optval, optlen);
- return do_tcp_getsockopt(sk, level, optname, optval, optlen);
-}
-EXPORT_SYMBOL(compat_tcp_getsockopt);
-#endif
-
#ifdef CONFIG_TCP_MD5SIG
static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
static DEFINE_MUTEX(tcp_md5sig_mutex);
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index bfdfbb972c57..349069d6cd0a 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -2,7 +2,7 @@
/*
* Sally Floyd's High Speed TCP (RFC 3649) congestion control
*
- * See http://www.icir.org/floyd/hstcp.html
+ * See https://www.icir.org/floyd/hstcp.html
*
* John Heffner <jheffner@psc.edu>
*/
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 88e1f011afe0..55adcfcf96fe 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -4,7 +4,7 @@
* R.N.Shorten, D.J.Leith:
* "H-TCP: TCP for high-speed and long-distance networks"
* Proc. PFLDnet, Argonne, 2004.
- * http://www.hamilton.ie/net/htcp3.pdf
+ * https://www.hamilton.ie/net/htcp3.pdf
*/
#include <linux/mm.h>
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 518f04355fbf..184ea556f50e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -518,7 +518,7 @@ EXPORT_SYMBOL(tcp_initialize_rcv_mss);
*
* The algorithm for RTT estimation w/o timestamps is based on
* Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
- * <http://public.lanl.gov/radiant/pubs.html#DRS>
+ * <https://public.lanl.gov/radiant/pubs.html#DRS>
*
* More detail on this code can be found at
* <http://staff.psc.edu/jheffner/>,
@@ -871,12 +871,41 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}
+struct tcp_sacktag_state {
+ /* Timestamps for earliest and latest never-retransmitted segment
+ * that was SACKed. RTO needs the earliest RTT to stay conservative,
+ * but congestion control should still get an accurate delay signal.
+ */
+ u64 first_sackt;
+ u64 last_sackt;
+ u32 reord;
+ u32 sack_delivered;
+ int flag;
+ unsigned int mss_now;
+ struct rate_sample *rate;
+};
+
/* Take a notice that peer is sending D-SACKs */
-static void tcp_dsack_seen(struct tcp_sock *tp)
+static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
+ u32 end_seq, struct tcp_sacktag_state *state)
{
+ u32 seq_len, dup_segs = 1;
+
+ if (before(start_seq, end_seq)) {
+ seq_len = end_seq - start_seq;
+ if (seq_len > tp->mss_cache)
+ dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);
+ }
+
tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
tp->rack.dsack_seen = 1;
- tp->dsack_dups++;
+ tp->dsack_dups += dup_segs;
+
+ state->flag |= FLAG_DSACKING_ACK;
+ /* A spurious retransmission is delivered */
+ state->sack_delivered += dup_segs;
+
+ return dup_segs;
}
/* It's reordering when higher sequence was delivered (i.e. sacked) before
@@ -962,6 +991,15 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
}
}
+/* Updates the delivered and delivered_ce counts */
+static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
+ bool ece_ack)
+{
+ tp->delivered += delivered;
+ if (ece_ack)
+ tp->delivered_ce += delivered;
+}
+
/* This procedure tags the retransmission queue when SACKs arrive.
*
* We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
@@ -1094,52 +1132,38 @@ static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
struct tcp_sack_block_wire *sp, int num_sacks,
- u32 prior_snd_una)
+ u32 prior_snd_una, struct tcp_sacktag_state *state)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
- bool dup_sack = false;
+ u32 dup_segs;
if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
- dup_sack = true;
- tcp_dsack_seen(tp);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
} else if (num_sacks > 1) {
u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
- if (!after(end_seq_0, end_seq_1) &&
- !before(start_seq_0, start_seq_1)) {
- dup_sack = true;
- tcp_dsack_seen(tp);
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPDSACKOFORECV);
- }
+ if (after(end_seq_0, end_seq_1) || before(start_seq_0, start_seq_1))
+ return false;
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKOFORECV);
+ } else {
+ return false;
}
+ dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state);
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECVSEGS, dup_segs);
+
/* D-SACK for already forgotten data... Do dumb counting. */
- if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
+ if (tp->undo_marker && tp->undo_retrans > 0 &&
!after(end_seq_0, prior_snd_una) &&
after(end_seq_0, tp->undo_marker))
- tp->undo_retrans--;
+ tp->undo_retrans = max_t(int, 0, tp->undo_retrans - dup_segs);
- return dup_sack;
+ return true;
}
-struct tcp_sacktag_state {
- u32 reord;
- /* Timestamps for earliest and latest never-retransmitted segment
- * that was SACKed. RTO needs the earliest RTT to stay conservative,
- * but congestion control should still get an accurate delay signal.
- */
- u64 first_sackt;
- u64 last_sackt;
- struct rate_sample *rate;
- int flag;
- unsigned int mss_now;
-};
-
/* Check if skb is fully within the SACK block. In presence of GSO skbs,
* the incoming SACK may not exactly match but we can find smaller MSS
* aligned portion of it that matches. Therefore we might need to fragment
@@ -1258,7 +1282,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
sacked |= TCPCB_SACKED_ACKED;
state->flag |= FLAG_DATA_SACKED;
tp->sacked_out += pcount;
- tp->delivered += pcount; /* Out-of-order packets delivered */
+ /* Out-of-order packets delivered */
+ state->sack_delivered += pcount;
/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
if (tp->lost_skb_hint &&
@@ -1681,11 +1706,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
tcp_highest_sack_reset(sk);
found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
- num_sacks, prior_snd_una);
- if (found_dup_sack) {
- state->flag |= FLAG_DSACKING_ACK;
- tp->delivered++; /* A spurious retransmission is delivered */
- }
+ num_sacks, prior_snd_una, state);
/* Eliminate too old ACKs, but take into
* account more or less fresh ones, they can
@@ -1893,7 +1914,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
/* Emulate SACKs for SACKless connection: account for a new dupack. */
-static void tcp_add_reno_sack(struct sock *sk, int num_dupack)
+static void tcp_add_reno_sack(struct sock *sk, int num_dupack, bool ece_ack)
{
if (num_dupack) {
struct tcp_sock *tp = tcp_sk(sk);
@@ -1904,20 +1925,21 @@ static void tcp_add_reno_sack(struct sock *sk, int num_dupack)
tcp_check_reno_reordering(sk, 0);
delivered = tp->sacked_out - prior_sacked;
if (delivered > 0)
- tp->delivered += delivered;
+ tcp_count_delivered(tp, delivered, ece_ack);
tcp_verify_left_out(tp);
}
}
/* Account for ACK, ACKing some data in Reno Recovery phase. */
-static void tcp_remove_reno_sacks(struct sock *sk, int acked)
+static void tcp_remove_reno_sacks(struct sock *sk, int acked, bool ece_ack)
{
struct tcp_sock *tp = tcp_sk(sk);
if (acked > 0) {
/* One ACK acked hole. The rest eat duplicate ACKs. */
- tp->delivered += max_t(int, acked - tp->sacked_out, 1);
+ tcp_count_delivered(tp, max_t(int, acked - tp->sacked_out, 1),
+ ece_ack);
if (acked - 1 >= tp->sacked_out)
tp->sacked_out = 0;
else
@@ -2697,7 +2719,7 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
* delivered. Lower inflight to clock out (re)tranmissions.
*/
if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
- tcp_add_reno_sack(sk, num_dupack);
+ tcp_add_reno_sack(sk, num_dupack, flag & FLAG_ECE);
else if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
}
@@ -2779,6 +2801,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int fast_rexmit = 0, flag = *ack_flag;
+ bool ece_ack = flag & FLAG_ECE;
bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
tcp_force_fast_retransmit(sk));
@@ -2787,7 +2810,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
/* Now state machine starts.
* A. ECE, hence prohibit cwnd undoing, the reduction is required. */
- if (flag & FLAG_ECE)
+ if (ece_ack)
tp->prior_ssthresh = 0;
/* B. In all the states check for reneging SACKs. */
@@ -2828,7 +2851,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
case TCP_CA_Recovery:
if (!(flag & FLAG_SND_UNA_ADVANCED)) {
if (tcp_is_reno(tp))
- tcp_add_reno_sack(sk, num_dupack);
+ tcp_add_reno_sack(sk, num_dupack, ece_ack);
} else {
if (tcp_try_undo_partial(sk, prior_snd_una))
return;
@@ -2853,7 +2876,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
if (tcp_is_reno(tp)) {
if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
- tcp_add_reno_sack(sk, num_dupack);
+ tcp_add_reno_sack(sk, num_dupack, ece_ack);
}
if (icsk->icsk_ca_state <= TCP_CA_Disorder)
@@ -2877,7 +2900,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
}
/* Otherwise enter Recovery state */
- tcp_enter_recovery(sk, (flag & FLAG_ECE));
+ tcp_enter_recovery(sk, ece_ack);
fast_rexmit = 1;
}
@@ -2927,6 +2950,8 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ if (!delta)
+ delta = 1;
seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
ca_rtt_us = seq_rtt_us;
}
@@ -3053,7 +3078,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
*/
static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
u32 prior_snd_una,
- struct tcp_sacktag_state *sack)
+ struct tcp_sacktag_state *sack, bool ece_ack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
u64 first_ackt, last_ackt;
@@ -3078,8 +3103,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
u8 sacked = scb->sacked;
u32 acked_pcount;
- tcp_ack_tstamp(sk, skb, prior_snd_una);
-
/* Determine how many packets and what bytes were acked, tso and else */
if (after(scb->end_seq, tp->snd_una)) {
if (tcp_skb_pcount(skb) == 1 ||
@@ -3114,7 +3137,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
if (sacked & TCPCB_SACKED_ACKED) {
tp->sacked_out -= acked_pcount;
} else if (tcp_is_sack(tp)) {
- tp->delivered += acked_pcount;
+ tcp_count_delivered(tp, acked_pcount, ece_ack);
if (!tcp_skb_spurious_retrans(tp, skb))
tcp_rack_advance(tp, sacked, scb->end_seq,
tcp_skb_timestamp_us(skb));
@@ -3143,6 +3166,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
if (!fully_acked)
break;
+ tcp_ack_tstamp(sk, skb, prior_snd_una);
+
next = skb_rb_next(skb);
if (unlikely(skb == tp->retransmit_skb_hint))
tp->retransmit_skb_hint = NULL;
@@ -3158,8 +3183,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
tp->snd_up = tp->snd_una;
- if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
- flag |= FLAG_SACK_RENEGING;
+ if (skb) {
+ tcp_ack_tstamp(sk, skb, prior_snd_una);
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+ flag |= FLAG_SACK_RENEGING;
+ }
if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
@@ -3191,7 +3219,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
}
if (tcp_is_reno(tp)) {
- tcp_remove_reno_sacks(sk, pkts_acked);
+ tcp_remove_reno_sacks(sk, pkts_acked, ece_ack);
/* If any of the cumulatively ACKed segments was
* retransmitted, non-SACK case cannot confirm that
@@ -3559,10 +3587,9 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
delivered = tp->delivered - prior_delivered;
NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered);
- if (flag & FLAG_ECE) {
- tp->delivered_ce += delivered;
+ if (flag & FLAG_ECE)
NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered);
- }
+
return delivered;
}
@@ -3586,6 +3613,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
sack_state.first_sackt = 0;
sack_state.rate = &rs;
+ sack_state.sack_delivered = 0;
/* We very likely will need to access rtx queue. */
prefetch(sk->tcp_rtx_queue.rb_node);
@@ -3661,6 +3689,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
ack_ev_flags |= CA_ACK_ECE;
}
+ if (sack_state.sack_delivered)
+ tcp_count_delivered(tp, sack_state.sack_delivered,
+ flag & FLAG_ECE);
+
if (flag & FLAG_WIN_UPDATE)
ack_ev_flags |= CA_ACK_WIN_UPDATE;
@@ -3686,7 +3718,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
goto no_queue;
/* See if we can take anything off of the retransmit queue. */
- flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state);
+ flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state,
+ flag & FLAG_ECE);
tcp_rack_update_reo_wnd(sk, &rs);
@@ -4427,7 +4460,6 @@ static void tcp_sack_remove(struct tcp_sock *tp)
/**
* tcp_try_coalesce - try to merge skb to prior one
* @sk: socket
- * @dest: destination queue
* @to: prior buffer
* @from: buffer to add in queue
* @fragstolen: pointer to boolean
@@ -6489,7 +6521,6 @@ static void tcp_openreq_init(struct request_sock *req,
struct inet_request_sock *ireq = inet_rsk(req);
req->rsk_rcv_wnd = 0; /* So that tcp_send_synack() knows! */
- req->cookie_ts = 0;
tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
tcp_rsk(req)->snt_synack = 0;
@@ -6644,6 +6675,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (!req)
goto drop;
+ req->syncookie = want_cookie;
tcp_rsk(req)->af_specific = af_ops;
tcp_rsk(req)->ts_off = 0;
#if IS_ENABLED(CONFIG_MPTCP)
@@ -6671,9 +6703,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
af_ops->init_req(req, sk, skb);
- if (IS_ENABLED(CONFIG_MPTCP) && want_cookie)
- tcp_rsk(req)->is_mptcp = 0;
-
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
@@ -6709,7 +6738,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (want_cookie) {
isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
- req->cookie_ts = tmp_opt.tstamp_ok;
if (!tmp_opt.tstamp_ok)
inet_rsk(req)->ecn_ok = 0;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 04bfcbbfee83..5084333b5ab6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -76,6 +76,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/inetdevice.h>
+#include <linux/btf_ids.h>
#include <crypto/hash.h>
#include <linux/scatterlist.h>
@@ -1194,7 +1195,7 @@ static void tcp_clear_md5_list(struct sock *sk)
}
static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
- char __user *optval, int optlen)
+ sockptr_t optval, int optlen)
{
struct tcp_md5sig cmd;
struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
@@ -1205,7 +1206,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
if (optlen < sizeof(cmd))
return -EINVAL;
- if (copy_from_user(&cmd, optval, sizeof(cmd)))
+ if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
return -EFAULT;
if (sin->sin_family != AF_INET)
@@ -2134,10 +2135,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
.getsockopt = ip_getsockopt,
.addr2sockaddr = inet_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in),
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_ip_setsockopt,
- .compat_getsockopt = compat_ip_getsockopt,
-#endif
.mtu_reduced = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);
@@ -2223,13 +2220,18 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
*/
static void *listening_get_next(struct seq_file *seq, void *cur)
{
- struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+ struct tcp_seq_afinfo *afinfo;
struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
struct inet_listen_hashbucket *ilb;
struct hlist_nulls_node *node;
struct sock *sk = cur;
+ if (st->bpf_seq_afinfo)
+ afinfo = st->bpf_seq_afinfo;
+ else
+ afinfo = PDE_DATA(file_inode(seq->file));
+
if (!sk) {
get_head:
ilb = &tcp_hashinfo.listening_hash[st->bucket];
@@ -2247,7 +2249,8 @@ get_sk:
sk_nulls_for_each_from(sk, node) {
if (!net_eq(sock_net(sk), net))
continue;
- if (sk->sk_family == afinfo->family)
+ if (afinfo->family == AF_UNSPEC ||
+ sk->sk_family == afinfo->family)
return sk;
}
spin_unlock(&ilb->lock);
@@ -2284,11 +2287,16 @@ static inline bool empty_bucket(const struct tcp_iter_state *st)
*/
static void *established_get_first(struct seq_file *seq)
{
- struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+ struct tcp_seq_afinfo *afinfo;
struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
void *rc = NULL;
+ if (st->bpf_seq_afinfo)
+ afinfo = st->bpf_seq_afinfo;
+ else
+ afinfo = PDE_DATA(file_inode(seq->file));
+
st->offset = 0;
for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
struct sock *sk;
@@ -2301,7 +2309,8 @@ static void *established_get_first(struct seq_file *seq)
spin_lock_bh(lock);
sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
- if (sk->sk_family != afinfo->family ||
+ if ((afinfo->family != AF_UNSPEC &&
+ sk->sk_family != afinfo->family) ||
!net_eq(sock_net(sk), net)) {
continue;
}
@@ -2316,19 +2325,25 @@ out:
static void *established_get_next(struct seq_file *seq, void *cur)
{
- struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+ struct tcp_seq_afinfo *afinfo;
struct sock *sk = cur;
struct hlist_nulls_node *node;
struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
+ if (st->bpf_seq_afinfo)
+ afinfo = st->bpf_seq_afinfo;
+ else
+ afinfo = PDE_DATA(file_inode(seq->file));
+
++st->num;
++st->offset;
sk = sk_nulls_next(sk);
sk_nulls_for_each_from(sk, node) {
- if (sk->sk_family == afinfo->family &&
+ if ((afinfo->family == AF_UNSPEC ||
+ sk->sk_family == afinfo->family) &&
net_eq(sock_net(sk), net))
return sk;
}
@@ -2607,6 +2622,74 @@ out:
return 0;
}
+#ifdef CONFIG_BPF_SYSCALL
+struct bpf_iter__tcp {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct sock_common *, sk_common);
+ uid_t uid __aligned(8);
+};
+
+static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
+ struct sock_common *sk_common, uid_t uid)
+{
+ struct bpf_iter__tcp ctx;
+
+ meta->seq_num--; /* skip SEQ_START_TOKEN */
+ ctx.meta = meta;
+ ctx.sk_common = sk_common;
+ ctx.uid = uid;
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ struct sock *sk = v;
+ uid_t uid;
+
+ if (v == SEQ_START_TOKEN)
+ return 0;
+
+ if (sk->sk_state == TCP_TIME_WAIT) {
+ uid = 0;
+ } else if (sk->sk_state == TCP_NEW_SYN_RECV) {
+ const struct request_sock *req = v;
+
+ uid = from_kuid_munged(seq_user_ns(seq),
+ sock_i_uid(req->rsk_listener));
+ } else {
+ uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
+ }
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, false);
+ return tcp_prog_seq_show(prog, &meta, v, uid);
+}
+
+static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ if (!v) {
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, true);
+ if (prog)
+ (void)tcp_prog_seq_show(prog, &meta, v, 0);
+ }
+
+ tcp_seq_stop(seq, v);
+}
+
+static const struct seq_operations bpf_iter_tcp_seq_ops = {
+ .show = bpf_iter_tcp_seq_show,
+ .start = tcp_seq_start,
+ .next = tcp_seq_next,
+ .stop = bpf_iter_tcp_seq_stop,
+};
+#endif
+
static const struct seq_operations tcp4_seq_ops = {
.show = tcp4_seq_show,
.start = tcp_seq_start,
@@ -2687,10 +2770,6 @@ struct proto tcp_prot = {
.rsk_prot = &tcp_request_sock_ops,
.h.hashinfo = &tcp_hashinfo,
.no_autobind = true,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_tcp_setsockopt,
- .compat_getsockopt = compat_tcp_getsockopt,
-#endif
.diag_destroy = tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);
@@ -2838,8 +2917,68 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
.exit_batch = tcp_sk_exit_batch,
};
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta,
+ struct sock_common *sk_common, uid_t uid)
+
+static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux)
+{
+ struct tcp_iter_state *st = priv_data;
+ struct tcp_seq_afinfo *afinfo;
+ int ret;
+
+ afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
+ if (!afinfo)
+ return -ENOMEM;
+
+ afinfo->family = AF_UNSPEC;
+ st->bpf_seq_afinfo = afinfo;
+ ret = bpf_iter_init_seq_net(priv_data, aux);
+ if (ret)
+ kfree(afinfo);
+ return ret;
+}
+
+static void bpf_iter_fini_tcp(void *priv_data)
+{
+ struct tcp_iter_state *st = priv_data;
+
+ kfree(st->bpf_seq_afinfo);
+ bpf_iter_fini_seq_net(priv_data);
+}
+
+static const struct bpf_iter_seq_info tcp_seq_info = {
+ .seq_ops = &bpf_iter_tcp_seq_ops,
+ .init_seq_private = bpf_iter_init_tcp,
+ .fini_seq_private = bpf_iter_fini_tcp,
+ .seq_priv_size = sizeof(struct tcp_iter_state),
+};
+
+static struct bpf_iter_reg tcp_reg_info = {
+ .target = "tcp",
+ .ctx_arg_info_size = 1,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__tcp, sk_common),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+ .seq_info = &tcp_seq_info,
+};
+
+static void __init bpf_iter_register(void)
+{
+ tcp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON];
+ if (bpf_iter_reg_target(&tcp_reg_info))
+ pr_warn("Warning: could not register bpf iterator tcp\n");
+}
+
+#endif
+
void __init tcp_v4_init(void)
{
if (register_pernet_subsys(&tcp_sk_ops))
panic("Failed to create the TCP control socket.\n");
+
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+ bpf_iter_register();
+#endif
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0bc05d68cd74..85ff417bda7f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1066,6 +1066,10 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
}
+INDIRECT_CALLABLE_DECLARE(int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl));
+INDIRECT_CALLABLE_DECLARE(int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl));
+INDIRECT_CALLABLE_DECLARE(void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb));
+
/* This routine actually transmits TCP packets queued in by
* tcp_do_sendmsg(). This is used by both the initial
* transmission and possible later retransmissions.
@@ -1209,7 +1213,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
}
#endif
- icsk->icsk_af_ops->send_check(sk, skb);
+ INDIRECT_CALL_INET(icsk->icsk_af_ops->send_check,
+ tcp_v6_send_check, tcp_v4_send_check,
+ sk, skb);
if (likely(tcb->tcp_flags & TCPHDR_ACK))
tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
@@ -1237,7 +1243,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
tcp_add_tx_delay(skb, tp);
- err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
+ err = INDIRECT_CALL_INET(icsk->icsk_af_ops->queue_xmit,
+ inet6_csk_xmit, ip_queue_xmit,
+ sk, skb, &inet->cork.fl);
if (unlikely(err > 0)) {
tcp_enter_cwr(sk);
@@ -3332,6 +3340,8 @@ int tcp_send_synack(struct sock *sk)
* sk: listener socket
* dst: dst entry attached to the SYNACK
* req: request_sock pointer
+ * foc: cookie for tcp fast open
+ * synack_type: Type of synback to prepare
*
* Allocate one skb and build a SYNACK packet.
* @dst is consumed : Caller should not use it again.
@@ -3383,7 +3393,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
memset(&opts, 0, sizeof(opts));
now = tcp_clock_ns();
#ifdef CONFIG_SYN_COOKIES
- if (unlikely(req->cookie_ts))
+ if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok))
skb->skb_mstamp_ns = cookie_init_timestamp(req, now);
else
#endif
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index ada046f425d2..0c08c420fbc2 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -314,7 +314,7 @@ out:
/**
* tcp_delack_timer() - The TCP delayed ACK timeout handler
- * @data: Pointer to the current socket. (gets casted to struct sock *)
+ * @t: Pointer to the timer. (gets casted to struct sock *)
*
* This function gets (indirectly) called when the kernel timer for a TCP packet
* of this socket expires. Calls tcp_delack_timer_handler() to do the actual work.
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 50a9a6e2c4cd..cd50a61c9976 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -7,7 +7,7 @@
* "TCP Veno: TCP Enhancement for Transmission over Wireless Access Networks."
* IEEE Journal on Selected Areas in Communication,
* Feb. 2003.
- * See http://www.ie.cuhk.edu.hk/fileadmin/staff_upload/soung/Journal/J3.pdf
+ * See https://www.ie.cuhk.edu.hk/fileadmin/staff_upload/soung/Journal/J3.pdf
*/
#include <linux/mm.h>
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index c4b2ccbeba04..e44aaf41a138 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -110,6 +110,33 @@ drop:
return 0;
}
+#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL)
+static int tunnel4_rcv_cb(struct sk_buff *skb, u8 proto, int err)
+{
+ struct xfrm_tunnel __rcu *head;
+ struct xfrm_tunnel *handler;
+ int ret;
+
+ head = (proto == IPPROTO_IPIP) ? tunnel4_handlers : tunnel64_handlers;
+
+ for_each_tunnel_rcu(head, handler) {
+ if (handler->cb_handler) {
+ ret = handler->cb_handler(skb, err);
+ if (ret <= 0)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static const struct xfrm_input_afinfo tunnel4_input_afinfo = {
+ .family = AF_INET,
+ .is_ipip = true,
+ .callback = tunnel4_rcv_cb,
+};
+#endif
+
#if IS_ENABLED(CONFIG_IPV6)
static int tunnel64_rcv(struct sk_buff *skb)
{
@@ -231,6 +258,18 @@ static int __init tunnel4_init(void)
goto err;
}
#endif
+#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL)
+ if (xfrm_input_register_afinfo(&tunnel4_input_afinfo)) {
+ inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
+#if IS_ENABLED(CONFIG_IPV6)
+ inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6);
+#endif
+#if IS_ENABLED(CONFIG_MPLS)
+ inet_del_protocol(&tunnelmpls4_protocol, IPPROTO_MPLS);
+#endif
+ goto err;
+ }
+#endif
return 0;
err:
@@ -240,6 +279,10 @@ err:
static void __exit tunnel4_fini(void)
{
+#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL)
+ if (xfrm_input_unregister_afinfo(&tunnel4_input_afinfo))
+ pr_err("tunnel4 close: can't remove input afinfo\n");
+#endif
#if IS_ENABLED(CONFIG_MPLS)
if (inet_del_protocol(&tunnelmpls4_protocol, IPPROTO_MPLS))
pr_err("tunnelmpls4 close: can't remove protocol\n");
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4077d589b72e..e88efba07551 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -106,6 +106,7 @@
#include <net/xfrm.h>
#include <trace/events/udp.h>
#include <linux/static_key.h>
+#include <linux/btf_ids.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>
#include "udp_impl.h"
@@ -408,6 +409,22 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
udp_ehash_secret + net_hash_mix(net));
}
+static struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned short hnum)
+{
+ struct sock *reuse_sk = NULL;
+ u32 hash;
+
+ if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
+ hash = udp_ehashfn(net, daddr, hnum, saddr, sport);
+ reuse_sk = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ }
+ return reuse_sk;
+}
+
/* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
@@ -416,9 +433,8 @@ static struct sock *udp4_lib_lookup2(struct net *net,
struct udp_hslot *hslot2,
struct sk_buff *skb)
{
- struct sock *sk, *result, *reuseport_result;
+ struct sock *sk, *result;
int score, badness;
- u32 hash = 0;
result = NULL;
badness = 0;
@@ -426,25 +442,42 @@ static struct sock *udp4_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- reuseport_result = NULL;
-
- if (sk->sk_reuseport &&
- sk->sk_state != TCP_ESTABLISHED) {
- hash = udp_ehashfn(net, daddr, hnum,
- saddr, sport);
- reuseport_result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (reuseport_result && !reuseport_has_conns(sk, false))
- return reuseport_result;
- }
+ result = lookup_reuseport(net, sk, skb,
+ saddr, sport, daddr, hnum);
+ /* Fall back to scoring if group has connections */
+ if (result && !reuseport_has_conns(sk, false))
+ return result;
- result = reuseport_result ? : sk;
+ result = result ? : sk;
badness = score;
}
}
return result;
}
+static struct sock *udp4_lookup_run_bpf(struct net *net,
+ struct udp_table *udptable,
+ struct sk_buff *skb,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, u16 hnum)
+{
+ struct sock *sk, *reuse_sk;
+ bool no_reuseport;
+
+ if (udptable != &udp_table)
+ return NULL; /* only UDP is supported */
+
+ no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP,
+ saddr, sport, daddr, hnum, &sk);
+ if (no_reuseport || IS_ERR_OR_NULL(sk))
+ return sk;
+
+ reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
+ if (reuse_sk)
+ sk = reuse_sk;
+ return sk;
+}
+
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
@@ -452,27 +485,45 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport, int dif,
int sdif, struct udp_table *udptable, struct sk_buff *skb)
{
- struct sock *result;
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2;
struct udp_hslot *hslot2;
+ struct sock *result, *sk;
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
+ /* Lookup connected or non-wildcard socket */
result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, sdif,
hslot2, skb);
- if (!result) {
- hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
-
- result = udp4_lib_lookup2(net, saddr, sport,
- htonl(INADDR_ANY), hnum, dif, sdif,
- hslot2, skb);
+ if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
+ goto done;
+
+ /* Lookup redirect from BPF */
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+ sk = udp4_lookup_run_bpf(net, udptable, skb,
+ saddr, sport, daddr, hnum);
+ if (sk) {
+ result = sk;
+ goto done;
+ }
}
+
+ /* Got non-wildcard socket or error on first lookup */
+ if (result)
+ goto done;
+
+ /* Lookup wildcard sockets */
+ hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+
+ result = udp4_lib_lookup2(net, saddr, sport,
+ htonl(INADDR_ANY), hnum, dif, sdif,
+ hslot2, skb);
+done:
if (IS_ERR(result))
return NULL;
return result;
@@ -2535,7 +2586,7 @@ void udp_destroy_sock(struct sock *sk)
* Socket option code for UDP
*/
int udp_lib_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen,
+ sockptr_t optval, unsigned int optlen,
int (*push_pending_frames)(struct sock *))
{
struct udp_sock *up = udp_sk(sk);
@@ -2546,7 +2597,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
valbool = val ? 1 : 0;
@@ -2650,26 +2701,16 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
}
EXPORT_SYMBOL(udp_lib_setsockopt);
-int udp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen)
{
if (level == SOL_UDP || level == SOL_UDPLITE)
- return udp_lib_setsockopt(sk, level, optname, optval, optlen,
+ return udp_lib_setsockopt(sk, level, optname,
+ optval, optlen,
udp_push_pending_frames);
return ip_setsockopt(sk, level, optname, optval, optlen);
}
-#ifdef CONFIG_COMPAT
-int compat_udp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- if (level == SOL_UDP || level == SOL_UDPLITE)
- return udp_lib_setsockopt(sk, level, optname, optval, optlen,
- udp_push_pending_frames);
- return compat_ip_setsockopt(sk, level, optname, optval, optlen);
-}
-#endif
-
int udp_lib_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -2735,20 +2776,11 @@ int udp_getsockopt(struct sock *sk, int level, int optname,
return ip_getsockopt(sk, level, optname, optval, optlen);
}
-#ifdef CONFIG_COMPAT
-int compat_udp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- if (level == SOL_UDP || level == SOL_UDPLITE)
- return udp_lib_getsockopt(sk, level, optname, optval, optlen);
- return compat_ip_getsockopt(sk, level, optname, optval, optlen);
-}
-#endif
/**
* udp_poll - wait for a UDP event.
- * @file - file struct
- * @sock - socket
- * @wait - poll table
+ * @file: - file struct
+ * @sock: - socket
+ * @wait: - poll table
*
* This is same as datagram poll, except for the special case of
* blocking sockets. If application is using a blocking fd
@@ -2815,10 +2847,6 @@ struct proto udp_prot = {
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp_sock),
.h.udp_table = &udp_table,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_udp_setsockopt,
- .compat_getsockopt = compat_udp_getsockopt,
-#endif
.diag_destroy = udp_abort,
};
EXPORT_SYMBOL(udp_prot);
@@ -2829,10 +2857,15 @@ EXPORT_SYMBOL(udp_prot);
static struct sock *udp_get_first(struct seq_file *seq, int start)
{
struct sock *sk;
- struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+ struct udp_seq_afinfo *afinfo;
struct udp_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
+ if (state->bpf_seq_afinfo)
+ afinfo = state->bpf_seq_afinfo;
+ else
+ afinfo = PDE_DATA(file_inode(seq->file));
+
for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
++state->bucket) {
struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket];
@@ -2844,7 +2877,8 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
sk_for_each(sk, &hslot->head) {
if (!net_eq(sock_net(sk), net))
continue;
- if (sk->sk_family == afinfo->family)
+ if (afinfo->family == AF_UNSPEC ||
+ sk->sk_family == afinfo->family)
goto found;
}
spin_unlock_bh(&hslot->lock);
@@ -2856,13 +2890,20 @@ found:
static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
{
- struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+ struct udp_seq_afinfo *afinfo;
struct udp_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
+ if (state->bpf_seq_afinfo)
+ afinfo = state->bpf_seq_afinfo;
+ else
+ afinfo = PDE_DATA(file_inode(seq->file));
+
do {
sk = sk_next(sk);
- } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != afinfo->family));
+ } while (sk && (!net_eq(sock_net(sk), net) ||
+ (afinfo->family != AF_UNSPEC &&
+ sk->sk_family != afinfo->family)));
if (!sk) {
if (state->bucket <= afinfo->udp_table->mask)
@@ -2907,9 +2948,14 @@ EXPORT_SYMBOL(udp_seq_next);
void udp_seq_stop(struct seq_file *seq, void *v)
{
- struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+ struct udp_seq_afinfo *afinfo;
struct udp_iter_state *state = seq->private;
+ if (state->bpf_seq_afinfo)
+ afinfo = state->bpf_seq_afinfo;
+ else
+ afinfo = PDE_DATA(file_inode(seq->file));
+
if (state->bucket <= afinfo->udp_table->mask)
spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
}
@@ -2953,6 +2999,67 @@ int udp4_seq_show(struct seq_file *seq, void *v)
return 0;
}
+#ifdef CONFIG_BPF_SYSCALL
+struct bpf_iter__udp {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct udp_sock *, udp_sk);
+ uid_t uid __aligned(8);
+ int bucket __aligned(8);
+};
+
+static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
+ struct udp_sock *udp_sk, uid_t uid, int bucket)
+{
+ struct bpf_iter__udp ctx;
+
+ meta->seq_num--; /* skip SEQ_START_TOKEN */
+ ctx.meta = meta;
+ ctx.udp_sk = udp_sk;
+ ctx.uid = uid;
+ ctx.bucket = bucket;
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v)
+{
+ struct udp_iter_state *state = seq->private;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ struct sock *sk = v;
+ uid_t uid;
+
+ if (v == SEQ_START_TOKEN)
+ return 0;
+
+ uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, false);
+ return udp_prog_seq_show(prog, &meta, v, uid, state->bucket);
+}
+
+static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ if (!v) {
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, true);
+ if (prog)
+ (void)udp_prog_seq_show(prog, &meta, v, 0, 0);
+ }
+
+ udp_seq_stop(seq, v);
+}
+
+static const struct seq_operations bpf_iter_udp_seq_ops = {
+ .start = udp_seq_start,
+ .next = udp_seq_next,
+ .stop = bpf_iter_udp_seq_stop,
+ .show = bpf_iter_udp_seq_show,
+};
+#endif
+
const struct seq_operations udp_seq_ops = {
.start = udp_seq_start,
.next = udp_seq_next,
@@ -3070,6 +3177,62 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = {
.init = udp_sysctl_init,
};
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta,
+ struct udp_sock *udp_sk, uid_t uid, int bucket)
+
+static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
+{
+ struct udp_iter_state *st = priv_data;
+ struct udp_seq_afinfo *afinfo;
+ int ret;
+
+ afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
+ if (!afinfo)
+ return -ENOMEM;
+
+ afinfo->family = AF_UNSPEC;
+ afinfo->udp_table = &udp_table;
+ st->bpf_seq_afinfo = afinfo;
+ ret = bpf_iter_init_seq_net(priv_data, aux);
+ if (ret)
+ kfree(afinfo);
+ return ret;
+}
+
+static void bpf_iter_fini_udp(void *priv_data)
+{
+ struct udp_iter_state *st = priv_data;
+
+ kfree(st->bpf_seq_afinfo);
+ bpf_iter_fini_seq_net(priv_data);
+}
+
+static const struct bpf_iter_seq_info udp_seq_info = {
+ .seq_ops = &bpf_iter_udp_seq_ops,
+ .init_seq_private = bpf_iter_init_udp,
+ .fini_seq_private = bpf_iter_fini_udp,
+ .seq_priv_size = sizeof(struct udp_iter_state),
+};
+
+static struct bpf_iter_reg udp_reg_info = {
+ .target = "udp",
+ .ctx_arg_info_size = 1,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__udp, udp_sk),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+ .seq_info = &udp_seq_info,
+};
+
+static void __init bpf_iter_register(void)
+{
+ udp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UDP];
+ if (bpf_iter_reg_target(&udp_reg_info))
+ pr_warn("Warning: could not register bpf iterator udp\n");
+}
+#endif
+
void __init udp_init(void)
{
unsigned long limit;
@@ -3095,4 +3258,8 @@ void __init udp_init(void)
if (register_pernet_subsys(&udp_sysctl_ops))
panic("UDP: failed to init sysctl parameters.\n");
+
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+ bpf_iter_register();
+#endif
}
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 6b2fa77eeb1c..2878d8285caf 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -12,17 +12,11 @@ int __udp4_lib_err(struct sk_buff *, u32, struct udp_table *);
int udp_v4_get_port(struct sock *sk, unsigned short snum);
void udp_v4_rehash(struct sock *sk);
-int udp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen);
int udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
-#ifdef CONFIG_COMPAT
-int compat_udp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
-int compat_udp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
-#endif
int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int flags, int *addr_len);
int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel_core.c
index 3eecba0874aa..3eecba0874aa 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel_core.c
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
new file mode 100644
index 000000000000..69962165c0e8
--- /dev/null
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -0,0 +1,897 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (c) 2020 Facebook Inc.
+
+#include <linux/ethtool_netlink.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <net/udp_tunnel.h>
+#include <net/vxlan.h>
+
+enum udp_tunnel_nic_table_entry_flags {
+ UDP_TUNNEL_NIC_ENTRY_ADD = BIT(0),
+ UDP_TUNNEL_NIC_ENTRY_DEL = BIT(1),
+ UDP_TUNNEL_NIC_ENTRY_OP_FAIL = BIT(2),
+ UDP_TUNNEL_NIC_ENTRY_FROZEN = BIT(3),
+};
+
+struct udp_tunnel_nic_table_entry {
+ __be16 port;
+ u8 type;
+ u8 use_cnt;
+ u8 flags;
+ u8 hw_priv;
+};
+
+/**
+ * struct udp_tunnel_nic - UDP tunnel port offload state
+ * @work: async work for talking to hardware from process context
+ * @dev: netdev pointer
+ * @need_sync: at least one port start changed
+ * @need_replay: space was freed, we need a replay of all ports
+ * @work_pending: @work is currently scheduled
+ * @n_tables: number of tables under @entries
+ * @missed: bitmap of tables which overflown
+ * @entries: table of tables of ports currently offloaded
+ */
+struct udp_tunnel_nic {
+ struct work_struct work;
+
+ struct net_device *dev;
+
+ u8 need_sync:1;
+ u8 need_replay:1;
+ u8 work_pending:1;
+
+ unsigned int n_tables;
+ unsigned long missed;
+ struct udp_tunnel_nic_table_entry **entries;
+};
+
+/* We ensure all work structs are done using driver state, but not the code.
+ * We need a workqueue we can flush before module gets removed.
+ */
+static struct workqueue_struct *udp_tunnel_nic_workqueue;
+
+static const char *udp_tunnel_nic_tunnel_type_name(unsigned int type)
+{
+ switch (type) {
+ case UDP_TUNNEL_TYPE_VXLAN:
+ return "vxlan";
+ case UDP_TUNNEL_TYPE_GENEVE:
+ return "geneve";
+ case UDP_TUNNEL_TYPE_VXLAN_GPE:
+ return "vxlan-gpe";
+ default:
+ return "unknown";
+ }
+}
+
+static bool
+udp_tunnel_nic_entry_is_free(struct udp_tunnel_nic_table_entry *entry)
+{
+ return entry->use_cnt == 0 && !entry->flags;
+}
+
+static bool
+udp_tunnel_nic_entry_is_present(struct udp_tunnel_nic_table_entry *entry)
+{
+ return entry->use_cnt && !(entry->flags & ~UDP_TUNNEL_NIC_ENTRY_FROZEN);
+}
+
+static bool
+udp_tunnel_nic_entry_is_frozen(struct udp_tunnel_nic_table_entry *entry)
+{
+ return entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN;
+}
+
+static void
+udp_tunnel_nic_entry_freeze_used(struct udp_tunnel_nic_table_entry *entry)
+{
+ if (!udp_tunnel_nic_entry_is_free(entry))
+ entry->flags |= UDP_TUNNEL_NIC_ENTRY_FROZEN;
+}
+
+static void
+udp_tunnel_nic_entry_unfreeze(struct udp_tunnel_nic_table_entry *entry)
+{
+ entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_FROZEN;
+}
+
+static bool
+udp_tunnel_nic_entry_is_queued(struct udp_tunnel_nic_table_entry *entry)
+{
+ return entry->flags & (UDP_TUNNEL_NIC_ENTRY_ADD |
+ UDP_TUNNEL_NIC_ENTRY_DEL);
+}
+
+static void
+udp_tunnel_nic_entry_queue(struct udp_tunnel_nic *utn,
+ struct udp_tunnel_nic_table_entry *entry,
+ unsigned int flag)
+{
+ entry->flags |= flag;
+ utn->need_sync = 1;
+}
+
+static void
+udp_tunnel_nic_ti_from_entry(struct udp_tunnel_nic_table_entry *entry,
+ struct udp_tunnel_info *ti)
+{
+ memset(ti, 0, sizeof(*ti));
+ ti->port = entry->port;
+ ti->type = entry->type;
+ ti->hw_priv = entry->hw_priv;
+}
+
+static bool
+udp_tunnel_nic_is_empty(struct net_device *dev, struct udp_tunnel_nic *utn)
+{
+ const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+ unsigned int i, j;
+
+ for (i = 0; i < utn->n_tables; i++)
+ for (j = 0; j < info->tables[i].n_entries; j++)
+ if (!udp_tunnel_nic_entry_is_free(&utn->entries[i][j]))
+ return false;
+ return true;
+}
+
+static bool
+udp_tunnel_nic_should_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
+{
+ const struct udp_tunnel_nic_table_info *table;
+ unsigned int i, j;
+
+ if (!utn->missed)
+ return false;
+
+ for (i = 0; i < utn->n_tables; i++) {
+ table = &dev->udp_tunnel_nic_info->tables[i];
+ if (!test_bit(i, &utn->missed))
+ continue;
+
+ for (j = 0; j < table->n_entries; j++)
+ if (udp_tunnel_nic_entry_is_free(&utn->entries[i][j]))
+ return true;
+ }
+
+ return false;
+}
+
+static void
+__udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table,
+ unsigned int idx, struct udp_tunnel_info *ti)
+{
+ struct udp_tunnel_nic_table_entry *entry;
+ struct udp_tunnel_nic *utn;
+
+ utn = dev->udp_tunnel_nic;
+ entry = &utn->entries[table][idx];
+
+ if (entry->use_cnt)
+ udp_tunnel_nic_ti_from_entry(entry, ti);
+}
+
+static void
+__udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table,
+ unsigned int idx, u8 priv)
+{
+ dev->udp_tunnel_nic->entries[table][idx].hw_priv = priv;
+}
+
+static void
+udp_tunnel_nic_entry_update_done(struct udp_tunnel_nic_table_entry *entry,
+ int err)
+{
+ bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
+
+ WARN_ON_ONCE(entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD &&
+ entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL);
+
+ if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD &&
+ (!err || (err == -EEXIST && dodgy)))
+ entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_ADD;
+
+ if (entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL &&
+ (!err || (err == -ENOENT && dodgy)))
+ entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_DEL;
+
+ if (!err)
+ entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
+ else
+ entry->flags |= UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
+}
+
+static void
+udp_tunnel_nic_device_sync_one(struct net_device *dev,
+ struct udp_tunnel_nic *utn,
+ unsigned int table, unsigned int idx)
+{
+ struct udp_tunnel_nic_table_entry *entry;
+ struct udp_tunnel_info ti;
+ int err;
+
+ entry = &utn->entries[table][idx];
+ if (!udp_tunnel_nic_entry_is_queued(entry))
+ return;
+
+ udp_tunnel_nic_ti_from_entry(entry, &ti);
+ if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD)
+ err = dev->udp_tunnel_nic_info->set_port(dev, table, idx, &ti);
+ else
+ err = dev->udp_tunnel_nic_info->unset_port(dev, table, idx,
+ &ti);
+ udp_tunnel_nic_entry_update_done(entry, err);
+
+ if (err)
+ netdev_warn(dev,
+ "UDP tunnel port sync failed port %d type %s: %d\n",
+ be16_to_cpu(entry->port),
+ udp_tunnel_nic_tunnel_type_name(entry->type),
+ err);
+}
+
+static void
+udp_tunnel_nic_device_sync_by_port(struct net_device *dev,
+ struct udp_tunnel_nic *utn)
+{
+ const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+ unsigned int i, j;
+
+ for (i = 0; i < utn->n_tables; i++)
+ for (j = 0; j < info->tables[i].n_entries; j++)
+ udp_tunnel_nic_device_sync_one(dev, utn, i, j);
+}
+
+static void
+udp_tunnel_nic_device_sync_by_table(struct net_device *dev,
+ struct udp_tunnel_nic *utn)
+{
+ const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+ unsigned int i, j;
+ int err;
+
+ for (i = 0; i < utn->n_tables; i++) {
+ /* Find something that needs sync in this table */
+ for (j = 0; j < info->tables[i].n_entries; j++)
+ if (udp_tunnel_nic_entry_is_queued(&utn->entries[i][j]))
+ break;
+ if (j == info->tables[i].n_entries)
+ continue;
+
+ err = info->sync_table(dev, i);
+ if (err)
+ netdev_warn(dev, "UDP tunnel port sync failed for table %d: %d\n",
+ i, err);
+
+ for (j = 0; j < info->tables[i].n_entries; j++) {
+ struct udp_tunnel_nic_table_entry *entry;
+
+ entry = &utn->entries[i][j];
+ if (udp_tunnel_nic_entry_is_queued(entry))
+ udp_tunnel_nic_entry_update_done(entry, err);
+ }
+ }
+}
+
+static void
+__udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
+{
+ if (!utn->need_sync)
+ return;
+
+ if (dev->udp_tunnel_nic_info->sync_table)
+ udp_tunnel_nic_device_sync_by_table(dev, utn);
+ else
+ udp_tunnel_nic_device_sync_by_port(dev, utn);
+
+ utn->need_sync = 0;
+ /* Can't replay directly here, in case we come from the tunnel driver's
+ * notification - trying to replay may deadlock inside tunnel driver.
+ */
+ utn->need_replay = udp_tunnel_nic_should_replay(dev, utn);
+}
+
+static void
+udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
+{
+ const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+ bool may_sleep;
+
+ if (!utn->need_sync)
+ return;
+
+ /* Drivers which sleep in the callback need to update from
+ * the workqueue, if we come from the tunnel driver's notification.
+ */
+ may_sleep = info->flags & UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
+ if (!may_sleep)
+ __udp_tunnel_nic_device_sync(dev, utn);
+ if (may_sleep || utn->need_replay) {
+ queue_work(udp_tunnel_nic_workqueue, &utn->work);
+ utn->work_pending = 1;
+ }
+}
+
+static bool
+udp_tunnel_nic_table_is_capable(const struct udp_tunnel_nic_table_info *table,
+ struct udp_tunnel_info *ti)
+{
+ return table->tunnel_types & ti->type;
+}
+
+static bool
+udp_tunnel_nic_is_capable(struct net_device *dev, struct udp_tunnel_nic *utn,
+ struct udp_tunnel_info *ti)
+{
+ const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+ unsigned int i;
+
+ /* Special case IPv4-only NICs */
+ if (info->flags & UDP_TUNNEL_NIC_INFO_IPV4_ONLY &&
+ ti->sa_family != AF_INET)
+ return false;
+
+ for (i = 0; i < utn->n_tables; i++)
+ if (udp_tunnel_nic_table_is_capable(&info->tables[i], ti))
+ return true;
+ return false;
+}
+
+static int
+udp_tunnel_nic_has_collision(struct net_device *dev, struct udp_tunnel_nic *utn,
+ struct udp_tunnel_info *ti)
+{
+ const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+ struct udp_tunnel_nic_table_entry *entry;
+ unsigned int i, j;
+
+ for (i = 0; i < utn->n_tables; i++)
+ for (j = 0; j < info->tables[i].n_entries; j++) {
+ entry = &utn->entries[i][j];
+
+ if (!udp_tunnel_nic_entry_is_free(entry) &&
+ entry->port == ti->port &&
+ entry->type != ti->type) {
+ __set_bit(i, &utn->missed);
+ return true;
+ }
+ }
+ return false;
+}
+
+static void
+udp_tunnel_nic_entry_adj(struct udp_tunnel_nic *utn,
+ unsigned int table, unsigned int idx, int use_cnt_adj)
+{
+ struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx];
+ bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
+ unsigned int from, to;
+
+ /* If not going from used to unused or vice versa - all done.
+ * For dodgy entries make sure we try to sync again (queue the entry).
+ */
+ entry->use_cnt += use_cnt_adj;
+ if (!dodgy && !entry->use_cnt == !(entry->use_cnt - use_cnt_adj))
+ return;
+
+ /* Cancel the op before it was sent to the device, if possible,
+ * otherwise we'd need to take special care to issue commands
+ * in the same order the ports arrived.
+ */
+ if (use_cnt_adj < 0) {
+ from = UDP_TUNNEL_NIC_ENTRY_ADD;
+ to = UDP_TUNNEL_NIC_ENTRY_DEL;
+ } else {
+ from = UDP_TUNNEL_NIC_ENTRY_DEL;
+ to = UDP_TUNNEL_NIC_ENTRY_ADD;
+ }
+
+ if (entry->flags & from) {
+ entry->flags &= ~from;
+ if (!dodgy)
+ return;
+ }
+
+ udp_tunnel_nic_entry_queue(utn, entry, to);
+}
+
+static bool
+udp_tunnel_nic_entry_try_adj(struct udp_tunnel_nic *utn,
+ unsigned int table, unsigned int idx,
+ struct udp_tunnel_info *ti, int use_cnt_adj)
+{
+ struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx];
+
+ if (udp_tunnel_nic_entry_is_free(entry) ||
+ entry->port != ti->port ||
+ entry->type != ti->type)
+ return false;
+
+ if (udp_tunnel_nic_entry_is_frozen(entry))
+ return true;
+
+ udp_tunnel_nic_entry_adj(utn, table, idx, use_cnt_adj);
+ return true;
+}
+
+/* Try to find existing matching entry and adjust its use count, instead of
+ * adding a new one. Returns true if entry was found. In case of delete the
+ * entry may have gotten removed in the process, in which case it will be
+ * queued for removal.
+ */
+static bool
+udp_tunnel_nic_try_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
+ struct udp_tunnel_info *ti, int use_cnt_adj)
+{
+ const struct udp_tunnel_nic_table_info *table;
+ unsigned int i, j;
+
+ for (i = 0; i < utn->n_tables; i++) {
+ table = &dev->udp_tunnel_nic_info->tables[i];
+ if (!udp_tunnel_nic_table_is_capable(table, ti))
+ continue;
+
+ for (j = 0; j < table->n_entries; j++)
+ if (udp_tunnel_nic_entry_try_adj(utn, i, j, ti,
+ use_cnt_adj))
+ return true;
+ }
+
+ return false;
+}
+
+static bool
+udp_tunnel_nic_add_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
+ struct udp_tunnel_info *ti)
+{
+ return udp_tunnel_nic_try_existing(dev, utn, ti, +1);
+}
+
+static bool
+udp_tunnel_nic_del_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
+ struct udp_tunnel_info *ti)
+{
+ return udp_tunnel_nic_try_existing(dev, utn, ti, -1);
+}
+
+static bool
+udp_tunnel_nic_add_new(struct net_device *dev, struct udp_tunnel_nic *utn,
+ struct udp_tunnel_info *ti)
+{
+ const struct udp_tunnel_nic_table_info *table;
+ unsigned int i, j;
+
+ for (i = 0; i < utn->n_tables; i++) {
+ table = &dev->udp_tunnel_nic_info->tables[i];
+ if (!udp_tunnel_nic_table_is_capable(table, ti))
+ continue;
+
+ for (j = 0; j < table->n_entries; j++) {
+ struct udp_tunnel_nic_table_entry *entry;
+
+ entry = &utn->entries[i][j];
+ if (!udp_tunnel_nic_entry_is_free(entry))
+ continue;
+
+ entry->port = ti->port;
+ entry->type = ti->type;
+ entry->use_cnt = 1;
+ udp_tunnel_nic_entry_queue(utn, entry,
+ UDP_TUNNEL_NIC_ENTRY_ADD);
+ return true;
+ }
+
+ /* The different table may still fit this port in, but there
+ * are no devices currently which have multiple tables accepting
+ * the same tunnel type, and false positives are okay.
+ */
+ __set_bit(i, &utn->missed);
+ }
+
+ return false;
+}
+
+static void
+__udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti)
+{
+ const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+ struct udp_tunnel_nic *utn;
+
+ utn = dev->udp_tunnel_nic;
+ if (!utn)
+ return;
+ if (!netif_running(dev) && info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)
+ return;
+ if (info->flags & UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN &&
+ ti->port == htons(IANA_VXLAN_UDP_PORT)) {
+ if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
+ netdev_warn(dev, "device assumes port 4789 will be used by vxlan tunnels\n");
+ return;
+ }
+
+ if (!udp_tunnel_nic_is_capable(dev, utn, ti))
+ return;
+
+ /* It may happen that a tunnel of one type is removed and different
+ * tunnel type tries to reuse its port before the device was informed.
+ * Rely on utn->missed to re-add this port later.
+ */
+ if (udp_tunnel_nic_has_collision(dev, utn, ti))
+ return;
+
+ if (!udp_tunnel_nic_add_existing(dev, utn, ti))
+ udp_tunnel_nic_add_new(dev, utn, ti);
+
+ udp_tunnel_nic_device_sync(dev, utn);
+}
+
+static void
+__udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti)
+{
+ struct udp_tunnel_nic *utn;
+
+ utn = dev->udp_tunnel_nic;
+ if (!utn)
+ return;
+
+ if (!udp_tunnel_nic_is_capable(dev, utn, ti))
+ return;
+
+ udp_tunnel_nic_del_existing(dev, utn, ti);
+
+ udp_tunnel_nic_device_sync(dev, utn);
+}
+
+static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
+{
+ const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+ struct udp_tunnel_nic *utn;
+ unsigned int i, j;
+
+ ASSERT_RTNL();
+
+ utn = dev->udp_tunnel_nic;
+ if (!utn)
+ return;
+
+ utn->need_sync = false;
+ for (i = 0; i < utn->n_tables; i++)
+ for (j = 0; j < info->tables[i].n_entries; j++) {
+ struct udp_tunnel_nic_table_entry *entry;
+
+ entry = &utn->entries[i][j];
+
+ entry->flags &= ~(UDP_TUNNEL_NIC_ENTRY_DEL |
+ UDP_TUNNEL_NIC_ENTRY_OP_FAIL);
+ /* We don't release rtnl across ops */
+ WARN_ON(entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN);
+ if (!entry->use_cnt)
+ continue;
+
+ udp_tunnel_nic_entry_queue(utn, entry,
+ UDP_TUNNEL_NIC_ENTRY_ADD);
+ }
+
+ __udp_tunnel_nic_device_sync(dev, utn);
+}
+
+static size_t
+__udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table)
+{
+ const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+ struct udp_tunnel_nic *utn;
+ unsigned int j;
+ size_t size;
+
+ utn = dev->udp_tunnel_nic;
+ if (!utn)
+ return 0;
+
+ size = 0;
+ for (j = 0; j < info->tables[table].n_entries; j++) {
+ if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j]))
+ continue;
+
+ size += nla_total_size(0) + /* _TABLE_ENTRY */
+ nla_total_size(sizeof(__be16)) + /* _ENTRY_PORT */
+ nla_total_size(sizeof(u32)); /* _ENTRY_TYPE */
+ }
+
+ return size;
+}
+
+static int
+__udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table,
+ struct sk_buff *skb)
+{
+ const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+ struct udp_tunnel_nic *utn;
+ struct nlattr *nest;
+ unsigned int j;
+
+ utn = dev->udp_tunnel_nic;
+ if (!utn)
+ return 0;
+
+ for (j = 0; j < info->tables[table].n_entries; j++) {
+ if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j]))
+ continue;
+
+ nest = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY);
+
+ if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT,
+ utn->entries[table][j].port) ||
+ nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE,
+ ilog2(utn->entries[table][j].type)))
+ goto err_cancel;
+
+ nla_nest_end(skb, nest);
+ }
+
+ return 0;
+
+err_cancel:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = {
+ .get_port = __udp_tunnel_nic_get_port,
+ .set_port_priv = __udp_tunnel_nic_set_port_priv,
+ .add_port = __udp_tunnel_nic_add_port,
+ .del_port = __udp_tunnel_nic_del_port,
+ .reset_ntf = __udp_tunnel_nic_reset_ntf,
+ .dump_size = __udp_tunnel_nic_dump_size,
+ .dump_write = __udp_tunnel_nic_dump_write,
+};
+
+static void
+udp_tunnel_nic_flush(struct net_device *dev, struct udp_tunnel_nic *utn)
+{
+ const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+ unsigned int i, j;
+
+ for (i = 0; i < utn->n_tables; i++)
+ for (j = 0; j < info->tables[i].n_entries; j++) {
+ int adj_cnt = -utn->entries[i][j].use_cnt;
+
+ if (adj_cnt)
+ udp_tunnel_nic_entry_adj(utn, i, j, adj_cnt);
+ }
+
+ __udp_tunnel_nic_device_sync(dev, utn);
+
+ for (i = 0; i < utn->n_tables; i++)
+ memset(utn->entries[i], 0, array_size(info->tables[i].n_entries,
+ sizeof(**utn->entries)));
+ WARN_ON(utn->need_sync);
+ utn->need_replay = 0;
+}
+
+static void
+udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
+{
+ const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+ unsigned int i, j;
+
+ /* Freeze all the ports we are already tracking so that the replay
+ * does not double up the refcount.
+ */
+ for (i = 0; i < utn->n_tables; i++)
+ for (j = 0; j < info->tables[i].n_entries; j++)
+ udp_tunnel_nic_entry_freeze_used(&utn->entries[i][j]);
+ utn->missed = 0;
+ utn->need_replay = 0;
+
+ udp_tunnel_get_rx_info(dev);
+
+ for (i = 0; i < utn->n_tables; i++)
+ for (j = 0; j < info->tables[i].n_entries; j++)
+ udp_tunnel_nic_entry_unfreeze(&utn->entries[i][j]);
+}
+
+static void udp_tunnel_nic_device_sync_work(struct work_struct *work)
+{
+ struct udp_tunnel_nic *utn =
+ container_of(work, struct udp_tunnel_nic, work);
+
+ rtnl_lock();
+ utn->work_pending = 0;
+ __udp_tunnel_nic_device_sync(utn->dev, utn);
+
+ if (utn->need_replay)
+ udp_tunnel_nic_replay(utn->dev, utn);
+ rtnl_unlock();
+}
+
+static struct udp_tunnel_nic *
+udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info,
+ unsigned int n_tables)
+{
+ struct udp_tunnel_nic *utn;
+ unsigned int i;
+
+ utn = kzalloc(sizeof(*utn), GFP_KERNEL);
+ if (!utn)
+ return NULL;
+ utn->n_tables = n_tables;
+ INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work);
+
+ utn->entries = kmalloc_array(n_tables, sizeof(void *), GFP_KERNEL);
+ if (!utn->entries)
+ goto err_free_utn;
+
+ for (i = 0; i < n_tables; i++) {
+ utn->entries[i] = kcalloc(info->tables[i].n_entries,
+ sizeof(*utn->entries[i]), GFP_KERNEL);
+ if (!utn->entries[i])
+ goto err_free_prev_entries;
+ }
+
+ return utn;
+
+err_free_prev_entries:
+ while (i--)
+ kfree(utn->entries[i]);
+ kfree(utn->entries);
+err_free_utn:
+ kfree(utn);
+ return NULL;
+}
+
+static int udp_tunnel_nic_register(struct net_device *dev)
+{
+ const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+ struct udp_tunnel_nic *utn;
+ unsigned int n_tables, i;
+
+ BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE <
+ UDP_TUNNEL_NIC_MAX_TABLES);
+
+ if (WARN_ON(!info->set_port != !info->unset_port) ||
+ WARN_ON(!info->set_port == !info->sync_table) ||
+ WARN_ON(!info->tables[0].n_entries))
+ return -EINVAL;
+
+ n_tables = 1;
+ for (i = 1; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) {
+ if (!info->tables[i].n_entries)
+ continue;
+
+ n_tables++;
+ if (WARN_ON(!info->tables[i - 1].n_entries))
+ return -EINVAL;
+ }
+
+ utn = udp_tunnel_nic_alloc(info, n_tables);
+ if (!utn)
+ return -ENOMEM;
+
+ utn->dev = dev;
+ dev_hold(dev);
+ dev->udp_tunnel_nic = utn;
+
+ if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
+ udp_tunnel_get_rx_info(dev);
+
+ return 0;
+}
+
+static void
+udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
+{
+ unsigned int i;
+
+ /* Flush before we check work, so we don't waste time adding entries
+ * from the work which we will boot immediately.
+ */
+ udp_tunnel_nic_flush(dev, utn);
+
+ /* Wait for the work to be done using the state, netdev core will
+ * retry unregister until we give up our reference on this device.
+ */
+ if (utn->work_pending)
+ return;
+
+ for (i = 0; i < utn->n_tables; i++)
+ kfree(utn->entries[i]);
+ kfree(utn->entries);
+ kfree(utn);
+ dev->udp_tunnel_nic = NULL;
+ dev_put(dev);
+}
+
+static int
+udp_tunnel_nic_netdevice_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ const struct udp_tunnel_nic_info *info;
+ struct udp_tunnel_nic *utn;
+
+ info = dev->udp_tunnel_nic_info;
+ if (!info)
+ return NOTIFY_DONE;
+
+ if (event == NETDEV_REGISTER) {
+ int err;
+
+ err = udp_tunnel_nic_register(dev);
+ if (err)
+ netdev_WARN(dev, "failed to register for UDP tunnel offloads: %d", err);
+ return notifier_from_errno(err);
+ }
+ /* All other events will need the udp_tunnel_nic state */
+ utn = dev->udp_tunnel_nic;
+ if (!utn)
+ return NOTIFY_DONE;
+
+ if (event == NETDEV_UNREGISTER) {
+ udp_tunnel_nic_unregister(dev, utn);
+ return NOTIFY_OK;
+ }
+
+ /* All other events only matter if NIC has to be programmed open */
+ if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
+ return NOTIFY_DONE;
+
+ if (event == NETDEV_UP) {
+ WARN_ON(!udp_tunnel_nic_is_empty(dev, utn));
+ udp_tunnel_get_rx_info(dev);
+ return NOTIFY_OK;
+ }
+ if (event == NETDEV_GOING_DOWN) {
+ udp_tunnel_nic_flush(dev, utn);
+ return NOTIFY_OK;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block udp_tunnel_nic_notifier_block __read_mostly = {
+ .notifier_call = udp_tunnel_nic_netdevice_event,
+};
+
+static int __init udp_tunnel_nic_init_module(void)
+{
+ int err;
+
+ udp_tunnel_nic_workqueue = alloc_workqueue("udp_tunnel_nic", 0, 0);
+ if (!udp_tunnel_nic_workqueue)
+ return -ENOMEM;
+
+ rtnl_lock();
+ udp_tunnel_nic_ops = &__udp_tunnel_nic_ops;
+ rtnl_unlock();
+
+ err = register_netdevice_notifier(&udp_tunnel_nic_notifier_block);
+ if (err)
+ goto err_unset_ops;
+
+ return 0;
+
+err_unset_ops:
+ rtnl_lock();
+ udp_tunnel_nic_ops = NULL;
+ rtnl_unlock();
+ destroy_workqueue(udp_tunnel_nic_workqueue);
+ return err;
+}
+late_initcall(udp_tunnel_nic_init_module);
+
+static void __exit udp_tunnel_nic_cleanup_module(void)
+{
+ unregister_netdevice_notifier(&udp_tunnel_nic_notifier_block);
+
+ rtnl_lock();
+ udp_tunnel_nic_ops = NULL;
+ rtnl_unlock();
+
+ destroy_workqueue(udp_tunnel_nic_workqueue);
+}
+module_exit(udp_tunnel_nic_cleanup_module);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/udp_tunnel_stub.c b/net/ipv4/udp_tunnel_stub.c
new file mode 100644
index 000000000000..c4b2888f5fef
--- /dev/null
+++ b/net/ipv4/udp_tunnel_stub.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (c) 2020 Facebook Inc.
+
+#include <net/udp_tunnel.h>
+
+const struct udp_tunnel_nic_ops *udp_tunnel_nic_ops;
+EXPORT_SYMBOL_GPL(udp_tunnel_nic_ops);
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 5936d66d1ce2..bd8773b49e72 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -56,10 +56,6 @@ struct proto udplite_prot = {
.sysctl_mem = sysctl_udp_mem,
.obj_size = sizeof(struct udp_sock),
.h.udp_table = &udplite_table,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_udp_setsockopt,
- .compat_getsockopt = compat_udp_getsockopt,
-#endif
};
EXPORT_SYMBOL(udplite_prot);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index f4f19e89af5e..76bff79d6fed 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -14,7 +14,7 @@ menuconfig IPV6
<https://en.wikipedia.org/wiki/IPv6>.
For specific information about IPv6 under Linux, see
Documentation/networking/ipv6.rst and read the HOWTO at
- <http://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/>
+ <https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/>
To compile this protocol support as a module, choose M here: the
module will be called ipv6.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 840bfdb3d7bd..0acf6a9796ca 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -163,7 +163,7 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
static void addrconf_type_change(struct net_device *dev,
unsigned long event);
-static int addrconf_ifdown(struct net_device *dev, int how);
+static int addrconf_ifdown(struct net_device *dev, bool unregister);
static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
int plen,
@@ -3630,7 +3630,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
* an L3 master device (e.g., VRF)
*/
if (info->upper_dev && netif_is_l3_master(info->upper_dev))
- addrconf_ifdown(dev, 0);
+ addrconf_ifdown(dev, false);
}
return NOTIFY_OK;
@@ -3663,9 +3663,9 @@ static bool addr_is_local(const struct in6_addr *addr)
(IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}
-static int addrconf_ifdown(struct net_device *dev, int how)
+static int addrconf_ifdown(struct net_device *dev, bool unregister)
{
- unsigned long event = how ? NETDEV_UNREGISTER : NETDEV_DOWN;
+ unsigned long event = unregister ? NETDEV_UNREGISTER : NETDEV_DOWN;
struct net *net = dev_net(dev);
struct inet6_dev *idev;
struct inet6_ifaddr *ifa, *tmp;
@@ -3684,7 +3684,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
* Step 1: remove reference to ipv6 device from parent device.
* Do not dev_put!
*/
- if (how) {
+ if (unregister) {
idev->dead = 1;
/* protected by rtnl_lock */
@@ -3698,7 +3698,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
/* combine the user config with event to determine if permanent
* addresses are to be removed from address hash table
*/
- if (!how && !idev->cnf.disable_ipv6) {
+ if (!unregister && !idev->cnf.disable_ipv6) {
/* aggregate the system setting and interface setting */
int _keep_addr = net->ipv6.devconf_all->keep_addr_on_down;
@@ -3736,7 +3736,7 @@ restart:
addrconf_del_rs_timer(idev);
/* Step 2: clear flags for stateless addrconf */
- if (!how)
+ if (!unregister)
idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
/* Step 3: clear tempaddr list */
@@ -3806,7 +3806,7 @@ restart:
write_unlock_bh(&idev->lock);
/* Step 5: Discard anycast and multicast list */
- if (how) {
+ if (unregister) {
ipv6_ac_destroy_dev(idev);
ipv6_mc_destroy_dev(idev);
} else {
@@ -3816,7 +3816,7 @@ restart:
idev->tstamp = jiffies;
/* Last: Shot the device (if unregistered) */
- if (how) {
+ if (unregister) {
addrconf_sysctl_unregister(idev);
neigh_parms_release(&nd_tbl, idev->nd_parms);
neigh_ifdown(&nd_tbl, dev);
@@ -4038,7 +4038,7 @@ static void addrconf_dad_work(struct work_struct *w)
in6_ifa_hold(ifp);
addrconf_dad_stop(ifp, 1);
if (disable_ipv6)
- addrconf_ifdown(idev->dev, 0);
+ addrconf_ifdown(idev->dev, false);
goto out;
}
@@ -7187,9 +7187,9 @@ void addrconf_cleanup(void)
for_each_netdev(&init_net, dev) {
if (__in6_dev_get(dev) == NULL)
continue;
- addrconf_ifdown(dev, 1);
+ addrconf_ifdown(dev, true);
}
- addrconf_ifdown(init_net.loopback_dev, 2);
+ addrconf_ifdown(init_net.loopback_dev, true);
/*
* Check hash table.
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b304b882e031..0306509ab063 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -688,8 +688,6 @@ const struct proto_ops inet6_stream_ops = {
.peek_len = tcp_peek_len,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet6_compat_ioctl,
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
#endif
.set_rcvlowat = tcp_set_rcvlowat,
};
@@ -717,8 +715,6 @@ const struct proto_ops inet6_dgram_ops = {
.set_peek_off = sk_set_peek_off,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet6_compat_ioctl,
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
#endif
};
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 390bedde21a5..cc8ad7ddecda 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -19,6 +19,7 @@
#include <linux/route.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/icmp.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
@@ -284,6 +285,17 @@ int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr,
}
EXPORT_SYMBOL_GPL(ip6_datagram_connect_v6_only);
+static void ipv6_icmp_error_rfc4884(const struct sk_buff *skb,
+ struct sock_ee_data_rfc4884 *out)
+{
+ switch (icmp6_hdr(skb)->icmp6_type) {
+ case ICMPV6_TIME_EXCEED:
+ case ICMPV6_DEST_UNREACH:
+ ip_icmp_error_rfc4884(skb, out, sizeof(struct icmp6hdr),
+ icmp6_hdr(skb)->icmp6_datagram_len * 8);
+ }
+}
+
void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload)
{
@@ -313,6 +325,10 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
serr->port = port;
__skb_pull(skb, payload - skb->data);
+
+ if (inet6_sk(sk)->recverr_rfc4884)
+ ipv6_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);
+
skb_reset_transport_header(skb);
if (sock_queue_err_skb(sk, skb))
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index c43592771126..52c2f063529f 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -805,10 +805,17 @@ int esp6_input_done2(struct sk_buff *skb, int err)
if (x->encap) {
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ int offset = skb_network_offset(skb) + sizeof(*ip6h);
struct xfrm_encap_tmpl *encap = x->encap;
- struct udphdr *uh = (void *)(skb_network_header(skb) + hdr_len);
- struct tcphdr *th = (void *)(skb_network_header(skb) + hdr_len);
- __be16 source;
+ u8 nexthdr = ip6h->nexthdr;
+ __be16 frag_off, source;
+ struct udphdr *uh;
+ struct tcphdr *th;
+
+ offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off);
+ uh = (void *)(skb->data + offset);
+ th = (void *)(skb->data + offset);
+ hdr_len += offset;
switch (x->encap->encap_type) {
case TCP_ENCAP_ESPINTCP:
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 5a8bbcdcaf2b..374105e4394f 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -580,7 +580,7 @@ looped_back:
hdr->segments_left--;
i = n - hdr->segments_left;
- buf = kzalloc(ipv6_rpl_srh_alloc_size(n + 1) * 2, GFP_ATOMIC);
+ buf = kcalloc(struct_size(hdr, segments.addr, n + 2), 2, GFP_ATOMIC);
if (unlikely(!buf)) {
kfree_skb(skb);
return -1;
@@ -1232,7 +1232,6 @@ static void ipv6_renew_option(int renewtype,
* @opt: original options
* @newtype: option type to replace in @opt
* @newopt: new option of type @newtype to replace (user-mem)
- * @newoptlen: length of @newopt
*
* Returns a new set of options which is a copy of @opt with the
* option type @newtype replaced with @newopt.
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index fafe556d21e0..8f9a83314de7 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -13,6 +13,7 @@
#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/export.h>
+#include <linux/indirect_call_wrapper.h>
#include <net/fib_rules.h>
#include <net/ipv6.h>
@@ -111,11 +112,13 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
} else {
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
+ rt = pol_lookup_func(lookup,
+ net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put_flags(rt, flags);
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
+ rt = pol_lookup_func(lookup,
+ net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put_flags(rt, flags);
@@ -226,7 +229,8 @@ static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto out;
}
- rt = lookup(net, table, flp6, arg->lookup_data, flags);
+ rt = pol_lookup_func(lookup,
+ net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
err = fib6_rule_saddr(net, rule, flags, flp6,
ip6_dst_idev(&rt->dst)->dev);
@@ -252,8 +256,9 @@ out:
return err;
}
-static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
- int flags, struct fib_lookup_arg *arg)
+INDIRECT_CALLABLE_SCOPE int fib6_rule_action(struct fib_rule *rule,
+ struct flowi *flp, int flags,
+ struct fib_lookup_arg *arg)
{
if (arg->lookup_ptr == fib6_table_lookup)
return fib6_rule_action_alt(rule, flp, flags, arg);
@@ -261,7 +266,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
return __fib6_rule_action(rule, flp, flags, arg);
}
-static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
+INDIRECT_CALLABLE_SCOPE bool fib6_rule_suppress(struct fib_rule *rule,
+ struct fib_lookup_arg *arg)
{
struct fib6_result *res = arg->result;
struct rt6_info *rt = res->rt6;
@@ -293,7 +299,8 @@ suppress_route:
return true;
}
-static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
+INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
+ struct flowi *fl, int flags)
{
struct fib6_rule *r = (struct fib6_rule *) rule;
struct flowi6 *fl6 = &fl->u.ip6;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 9df8737ae0d3..a4e4912ad607 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -439,8 +439,8 @@ static int icmp6_iif(const struct sk_buff *skb)
/*
* Send an ICMP message in response to a packet in error
*/
-static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
- const struct in6_addr *force_saddr)
+void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
+ const struct in6_addr *force_saddr)
{
struct inet6_dev *idev = NULL;
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -625,6 +625,7 @@ out:
out_bh_enable:
local_bh_enable();
}
+EXPORT_SYMBOL(icmp6_send);
/* Slightly more convenient version of icmp6_send.
*/
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index fbe9d4295eac..2d3add9e6116 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -21,6 +21,8 @@
#include <net/ip.h>
#include <net/sock_reuseport.h>
+extern struct inet_hashinfo tcp_hashinfo;
+
u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
const struct in6_addr *faddr, const __be16 fport)
@@ -111,6 +113,23 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned short hnum)
+{
+ struct sock *reuse_sk = NULL;
+ u32 phash;
+
+ if (sk->sk_reuseport) {
+ phash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
+ reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
+ }
+ return reuse_sk;
+}
+
/* called with rcu_read_lock() */
static struct sock *inet6_lhash2_lookup(struct net *net,
struct inet_listen_hashbucket *ilb2,
@@ -123,21 +142,17 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
int score, hiscore = 0;
- u32 phash = 0;
inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
sk = (struct sock *)icsk;
score = compute_score(sk, net, hnum, daddr, dif, sdif,
exact_dif);
if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet6_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- return result;
- }
+ result = lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum);
+ if (result)
+ return result;
+
result = sk;
hiscore = score;
}
@@ -146,6 +161,31 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
return result;
}
+static inline struct sock *inet6_lookup_run_bpf(struct net *net,
+ struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const u16 hnum)
+{
+ struct sock *sk, *reuse_sk;
+ bool no_reuseport;
+
+ if (hashinfo != &tcp_hashinfo)
+ return NULL; /* only TCP is supported */
+
+ no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP,
+ saddr, sport, daddr, hnum, &sk);
+ if (no_reuseport || IS_ERR_OR_NULL(sk))
+ return sk;
+
+ reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+ if (reuse_sk)
+ sk = reuse_sk;
+ return sk;
+}
+
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -157,6 +197,14 @@ struct sock *inet6_lookup_listener(struct net *net,
struct sock *result = NULL;
unsigned int hash2;
+ /* Lookup redirect from BPF */
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+ result = inet6_lookup_run_bpf(net, hashinfo, skb, doff,
+ saddr, sport, daddr, hnum);
+ if (result)
+ goto done;
+ }
+
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 49ee89bbcba0..25a90f3f705c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -314,7 +314,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
{
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
+ rt = pol_lookup_func(lookup,
+ net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error == -EAGAIN) {
ip6_rt_put_flags(rt, flags);
rt = net->ipv6.ip6_null_entry;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index ce4fbba4acce..2d655260dedc 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -371,7 +371,7 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo
static struct ip6_flowlabel *
fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
- char __user *optval, int optlen, int *err_p)
+ sockptr_t optval, int optlen, int *err_p)
{
struct ip6_flowlabel *fl = NULL;
int olen;
@@ -401,7 +401,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
memset(fl->opt, 0, sizeof(*fl->opt));
fl->opt->tot_len = sizeof(*fl->opt) + olen;
err = -EFAULT;
- if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen))
+ if (copy_from_sockptr_offset(fl->opt + 1, optval,
+ CMSG_ALIGN(sizeof(*freq)), olen))
goto done;
msg.msg_controllen = olen;
@@ -533,187 +534,212 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
return -ENOENT;
}
-int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
+#define socklist_dereference(__sflp) \
+ rcu_dereference_protected(__sflp, lockdep_is_held(&ip6_sk_fl_lock))
+
+static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
{
- int uninitialized_var(err);
- struct net *net = sock_net(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct in6_flowlabel_req freq;
- struct ipv6_fl_socklist *sfl1 = NULL;
- struct ipv6_fl_socklist *sfl;
struct ipv6_fl_socklist __rcu **sflp;
- struct ip6_flowlabel *fl, *fl1 = NULL;
+ struct ipv6_fl_socklist *sfl;
+ if (freq->flr_flags & IPV6_FL_F_REFLECT) {
+ if (sk->sk_protocol != IPPROTO_TCP)
+ return -ENOPROTOOPT;
+ if (!np->repflow)
+ return -ESRCH;
+ np->flow_label = 0;
+ np->repflow = 0;
+ return 0;
+ }
- if (optlen < sizeof(freq))
- return -EINVAL;
+ spin_lock_bh(&ip6_sk_fl_lock);
+ for (sflp = &np->ipv6_fl_list;
+ (sfl = socklist_dereference(*sflp)) != NULL;
+ sflp = &sfl->next) {
+ if (sfl->fl->label == freq->flr_label)
+ goto found;
+ }
+ spin_unlock_bh(&ip6_sk_fl_lock);
+ return -ESRCH;
+found:
+ if (freq->flr_label == (np->flow_label & IPV6_FLOWLABEL_MASK))
+ np->flow_label &= ~IPV6_FLOWLABEL_MASK;
+ *sflp = sfl->next;
+ spin_unlock_bh(&ip6_sk_fl_lock);
+ fl_release(sfl->fl);
+ kfree_rcu(sfl, rcu);
+ return 0;
+}
- if (copy_from_user(&freq, optval, sizeof(freq)))
- return -EFAULT;
+static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
+ struct ipv6_fl_socklist *sfl;
+ int err;
- switch (freq.flr_action) {
- case IPV6_FL_A_PUT:
- if (freq.flr_flags & IPV6_FL_F_REFLECT) {
- if (sk->sk_protocol != IPPROTO_TCP)
- return -ENOPROTOOPT;
- if (!np->repflow)
- return -ESRCH;
- np->flow_label = 0;
- np->repflow = 0;
- return 0;
- }
- spin_lock_bh(&ip6_sk_fl_lock);
- for (sflp = &np->ipv6_fl_list;
- (sfl = rcu_dereference_protected(*sflp,
- lockdep_is_held(&ip6_sk_fl_lock))) != NULL;
- sflp = &sfl->next) {
- if (sfl->fl->label == freq.flr_label) {
- if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
- np->flow_label &= ~IPV6_FLOWLABEL_MASK;
- *sflp = sfl->next;
- spin_unlock_bh(&ip6_sk_fl_lock);
- fl_release(sfl->fl);
- kfree_rcu(sfl, rcu);
- return 0;
- }
+ rcu_read_lock_bh();
+ for_each_sk_fl_rcu(np, sfl) {
+ if (sfl->fl->label == freq->flr_label) {
+ err = fl6_renew(sfl->fl, freq->flr_linger,
+ freq->flr_expires);
+ rcu_read_unlock_bh();
+ return err;
}
- spin_unlock_bh(&ip6_sk_fl_lock);
- return -ESRCH;
+ }
+ rcu_read_unlock_bh();
- case IPV6_FL_A_RENEW:
- rcu_read_lock_bh();
- for_each_sk_fl_rcu(np, sfl) {
- if (sfl->fl->label == freq.flr_label) {
- err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
- rcu_read_unlock_bh();
- return err;
- }
- }
- rcu_read_unlock_bh();
+ if (freq->flr_share == IPV6_FL_S_NONE &&
+ ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+ struct ip6_flowlabel *fl = fl_lookup(net, freq->flr_label);
- if (freq.flr_share == IPV6_FL_S_NONE &&
- ns_capable(net->user_ns, CAP_NET_ADMIN)) {
- fl = fl_lookup(net, freq.flr_label);
- if (fl) {
- err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
- fl_release(fl);
- return err;
- }
+ if (fl) {
+ err = fl6_renew(fl, freq->flr_linger,
+ freq->flr_expires);
+ fl_release(fl);
+ return err;
}
- return -ESRCH;
-
- case IPV6_FL_A_GET:
- if (freq.flr_flags & IPV6_FL_F_REFLECT) {
- struct net *net = sock_net(sk);
- if (net->ipv6.sysctl.flowlabel_consistency) {
- net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n");
- return -EPERM;
- }
+ }
+ return -ESRCH;
+}
- if (sk->sk_protocol != IPPROTO_TCP)
- return -ENOPROTOOPT;
+static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
+ sockptr_t optval, int optlen)
+{
+ struct ipv6_fl_socklist *sfl, *sfl1 = NULL;
+ struct ip6_flowlabel *fl, *fl1 = NULL;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
+ int uninitialized_var(err);
- np->repflow = 1;
- return 0;
+ if (freq->flr_flags & IPV6_FL_F_REFLECT) {
+ if (net->ipv6.sysctl.flowlabel_consistency) {
+ net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n");
+ return -EPERM;
}
- if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
- return -EINVAL;
+ if (sk->sk_protocol != IPPROTO_TCP)
+ return -ENOPROTOOPT;
+ np->repflow = 1;
+ return 0;
+ }
- if (net->ipv6.sysctl.flowlabel_state_ranges &&
- (freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
- return -ERANGE;
+ if (freq->flr_label & ~IPV6_FLOWLABEL_MASK)
+ return -EINVAL;
+ if (net->ipv6.sysctl.flowlabel_state_ranges &&
+ (freq->flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
+ return -ERANGE;
- fl = fl_create(net, sk, &freq, optval, optlen, &err);
- if (!fl)
- return err;
- sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
+ fl = fl_create(net, sk, freq, optval, optlen, &err);
+ if (!fl)
+ return err;
- if (freq.flr_label) {
- err = -EEXIST;
- rcu_read_lock_bh();
- for_each_sk_fl_rcu(np, sfl) {
- if (sfl->fl->label == freq.flr_label) {
- if (freq.flr_flags&IPV6_FL_F_EXCL) {
- rcu_read_unlock_bh();
- goto done;
- }
- fl1 = sfl->fl;
- if (!atomic_inc_not_zero(&fl1->users))
- fl1 = NULL;
- break;
+ sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
+
+ if (freq->flr_label) {
+ err = -EEXIST;
+ rcu_read_lock_bh();
+ for_each_sk_fl_rcu(np, sfl) {
+ if (sfl->fl->label == freq->flr_label) {
+ if (freq->flr_flags & IPV6_FL_F_EXCL) {
+ rcu_read_unlock_bh();
+ goto done;
}
+ fl1 = sfl->fl;
+ if (!atomic_inc_not_zero(&fl1->users))
+ fl1 = NULL;
+ break;
}
- rcu_read_unlock_bh();
+ }
+ rcu_read_unlock_bh();
- if (!fl1)
- fl1 = fl_lookup(net, freq.flr_label);
- if (fl1) {
+ if (!fl1)
+ fl1 = fl_lookup(net, freq->flr_label);
+ if (fl1) {
recheck:
- err = -EEXIST;
- if (freq.flr_flags&IPV6_FL_F_EXCL)
- goto release;
- err = -EPERM;
- if (fl1->share == IPV6_FL_S_EXCL ||
- fl1->share != fl->share ||
- ((fl1->share == IPV6_FL_S_PROCESS) &&
- (fl1->owner.pid != fl->owner.pid)) ||
- ((fl1->share == IPV6_FL_S_USER) &&
- !uid_eq(fl1->owner.uid, fl->owner.uid)))
- goto release;
-
- err = -ENOMEM;
- if (!sfl1)
- goto release;
- if (fl->linger > fl1->linger)
- fl1->linger = fl->linger;
- if ((long)(fl->expires - fl1->expires) > 0)
- fl1->expires = fl->expires;
- fl_link(np, sfl1, fl1);
- fl_free(fl);
- return 0;
+ err = -EEXIST;
+ if (freq->flr_flags&IPV6_FL_F_EXCL)
+ goto release;
+ err = -EPERM;
+ if (fl1->share == IPV6_FL_S_EXCL ||
+ fl1->share != fl->share ||
+ ((fl1->share == IPV6_FL_S_PROCESS) &&
+ (fl1->owner.pid != fl->owner.pid)) ||
+ ((fl1->share == IPV6_FL_S_USER) &&
+ !uid_eq(fl1->owner.uid, fl->owner.uid)))
+ goto release;
+
+ err = -ENOMEM;
+ if (!sfl1)
+ goto release;
+ if (fl->linger > fl1->linger)
+ fl1->linger = fl->linger;
+ if ((long)(fl->expires - fl1->expires) > 0)
+ fl1->expires = fl->expires;
+ fl_link(np, sfl1, fl1);
+ fl_free(fl);
+ return 0;
release:
- fl_release(fl1);
- goto done;
- }
- }
- err = -ENOENT;
- if (!(freq.flr_flags&IPV6_FL_F_CREATE))
- goto done;
-
- err = -ENOMEM;
- if (!sfl1)
+ fl_release(fl1);
goto done;
+ }
+ }
+ err = -ENOENT;
+ if (!(freq->flr_flags & IPV6_FL_F_CREATE))
+ goto done;
- err = mem_check(sk);
- if (err != 0)
- goto done;
+ err = -ENOMEM;
+ if (!sfl1)
+ goto done;
- fl1 = fl_intern(net, fl, freq.flr_label);
- if (fl1)
- goto recheck;
+ err = mem_check(sk);
+ if (err != 0)
+ goto done;
- if (!freq.flr_label) {
- if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
- &fl->label, sizeof(fl->label))) {
- /* Intentionally ignore fault. */
- }
- }
+ fl1 = fl_intern(net, fl, freq->flr_label);
+ if (fl1)
+ goto recheck;
- fl_link(np, sfl1, fl);
- return 0;
+ if (!freq->flr_label) {
+ size_t offset = offsetof(struct in6_flowlabel_req, flr_label);
- default:
- return -EINVAL;
+ if (copy_to_sockptr_offset(optval, offset, &fl->label,
+ sizeof(fl->label))) {
+ /* Intentionally ignore fault. */
+ }
}
+ fl_link(np, sfl1, fl);
+ return 0;
done:
fl_free(fl);
kfree(sfl1);
return err;
}
+int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen)
+{
+ struct in6_flowlabel_req freq;
+
+ if (optlen < sizeof(freq))
+ return -EINVAL;
+ if (copy_from_sockptr(&freq, optval, sizeof(freq)))
+ return -EFAULT;
+
+ switch (freq.flr_action) {
+ case IPV6_FL_A_PUT:
+ return ipv6_flowlabel_put(sk, &freq);
+ case IPV6_FL_A_RENEW:
+ return ipv6_flowlabel_renew(sk, &freq);
+ case IPV6_FL_A_GET:
+ return ipv6_flowlabel_get(sk, &freq, optval, optlen);
+ default:
+ return -EINVAL;
+ }
+}
+
#ifdef CONFIG_PROC_FS
struct ip6fl_iter_state {
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index e0086758b6ee..70c8c2f36c98 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -9,6 +9,8 @@
#if IS_ENABLED(CONFIG_IPV6)
+#if !IS_BUILTIN(CONFIG_IPV6)
+
static ip6_icmp_send_t __rcu *ip6_icmp_send;
int inet6_register_icmp_sender(ip6_icmp_send_t *fn)
@@ -37,14 +39,12 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
rcu_read_lock();
send = rcu_dereference(ip6_icmp_send);
-
- if (!send)
- goto out;
- send(skb, type, code, info, NULL);
-out:
+ if (send)
+ send(skb, type, code, info, NULL);
rcu_read_unlock();
}
EXPORT_SYMBOL(icmpv6_send);
+#endif
#if IS_ENABLED(CONFIG_NF_NAT)
#include <net/netfilter/nf_conntrack.h>
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 7fbb44736a34..a80f90bf3ae7 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -13,6 +13,8 @@
#include <net/protocol.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
+#include <net/tcp.h>
+#include <net/udp.h>
#include "ip6_offload.h"
@@ -177,10 +179,6 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph,
return len;
}
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *,
- struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
- struct sk_buff *));
INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
struct sk_buff *skb)
{
@@ -319,8 +317,6 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
return inet_gro_receive(head, skb);
}
-INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *, int));
-INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct net_offload *ops;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8a8c2d0cfcc8..c78e67d7747f 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1118,6 +1118,7 @@ out_err_release:
/**
* ip6_dst_lookup - perform route lookup on flow
+ * @net: Network namespace to perform lookup in
* @sk: socket which provides route info
* @dst: pointer to dst_entry * for result
* @fl6: flow to lookup
@@ -1136,6 +1137,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);
/**
* ip6_dst_lookup_flow - perform route lookup on flow with ipsec
+ * @net: Network namespace to perform lookup in
* @sk: socket which provides route info
* @fl6: flow to lookup
* @final_dst: final destination address for ipsec lookup
@@ -1202,11 +1204,11 @@ EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
* @skb: Packet for which lookup is done
* @dev: Tunnel device
* @net: Network namespace of tunnel device
- * @sk: Socket which provides route info
+ * @sock: Socket which provides route info
* @saddr: Memory to store the src ip address
* @info: Tunnel information
* @protocol: IP protocol
- * @use_cahce: Flag to enable cache usage
+ * @use_cache: Flag to enable cache usage
* This function performs a route lookup on a tunnel
*
* It returns a valid dst pointer and stores src address to be used in
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a18c378ca5f4..f635914f42ec 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -124,8 +124,12 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
return &dev->stats;
}
+#define for_each_ip6_tunnel_rcu(start) \
+ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+
/**
* ip6_tnl_lookup - fetch tunnel matching the end-point addresses
+ * @net: network namespace
* @link: ifindex of underlying interface
* @remote: the address of the tunnel exit-point
* @local: the address of the tunnel entry-point
@@ -136,9 +140,6 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
* else %NULL
**/
-#define for_each_ip6_tunnel_rcu(start) \
- for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
-
static struct ip6_tnl *
ip6_tnl_lookup(struct net *net, int link,
const struct in6_addr *remote, const struct in6_addr *local)
@@ -302,8 +303,8 @@ out:
/**
* ip6_tnl_create - create a new tunnel
+ * @net: network namespace
* @p: tunnel parameters
- * @pt: pointer to new tunnel
*
* Description:
* Create tunnel matching given parameters.
@@ -351,6 +352,7 @@ failed:
/**
* ip6_tnl_locate - find or create tunnel matching given parameters
+ * @net: network namespace
* @p: tunnel parameters
* @create: != 0 if allowed to create new tunnel if no match found
*
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 0d964160a9dd..fac01b80a104 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -491,13 +491,16 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
}
dst_hold(dst);
- dst = xfrm_lookup(t->net, dst, fl, NULL, 0);
+ dst = xfrm_lookup_route(t->net, dst, fl, NULL, 0);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
dst = NULL;
goto tx_err_link_failure;
}
+ if (dst->flags & DST_XFRM_QUEUE)
+ goto queued;
+
x = dst->xfrm;
if (!vti6_state_check(x, &t->parms.raddr, &t->parms.laddr))
goto tx_err_link_failure;
@@ -533,6 +536,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
goto tx_err_dst_release;
}
+queued:
skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
skb_dst_set(skb, dst);
skb->dev = skb_dst(skb)->dev;
@@ -1219,6 +1223,33 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
.priority = 100,
};
+#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
+static int vti6_rcv_tunnel(struct sk_buff *skb)
+{
+ const xfrm_address_t *saddr;
+ __be32 spi;
+
+ saddr = (const xfrm_address_t *)&ipv6_hdr(skb)->saddr;
+ spi = xfrm6_tunnel_spi_lookup(dev_net(skb->dev), saddr);
+
+ return vti6_input_proto(skb, IPPROTO_IPV6, spi, 0);
+}
+
+static struct xfrm6_tunnel vti_ipv6_handler __read_mostly = {
+ .handler = vti6_rcv_tunnel,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 0,
+};
+
+static struct xfrm6_tunnel vti_ip6ip_handler __read_mostly = {
+ .handler = vti6_rcv_tunnel,
+ .cb_handler = vti6_rcv_cb,
+ .err_handler = vti6_err,
+ .priority = 0,
+};
+#endif
+
/**
* vti6_tunnel_init - register protocol and reserve needed resources
*
@@ -1244,6 +1275,15 @@ static int __init vti6_tunnel_init(void)
err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP);
if (err < 0)
goto xfrm_proto_comp_failed;
+#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
+ msg = "ipv6 tunnel";
+ err = xfrm6_tunnel_register(&vti_ipv6_handler, AF_INET6);
+ if (err < 0)
+ goto vti_tunnel_ipv6_failed;
+ err = xfrm6_tunnel_register(&vti_ip6ip_handler, AF_INET);
+ if (err < 0)
+ goto vti_tunnel_ip6ip_failed;
+#endif
msg = "netlink interface";
err = rtnl_link_register(&vti6_link_ops);
@@ -1253,6 +1293,12 @@ static int __init vti6_tunnel_init(void)
return 0;
rtnl_link_failed:
+#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
+ err = xfrm6_tunnel_deregister(&vti_ip6ip_handler, AF_INET);
+vti_tunnel_ip6ip_failed:
+ err = xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6);
+vti_tunnel_ipv6_failed:
+#endif
xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
xfrm_proto_comp_failed:
xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
@@ -1271,6 +1317,10 @@ pernet_dev_failed:
static void __exit vti6_tunnel_cleanup(void)
{
rtnl_link_unregister(&vti6_link_ops);
+#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
+ xfrm6_tunnel_deregister(&vti_ip6ip_handler, AF_INET);
+ xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6);
+#endif
xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 1f4d20e97c07..06b0d2c329b9 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1629,7 +1629,8 @@ EXPORT_SYMBOL(mroute6_is_socket);
* MOSPF/PIM router set up we can clean this up.
*/
-int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
+int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
+ unsigned int optlen)
{
int ret, parent = 0;
struct mif6ctl vif;
@@ -1665,7 +1666,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
case MRT6_ADD_MIF:
if (optlen < sizeof(vif))
return -EINVAL;
- if (copy_from_user(&vif, optval, sizeof(vif)))
+ if (copy_from_sockptr(&vif, optval, sizeof(vif)))
return -EFAULT;
if (vif.mif6c_mifi >= MAXMIFS)
return -ENFILE;
@@ -1678,7 +1679,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
case MRT6_DEL_MIF:
if (optlen < sizeof(mifi_t))
return -EINVAL;
- if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
+ if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
return -EFAULT;
rtnl_lock();
ret = mif6_delete(mrt, mifi, 0, NULL);
@@ -1697,7 +1698,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
case MRT6_DEL_MFC_PROXY:
if (optlen < sizeof(mfc))
return -EINVAL;
- if (copy_from_user(&mfc, optval, sizeof(mfc)))
+ if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
return -EFAULT;
if (parent == 0)
parent = mfc.mf6cc_parent;
@@ -1718,7 +1719,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (optlen != sizeof(flags))
return -EINVAL;
- if (get_user(flags, (int __user *)optval))
+ if (copy_from_sockptr(&flags, optval, sizeof(flags)))
return -EFAULT;
rtnl_lock();
mroute_clean_tables(mrt, flags);
@@ -1735,7 +1736,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (optlen != sizeof(v))
return -EINVAL;
- if (get_user(v, (int __user *)optval))
+ if (copy_from_sockptr(&v, optval, sizeof(v)))
return -EFAULT;
mrt->mroute_do_assert = v;
return 0;
@@ -1748,7 +1749,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (optlen != sizeof(v))
return -EINVAL;
- if (get_user(v, (int __user *)optval))
+ if (copy_from_sockptr(&v, optval, sizeof(v)))
return -EFAULT;
v = !!v;
rtnl_lock();
@@ -1769,7 +1770,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (optlen != sizeof(u32))
return -EINVAL;
- if (get_user(v, (u32 __user *)optval))
+ if (copy_from_sockptr(&v, optval, sizeof(v)))
return -EFAULT;
/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
if (v != RT_TABLE_DEFAULT && v >= 100000000)
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 99668bfebd85..daef890460b7 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -91,6 +91,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
t->props.mode = x->props.mode;
memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
memcpy(&t->mark, &x->mark, sizeof(t->mark));
+ t->if_id = x->if_id;
if (xfrm_init_state(t))
goto error;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 76f9e41859a2..43a894bf9a1b 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -136,13 +136,42 @@ static bool setsockopt_needs_rtnl(int optname)
return false;
}
+static int copy_group_source_from_sockptr(struct group_source_req *greqs,
+ sockptr_t optval, int optlen)
+{
+ if (in_compat_syscall()) {
+ struct compat_group_source_req gr32;
+
+ if (optlen < sizeof(gr32))
+ return -EINVAL;
+ if (copy_from_sockptr(&gr32, optval, sizeof(gr32)))
+ return -EFAULT;
+ greqs->gsr_interface = gr32.gsr_interface;
+ greqs->gsr_group = gr32.gsr_group;
+ greqs->gsr_source = gr32.gsr_source;
+ } else {
+ if (optlen < sizeof(*greqs))
+ return -EINVAL;
+ if (copy_from_sockptr(greqs, optval, sizeof(*greqs)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
static int do_ipv6_mcast_group_source(struct sock *sk, int optname,
- struct group_source_req *greqs)
+ sockptr_t optval, int optlen)
{
+ struct group_source_req greqs;
int omode, add;
+ int ret;
+
+ ret = copy_group_source_from_sockptr(&greqs, optval, optlen);
+ if (ret)
+ return ret;
- if (greqs->gsr_group.ss_family != AF_INET6 ||
- greqs->gsr_source.ss_family != AF_INET6)
+ if (greqs.gsr_group.ss_family != AF_INET6 ||
+ greqs.gsr_source.ss_family != AF_INET6)
return -EADDRNOTAVAIL;
if (optname == MCAST_BLOCK_SOURCE) {
@@ -155,8 +184,8 @@ static int do_ipv6_mcast_group_source(struct sock *sk, int optname,
struct sockaddr_in6 *psin6;
int retv;
- psin6 = (struct sockaddr_in6 *)&greqs->gsr_group;
- retv = ipv6_sock_mc_join_ssm(sk, greqs->gsr_interface,
+ psin6 = (struct sockaddr_in6 *)&greqs.gsr_group;
+ retv = ipv6_sock_mc_join_ssm(sk, greqs.gsr_interface,
&psin6->sin6_addr,
MCAST_INCLUDE);
/* prior join w/ different source is ok */
@@ -168,11 +197,200 @@ static int do_ipv6_mcast_group_source(struct sock *sk, int optname,
omode = MCAST_INCLUDE;
add = 0;
}
- return ip6_mc_source(add, omode, sk, greqs);
+ return ip6_mc_source(add, omode, sk, &greqs);
+}
+
+static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
+ int optlen)
+{
+ struct group_filter *gsf;
+ int ret;
+
+ if (optlen < GROUP_FILTER_SIZE(0))
+ return -EINVAL;
+ if (optlen > sysctl_optmem_max)
+ return -ENOBUFS;
+
+ gsf = memdup_sockptr(optval, optlen);
+ if (IS_ERR(gsf))
+ return PTR_ERR(gsf);
+
+ /* numsrc >= (4G-140)/128 overflow in 32 bits */
+ ret = -ENOBUFS;
+ if (gsf->gf_numsrc >= 0x1ffffffU ||
+ gsf->gf_numsrc > sysctl_mld_max_msf)
+ goto out_free_gsf;
+
+ ret = -EINVAL;
+ if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen)
+ goto out_free_gsf;
+
+ ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist);
+out_free_gsf:
+ kfree(gsf);
+ return ret;
+}
+
+static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
+ int optlen)
+{
+ const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ struct compat_group_filter *gf32;
+ void *p;
+ int ret;
+ int n;
+
+ if (optlen < size0)
+ return -EINVAL;
+ if (optlen > sysctl_optmem_max - 4)
+ return -ENOBUFS;
+
+ p = kmalloc(optlen + 4, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+ ret = -EFAULT;
+ if (copy_from_sockptr(gf32, optval, optlen))
+ goto out_free_p;
+
+ /* numsrc >= (4G-140)/128 overflow in 32 bits */
+ ret = -ENOBUFS;
+ n = gf32->gf_numsrc;
+ if (n >= 0x1ffffffU || n > sysctl_mld_max_msf)
+ goto out_free_p;
+
+ ret = -EINVAL;
+ if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+ goto out_free_p;
+
+ ret = ip6_mc_msfilter(sk, &(struct group_filter){
+ .gf_interface = gf32->gf_interface,
+ .gf_group = gf32->gf_group,
+ .gf_fmode = gf32->gf_fmode,
+ .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist);
+
+out_free_p:
+ kfree(p);
+ return ret;
+}
+
+static int ipv6_mcast_join_leave(struct sock *sk, int optname,
+ sockptr_t optval, int optlen)
+{
+ struct sockaddr_in6 *psin6;
+ struct group_req greq;
+
+ if (optlen < sizeof(greq))
+ return -EINVAL;
+ if (copy_from_sockptr(&greq, optval, sizeof(greq)))
+ return -EFAULT;
+
+ if (greq.gr_group.ss_family != AF_INET6)
+ return -EADDRNOTAVAIL;
+ psin6 = (struct sockaddr_in6 *)&greq.gr_group;
+ if (optname == MCAST_JOIN_GROUP)
+ return ipv6_sock_mc_join(sk, greq.gr_interface,
+ &psin6->sin6_addr);
+ return ipv6_sock_mc_drop(sk, greq.gr_interface, &psin6->sin6_addr);
+}
+
+static int compat_ipv6_mcast_join_leave(struct sock *sk, int optname,
+ sockptr_t optval, int optlen)
+{
+ struct compat_group_req gr32;
+ struct sockaddr_in6 *psin6;
+
+ if (optlen < sizeof(gr32))
+ return -EINVAL;
+ if (copy_from_sockptr(&gr32, optval, sizeof(gr32)))
+ return -EFAULT;
+
+ if (gr32.gr_group.ss_family != AF_INET6)
+ return -EADDRNOTAVAIL;
+ psin6 = (struct sockaddr_in6 *)&gr32.gr_group;
+ if (optname == MCAST_JOIN_GROUP)
+ return ipv6_sock_mc_join(sk, gr32.gr_interface,
+ &psin6->sin6_addr);
+ return ipv6_sock_mc_drop(sk, gr32.gr_interface, &psin6->sin6_addr);
+}
+
+static int ipv6_set_opt_hdr(struct sock *sk, int optname, sockptr_t optval,
+ int optlen)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_opt_hdr *new = NULL;
+ struct net *net = sock_net(sk);
+ struct ipv6_txoptions *opt;
+ int err;
+
+ /* hop-by-hop / destination options are privileged option */
+ if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
+ return -EPERM;
+
+ /* remove any sticky options header with a zero option
+ * length, per RFC3542.
+ */
+ if (optlen > 0) {
+ if (sockptr_is_null(optval))
+ return -EINVAL;
+ if (optlen < sizeof(struct ipv6_opt_hdr) ||
+ optlen & 0x7 ||
+ optlen > 8 * 255)
+ return -EINVAL;
+
+ new = memdup_sockptr(optval, optlen);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
+ if (unlikely(ipv6_optlen(new) > optlen)) {
+ kfree(new);
+ return -EINVAL;
+ }
+ }
+
+ opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
+ opt = ipv6_renew_options(sk, opt, optname, new);
+ kfree(new);
+ if (IS_ERR(opt))
+ return PTR_ERR(opt);
+
+ /* routing header option needs extra check */
+ err = -EINVAL;
+ if (optname == IPV6_RTHDR && opt && opt->srcrt) {
+ struct ipv6_rt_hdr *rthdr = opt->srcrt;
+ switch (rthdr->type) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+ case IPV6_SRCRT_TYPE_2:
+ if (rthdr->hdrlen != 2 || rthdr->segments_left != 1)
+ goto sticky_done;
+ break;
+#endif
+ case IPV6_SRCRT_TYPE_4:
+ {
+ struct ipv6_sr_hdr *srh =
+ (struct ipv6_sr_hdr *)opt->srcrt;
+
+ if (!seg6_validate_srh(srh, optlen, false))
+ goto sticky_done;
+ break;
+ }
+ default:
+ goto sticky_done;
+ }
+ }
+
+ err = 0;
+ opt = ipv6_update_options(sk, opt);
+sticky_done:
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
+ return err;
}
static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
@@ -180,11 +398,11 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
int retv = -ENOPROTOOPT;
bool needs_rtnl = setsockopt_needs_rtnl(optname);
- if (!optval)
+ if (sockptr_is_null(optval))
val = 0;
else {
if (optlen >= sizeof(int)) {
- if (get_user(val, (int __user *) optval))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
} else
val = 0;
@@ -436,82 +654,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
case IPV6_RTHDRDSTOPTS:
case IPV6_RTHDR:
case IPV6_DSTOPTS:
- {
- struct ipv6_txoptions *opt;
- struct ipv6_opt_hdr *new = NULL;
-
- /* hop-by-hop / destination options are privileged option */
- retv = -EPERM;
- if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
- break;
-
- /* remove any sticky options header with a zero option
- * length, per RFC3542.
- */
- if (optlen == 0)
- optval = NULL;
- else if (!optval)
- goto e_inval;
- else if (optlen < sizeof(struct ipv6_opt_hdr) ||
- optlen & 0x7 || optlen > 8 * 255)
- goto e_inval;
- else {
- new = memdup_user(optval, optlen);
- if (IS_ERR(new)) {
- retv = PTR_ERR(new);
- break;
- }
- if (unlikely(ipv6_optlen(new) > optlen)) {
- kfree(new);
- goto e_inval;
- }
- }
-
- opt = rcu_dereference_protected(np->opt,
- lockdep_sock_is_held(sk));
- opt = ipv6_renew_options(sk, opt, optname, new);
- kfree(new);
- if (IS_ERR(opt)) {
- retv = PTR_ERR(opt);
- break;
- }
-
- /* routing header option needs extra check */
- retv = -EINVAL;
- if (optname == IPV6_RTHDR && opt && opt->srcrt) {
- struct ipv6_rt_hdr *rthdr = opt->srcrt;
- switch (rthdr->type) {
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
- case IPV6_SRCRT_TYPE_2:
- if (rthdr->hdrlen != 2 ||
- rthdr->segments_left != 1)
- goto sticky_done;
-
- break;
-#endif
- case IPV6_SRCRT_TYPE_4:
- {
- struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)
- opt->srcrt;
-
- if (!seg6_validate_srh(srh, optlen, false))
- goto sticky_done;
- break;
- }
- default:
- goto sticky_done;
- }
- }
-
- retv = 0;
- opt = ipv6_update_options(sk, opt);
-sticky_done:
- if (opt) {
- atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
- txopt_put(opt);
- }
+ retv = ipv6_set_opt_hdr(sk, optname, optval, optlen);
break;
- }
case IPV6_PKTINFO:
{
@@ -519,12 +663,13 @@ sticky_done:
if (optlen == 0)
goto e_inval;
- else if (optlen < sizeof(struct in6_pktinfo) || !optval)
+ else if (optlen < sizeof(struct in6_pktinfo) ||
+ sockptr_is_null(optval))
goto e_inval;
- if (copy_from_user(&pkt, optval, sizeof(struct in6_pktinfo))) {
- retv = -EFAULT;
- break;
+ if (copy_from_sockptr(&pkt, optval, sizeof(pkt))) {
+ retv = -EFAULT;
+ break;
}
if (!sk_dev_equal_l3scope(sk, pkt.ipi6_ifindex))
goto e_inval;
@@ -565,7 +710,7 @@ sticky_done:
refcount_set(&opt->refcnt, 1);
opt->tot_len = sizeof(*opt) + optlen;
retv = -EFAULT;
- if (copy_from_user(opt+1, optval, optlen))
+ if (copy_from_sockptr(opt + 1, optval, optlen))
goto done;
msg.msg_controllen = optlen;
@@ -687,7 +832,7 @@ done:
break;
retv = -EFAULT;
- if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)))
+ if (copy_from_sockptr(&mreq, optval, sizeof(struct ipv6_mreq)))
break;
if (optname == IPV6_ADD_MEMBERSHIP)
@@ -705,7 +850,7 @@ done:
goto e_inval;
retv = -EFAULT;
- if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)))
+ if (copy_from_sockptr(&mreq, optval, sizeof(struct ipv6_mreq)))
break;
if (optname == IPV6_JOIN_ANYCAST)
@@ -723,77 +868,26 @@ done:
case MCAST_JOIN_GROUP:
case MCAST_LEAVE_GROUP:
- {
- struct group_req greq;
- struct sockaddr_in6 *psin6;
-
- if (optlen < sizeof(struct group_req))
- goto e_inval;
-
- retv = -EFAULT;
- if (copy_from_user(&greq, optval, sizeof(struct group_req)))
- break;
- if (greq.gr_group.ss_family != AF_INET6) {
- retv = -EADDRNOTAVAIL;
- break;
- }
- psin6 = (struct sockaddr_in6 *)&greq.gr_group;
- if (optname == MCAST_JOIN_GROUP)
- retv = ipv6_sock_mc_join(sk, greq.gr_interface,
- &psin6->sin6_addr);
+ if (in_compat_syscall())
+ retv = compat_ipv6_mcast_join_leave(sk, optname, optval,
+ optlen);
else
- retv = ipv6_sock_mc_drop(sk, greq.gr_interface,
- &psin6->sin6_addr);
+ retv = ipv6_mcast_join_leave(sk, optname, optval,
+ optlen);
break;
- }
case MCAST_JOIN_SOURCE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_BLOCK_SOURCE:
case MCAST_UNBLOCK_SOURCE:
- {
- struct group_source_req greqs;
-
- if (optlen < sizeof(struct group_source_req))
- goto e_inval;
- if (copy_from_user(&greqs, optval, sizeof(greqs))) {
- retv = -EFAULT;
- break;
- }
- retv = do_ipv6_mcast_group_source(sk, optname, &greqs);
+ retv = do_ipv6_mcast_group_source(sk, optname, optval, optlen);
break;
- }
case MCAST_MSFILTER:
- {
- struct group_filter *gsf;
-
- if (optlen < GROUP_FILTER_SIZE(0))
- goto e_inval;
- if (optlen > sysctl_optmem_max) {
- retv = -ENOBUFS;
- break;
- }
- gsf = memdup_user(optval, optlen);
- if (IS_ERR(gsf)) {
- retv = PTR_ERR(gsf);
- break;
- }
- /* numsrc >= (4G-140)/128 overflow in 32 bits */
- if (gsf->gf_numsrc >= 0x1ffffffU ||
- gsf->gf_numsrc > sysctl_mld_max_msf) {
- kfree(gsf);
- retv = -ENOBUFS;
- break;
- }
- if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
- kfree(gsf);
- retv = -EINVAL;
- break;
- }
- retv = ip6_mc_msfilter(sk, gsf, gsf->gf_slist);
- kfree(gsf);
-
+ if (in_compat_syscall())
+ retv = compat_ipv6_set_mcast_msfilter(sk, optval,
+ optlen);
+ else
+ retv = ipv6_set_mcast_msfilter(sk, optval, optlen);
break;
- }
case IPV6_ROUTER_ALERT:
if (optlen < sizeof(int))
goto e_inval;
@@ -872,6 +966,14 @@ done:
np->rxopt.bits.recvfragsize = valbool;
retv = 0;
break;
+ case IPV6_RECVERR_RFC4884:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ if (val < 0 || val > 1)
+ goto e_inval;
+ np->recverr_rfc4884 = valbool;
+ retv = 0;
+ break;
}
release_sock(sk);
@@ -887,8 +989,8 @@ e_inval:
return -EINVAL;
}
-int ipv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen)
{
int err;
@@ -909,140 +1011,6 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
}
EXPORT_SYMBOL(ipv6_setsockopt);
-#ifdef CONFIG_COMPAT
-int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- int err;
-
- if (level == SOL_IP && sk->sk_type != SOCK_RAW) {
- if (udp_prot.compat_setsockopt != NULL)
- return udp_prot.compat_setsockopt(sk, level, optname,
- optval, optlen);
- return udp_prot.setsockopt(sk, level, optname, optval, optlen);
- }
-
- if (level != SOL_IPV6)
- return -ENOPROTOOPT;
-
- switch (optname) {
- case MCAST_JOIN_GROUP:
- case MCAST_LEAVE_GROUP:
- {
- struct compat_group_req __user *gr32 = (void __user *)optval;
- struct group_req greq;
- struct sockaddr_in6 *psin6 = (struct sockaddr_in6 *)&greq.gr_group;
-
- if (optlen < sizeof(struct compat_group_req))
- return -EINVAL;
-
- if (get_user(greq.gr_interface, &gr32->gr_interface) ||
- copy_from_user(&greq.gr_group, &gr32->gr_group,
- sizeof(greq.gr_group)))
- return -EFAULT;
-
- if (greq.gr_group.ss_family != AF_INET6)
- return -EADDRNOTAVAIL;
-
- rtnl_lock();
- lock_sock(sk);
- if (optname == MCAST_JOIN_GROUP)
- err = ipv6_sock_mc_join(sk, greq.gr_interface,
- &psin6->sin6_addr);
- else
- err = ipv6_sock_mc_drop(sk, greq.gr_interface,
- &psin6->sin6_addr);
- release_sock(sk);
- rtnl_unlock();
- return err;
- }
- case MCAST_JOIN_SOURCE_GROUP:
- case MCAST_LEAVE_SOURCE_GROUP:
- case MCAST_BLOCK_SOURCE:
- case MCAST_UNBLOCK_SOURCE:
- {
- struct compat_group_source_req __user *gsr32 = (void __user *)optval;
- struct group_source_req greqs;
-
- if (optlen < sizeof(struct compat_group_source_req))
- return -EINVAL;
-
- if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) ||
- copy_from_user(&greqs.gsr_group, &gsr32->gsr_group,
- sizeof(greqs.gsr_group)) ||
- copy_from_user(&greqs.gsr_source, &gsr32->gsr_source,
- sizeof(greqs.gsr_source)))
- return -EFAULT;
-
- rtnl_lock();
- lock_sock(sk);
- err = do_ipv6_mcast_group_source(sk, optname, &greqs);
- release_sock(sk);
- rtnl_unlock();
- return err;
- }
- case MCAST_MSFILTER:
- {
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
- struct compat_group_filter *gf32;
- void *p;
- int n;
-
- if (optlen < size0)
- return -EINVAL;
- if (optlen > sysctl_optmem_max - 4)
- return -ENOBUFS;
-
- p = kmalloc(optlen + 4, GFP_KERNEL);
- if (!p)
- return -ENOMEM;
-
- gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
- if (copy_from_user(gf32, optval, optlen)) {
- err = -EFAULT;
- goto mc_msf_out;
- }
-
- n = gf32->gf_numsrc;
- /* numsrc >= (4G-140)/128 overflow in 32 bits */
- if (n >= 0x1ffffffU ||
- n > sysctl_mld_max_msf) {
- err = -ENOBUFS;
- goto mc_msf_out;
- }
- if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) {
- err = -EINVAL;
- goto mc_msf_out;
- }
-
- rtnl_lock();
- lock_sock(sk);
- err = ip6_mc_msfilter(sk, &(struct group_filter){
- .gf_interface = gf32->gf_interface,
- .gf_group = gf32->gf_group,
- .gf_fmode = gf32->gf_fmode,
- .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist);
- release_sock(sk);
- rtnl_unlock();
-mc_msf_out:
- kfree(p);
- return err;
- }
- }
-
- err = do_ipv6_setsockopt(sk, level, optname, optval, optlen);
-#ifdef CONFIG_NETFILTER
- /* we need to exclude all possible ENOPROTOOPTs except default case */
- if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
- optname != IPV6_XFRM_POLICY)
- err = compat_nf_setsockopt(sk, PF_INET6, optname, optval,
- optlen);
-#endif
- return err;
-}
-EXPORT_SYMBOL(compat_ipv6_setsockopt);
-#endif
-
static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
int optname, char __user *optval, int len)
{
@@ -1077,6 +1045,75 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
return len;
}
+static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
+ int __user *optlen, int len)
+{
+ const int size0 = offsetof(struct group_filter, gf_slist);
+ struct group_filter __user *p = optval;
+ struct group_filter gsf;
+ int num;
+ int err;
+
+ if (len < size0)
+ return -EINVAL;
+ if (copy_from_user(&gsf, p, size0))
+ return -EFAULT;
+ if (gsf.gf_group.ss_family != AF_INET6)
+ return -EADDRNOTAVAIL;
+ num = gsf.gf_numsrc;
+ lock_sock(sk);
+ err = ip6_mc_msfget(sk, &gsf, p->gf_slist);
+ if (!err) {
+ if (num > gsf.gf_numsrc)
+ num = gsf.gf_numsrc;
+ if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
+ copy_to_user(p, &gsf, size0))
+ err = -EFAULT;
+ }
+ release_sock(sk);
+ return err;
+}
+
+static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
+ int __user *optlen)
+{
+ const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ struct compat_group_filter __user *p = optval;
+ struct compat_group_filter gf32;
+ struct group_filter gf;
+ int len, err;
+ int num;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+ if (len < size0)
+ return -EINVAL;
+
+ if (copy_from_user(&gf32, p, size0))
+ return -EFAULT;
+ gf.gf_interface = gf32.gf_interface;
+ gf.gf_fmode = gf32.gf_fmode;
+ num = gf.gf_numsrc = gf32.gf_numsrc;
+ gf.gf_group = gf32.gf_group;
+
+ if (gf.gf_group.ss_family != AF_INET6)
+ return -EADDRNOTAVAIL;
+
+ lock_sock(sk);
+ err = ip6_mc_msfget(sk, &gf, p->gf_slist);
+ release_sock(sk);
+ if (err)
+ return err;
+ if (num > gf.gf_numsrc)
+ num = gf.gf_numsrc;
+ len = GROUP_FILTER_SIZE(num) - (sizeof(gf)-sizeof(gf32));
+ if (put_user(len, optlen) ||
+ put_user(gf.gf_fmode, &p->gf_fmode) ||
+ put_user(gf.gf_numsrc, &p->gf_numsrc))
+ return -EFAULT;
+ return 0;
+}
+
static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen, unsigned int flags)
{
@@ -1100,33 +1137,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = sk->sk_family;
break;
case MCAST_MSFILTER:
- {
- struct group_filter __user *p = (void __user *)optval;
- struct group_filter gsf;
- const int size0 = offsetof(struct group_filter, gf_slist);
- int num;
- int err;
-
- if (len < size0)
- return -EINVAL;
- if (copy_from_user(&gsf, p, size0))
- return -EFAULT;
- if (gsf.gf_group.ss_family != AF_INET6)
- return -EADDRNOTAVAIL;
- num = gsf.gf_numsrc;
- lock_sock(sk);
- err = ip6_mc_msfget(sk, &gsf, p->gf_slist);
- if (!err) {
- if (num > gsf.gf_numsrc)
- num = gsf.gf_numsrc;
- if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
- copy_to_user(p, &gsf, size0))
- err = -EFAULT;
- }
- release_sock(sk);
- return err;
- }
-
+ if (in_compat_syscall())
+ return compat_ipv6_get_msfilter(sk, optval, optlen);
+ return ipv6_get_msfilter(sk, optval, optlen, len);
case IPV6_2292PKTOPTIONS:
{
struct msghdr msg;
@@ -1435,6 +1448,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->rtalert_isolate;
break;
+ case IPV6_RECVERR_RFC4884:
+ val = np->recverr_rfc4884;
+ break;
+
default:
return -ENOPROTOOPT;
}
@@ -1474,78 +1491,3 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
return err;
}
EXPORT_SYMBOL(ipv6_getsockopt);
-
-#ifdef CONFIG_COMPAT
-int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- int err;
-
- if (level == SOL_IP && sk->sk_type != SOCK_RAW) {
- if (udp_prot.compat_getsockopt != NULL)
- return udp_prot.compat_getsockopt(sk, level, optname,
- optval, optlen);
- return udp_prot.getsockopt(sk, level, optname, optval, optlen);
- }
-
- if (level != SOL_IPV6)
- return -ENOPROTOOPT;
-
- if (optname == MCAST_MSFILTER) {
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
- struct compat_group_filter __user *p = (void __user *)optval;
- struct compat_group_filter gf32;
- struct group_filter gf;
- int ulen, err;
- int num;
-
- if (get_user(ulen, optlen))
- return -EFAULT;
-
- if (ulen < size0)
- return -EINVAL;
-
- if (copy_from_user(&gf32, p, size0))
- return -EFAULT;
-
- gf.gf_interface = gf32.gf_interface;
- gf.gf_fmode = gf32.gf_fmode;
- num = gf.gf_numsrc = gf32.gf_numsrc;
- gf.gf_group = gf32.gf_group;
-
- if (gf.gf_group.ss_family != AF_INET6)
- return -EADDRNOTAVAIL;
- lock_sock(sk);
- err = ip6_mc_msfget(sk, &gf, p->gf_slist);
- release_sock(sk);
- if (err)
- return err;
- if (num > gf.gf_numsrc)
- num = gf.gf_numsrc;
- ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf)-sizeof(gf32));
- if (put_user(ulen, optlen) ||
- put_user(gf.gf_fmode, &p->gf_fmode) ||
- put_user(gf.gf_numsrc, &p->gf_numsrc))
- return -EFAULT;
- return 0;
- }
-
- err = do_ipv6_getsockopt(sk, level, optname, optval, optlen,
- MSG_CMSG_COMPAT);
-#ifdef CONFIG_NETFILTER
- /* we need to exclude all possible ENOPROTOOPTs except default case */
- if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
- int len;
-
- if (get_user(len, optlen))
- return -EFAULT;
-
- err = compat_nf_getsockopt(sk, PF_INET6, optname, optval, &len);
- if (err >= 0)
- err = put_user(len, optlen);
- }
-#endif
- return err;
-}
-EXPORT_SYMBOL(compat_ipv6_getsockopt);
-#endif
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e96a431549bc..2e2119bfcf13 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -960,8 +960,7 @@ static int compat_table_info(const struct xt_table_info *info,
}
#endif
-static int get_info(struct net *net, void __user *user,
- const int *len, int compat)
+static int get_info(struct net *net, void __user *user, const int *len)
{
char name[XT_TABLE_MAXNAMELEN];
struct xt_table *t;
@@ -975,7 +974,7 @@ static int get_info(struct net *net, void __user *user,
name[XT_TABLE_MAXNAMELEN-1] = '\0';
#ifdef CONFIG_COMPAT
- if (compat)
+ if (in_compat_syscall())
xt_compat_lock(AF_INET6);
#endif
t = xt_request_find_table_lock(net, AF_INET6, name);
@@ -985,7 +984,7 @@ static int get_info(struct net *net, void __user *user,
#ifdef CONFIG_COMPAT
struct xt_table_info tmp;
- if (compat) {
+ if (in_compat_syscall()) {
ret = compat_table_info(private, &tmp);
xt_compat_flush_offsets(AF_INET6);
private = &tmp;
@@ -1011,7 +1010,7 @@ static int get_info(struct net *net, void __user *user,
} else
ret = PTR_ERR(t);
#ifdef CONFIG_COMPAT
- if (compat)
+ if (in_compat_syscall())
xt_compat_unlock(AF_INET6);
#endif
return ret;
@@ -1120,7 +1119,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
}
static int
-do_replace(struct net *net, const void __user *user, unsigned int len)
+do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
int ret;
struct ip6t_replace tmp;
@@ -1128,7 +1127,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
void *loc_cpu_entry;
struct ip6t_entry *iter;
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
/* overflow check */
@@ -1144,8 +1143,8 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
return -ENOMEM;
loc_cpu_entry = newinfo->entries;
- if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
- tmp.size) != 0) {
+ if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp),
+ tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
}
@@ -1169,8 +1168,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
}
static int
-do_add_counters(struct net *net, const void __user *user, unsigned int len,
- int compat)
+do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
{
unsigned int i;
struct xt_counters_info tmp;
@@ -1181,7 +1179,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
struct ip6t_entry *iter;
unsigned int addend;
- paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
+ paddc = xt_copy_counters(arg, len, &tmp);
if (IS_ERR(paddc))
return PTR_ERR(paddc);
t = xt_find_table_lock(net, AF_INET6, tmp.name);
@@ -1495,7 +1493,7 @@ out_unlock:
}
static int
-compat_do_replace(struct net *net, void __user *user, unsigned int len)
+compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
int ret;
struct compat_ip6t_replace tmp;
@@ -1503,7 +1501,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
void *loc_cpu_entry;
struct ip6t_entry *iter;
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
/* overflow check */
@@ -1519,8 +1517,8 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
return -ENOMEM;
loc_cpu_entry = newinfo->entries;
- if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
- tmp.size) != 0) {
+ if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp),
+ tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
}
@@ -1543,31 +1541,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
return ret;
}
-static int
-compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
- unsigned int len)
-{
- int ret;
-
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
- switch (cmd) {
- case IP6T_SO_SET_REPLACE:
- ret = compat_do_replace(sock_net(sk), user, len);
- break;
-
- case IP6T_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(sock_net(sk), user, len, 1);
- break;
-
- default:
- ret = -EINVAL;
- }
-
- return ret;
-}
-
struct compat_ip6t_get_entries {
char name[XT_TABLE_MAXNAMELEN];
compat_uint_t size;
@@ -1643,33 +1616,10 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
xt_compat_unlock(AF_INET6);
return ret;
}
-
-static int do_ip6t_get_ctl(struct sock *, int, void __user *, int *);
-
-static int
-compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
-{
- int ret;
-
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
- switch (cmd) {
- case IP6T_SO_GET_INFO:
- ret = get_info(sock_net(sk), user, len, 1);
- break;
- case IP6T_SO_GET_ENTRIES:
- ret = compat_get_entries(sock_net(sk), user, len);
- break;
- default:
- ret = do_ip6t_get_ctl(sk, cmd, user, len);
- }
- return ret;
-}
#endif
static int
-do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+do_ip6t_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len)
{
int ret;
@@ -1678,11 +1628,16 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
switch (cmd) {
case IP6T_SO_SET_REPLACE:
- ret = do_replace(sock_net(sk), user, len);
+#ifdef CONFIG_COMPAT
+ if (in_compat_syscall())
+ ret = compat_do_replace(sock_net(sk), arg, len);
+ else
+#endif
+ ret = do_replace(sock_net(sk), arg, len);
break;
case IP6T_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(sock_net(sk), user, len, 0);
+ ret = do_add_counters(sock_net(sk), arg, len);
break;
default:
@@ -1702,11 +1657,16 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
switch (cmd) {
case IP6T_SO_GET_INFO:
- ret = get_info(sock_net(sk), user, len, 0);
+ ret = get_info(sock_net(sk), user, len);
break;
case IP6T_SO_GET_ENTRIES:
- ret = get_entries(sock_net(sk), user, len);
+#ifdef CONFIG_COMPAT
+ if (in_compat_syscall())
+ ret = compat_get_entries(sock_net(sk), user, len);
+ else
+#endif
+ ret = get_entries(sock_net(sk), user, len);
break;
case IP6T_SO_GET_REVISION_MATCH:
@@ -1897,15 +1857,9 @@ static struct nf_sockopt_ops ip6t_sockopts = {
.set_optmin = IP6T_BASE_CTL,
.set_optmax = IP6T_SO_SET_MAX+1,
.set = do_ip6t_set_ctl,
-#ifdef CONFIG_COMPAT
- .compat_set = compat_do_ip6t_set_ctl,
-#endif
.get_optmin = IP6T_BASE_CTL,
.get_optmax = IP6T_SO_GET_MAX+1,
.get = do_ip6t_get_ctl,
-#ifdef CONFIG_COMPAT
- .compat_get = compat_do_ip6t_get_ctl,
-#endif
.owner = THIS_MODULE,
};
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 4e15a14435e4..70da2f2ce064 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -74,8 +74,7 @@ static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par)
ahinfo->hdrres, ah->reserved,
!(ahinfo->hdrres && ah->reserved));
- return (ah != NULL) &&
- spi_match(ahinfo->spis[0], ahinfo->spis[1],
+ return spi_match(ahinfo->spis[0], ahinfo->spis[1],
ntohl(ah->spi),
!!(ahinfo->invflags & IP6T_AH_INV_SPI)) &&
(!ahinfo->hdrlen ||
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index fb91eeee4a1e..3aad6439386b 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -85,8 +85,7 @@ frag_mt6(const struct sk_buff *skb, struct xt_action_param *par)
!((fraginfo->flags & IP6T_FRAG_NMF) &&
(ntohs(fh->frag_off) & IP6_MF)));
- return (fh != NULL) &&
- id_match(fraginfo->ids[0], fraginfo->ids[1],
+ return id_match(fraginfo->ids[0], fraginfo->ids[1],
ntohl(fh->identification),
!!(fraginfo->invflags & IP6T_FRAG_INV_IDS)) &&
!((fraginfo->flags & IP6T_FRAG_RES) &&
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 467b2a86031b..e7a3fb9355ee 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -86,8 +86,7 @@ hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
((optinfo->hdrlen == hdrlen) ^
!!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
- ret = (oh != NULL) &&
- (!(optinfo->flags & IP6T_OPTS_LEN) ||
+ ret = (!(optinfo->flags & IP6T_OPTS_LEN) ||
((optinfo->hdrlen == hdrlen) ^
!!(optinfo->invflags & IP6T_OPTS_INV_LEN)));
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index f633dc84ca3f..733c83d38b30 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -89,8 +89,7 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
!((rtinfo->flags & IP6T_RT_RES) &&
(((const struct rt0_hdr *)rh)->reserved)));
- ret = (rh != NULL) &&
- (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
+ ret = (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
rh->segments_left,
!!(rtinfo->invflags & IP6T_RT_INV_SGS))) &&
(!(rtinfo->flags & IP6T_RT_LEN) ||
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 5fae66f66671..4aef6baaa55e 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -126,6 +126,21 @@ void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb,
}
EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_put);
+static int nf_reject6_fill_skb_dst(struct sk_buff *skb_in)
+{
+ struct dst_entry *dst = NULL;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(struct flowi));
+ fl.u.ip6.daddr = ipv6_hdr(skb_in)->saddr;
+ nf_ip6_route(dev_net(skb_in->dev), &dst, &fl, false);
+ if (!dst)
+ return -1;
+
+ skb_dst_set(skb_in, dst);
+ return 0;
+}
+
void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
{
struct net_device *br_indev __maybe_unused;
@@ -154,6 +169,14 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
fl6.daddr = oip6h->saddr;
fl6.fl6_sport = otcph->dest;
fl6.fl6_dport = otcph->source;
+
+ if (hook == NF_INET_PRE_ROUTING) {
+ nf_ip6_route(net, &dst, flowi6_to_flowi(&fl6), false);
+ if (!dst)
+ return;
+ skb_dst_set(oldskb, dst);
+ }
+
fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev);
fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark);
security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
@@ -245,6 +268,9 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in,
if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
skb_in->dev = net->loopback_dev;
+ if (hooknum == NF_INET_PRE_ROUTING && nf_reject6_fill_skb_dst(skb_in))
+ return;
+
icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
}
EXPORT_SYMBOL_GPL(nf_send_unreach6);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 98ac32b49d8c..6caa062f68e7 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -114,6 +114,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
ipcm6_init_sk(&ipc6, np);
+ ipc6.sockc.mark = sk->sk_mark;
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8ef5a7b30524..874f01cd7aec 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -972,13 +972,13 @@ do_confirm:
}
static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+ sockptr_t optval, int optlen)
{
switch (optname) {
case ICMPV6_FILTER:
if (optlen > sizeof(struct icmp6_filter))
optlen = sizeof(struct icmp6_filter);
- if (copy_from_user(&raw6_sk(sk)->filter, optval, optlen))
+ if (copy_from_sockptr(&raw6_sk(sk)->filter, optval, optlen))
return -EFAULT;
return 0;
default:
@@ -1015,12 +1015,12 @@ static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct raw6_sock *rp = raw6_sk(sk);
int val;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
switch (optname) {
@@ -1062,7 +1062,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
}
static int rawv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
switch (level) {
case SOL_RAW:
@@ -1084,30 +1084,6 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
}
-#ifdef CONFIG_COMPAT
-static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- switch (level) {
- case SOL_RAW:
- break;
- case SOL_ICMPV6:
- if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
- return -EOPNOTSUPP;
- return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
- case SOL_IPV6:
- if (optname == IPV6_CHECKSUM ||
- optname == IPV6_HDRINCL)
- break;
- fallthrough;
- default:
- return compat_ipv6_setsockopt(sk, level, optname,
- optval, optlen);
- }
- return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
-}
-#endif
-
static int do_rawv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -1169,30 +1145,6 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
}
-#ifdef CONFIG_COMPAT
-static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- switch (level) {
- case SOL_RAW:
- break;
- case SOL_ICMPV6:
- if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
- return -EOPNOTSUPP;
- return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
- case SOL_IPV6:
- if (optname == IPV6_CHECKSUM ||
- optname == IPV6_HDRINCL)
- break;
- fallthrough;
- default:
- return compat_ipv6_getsockopt(sk, level, optname,
- optval, optlen);
- }
- return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
-}
-#endif
-
static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
switch (cmd) {
@@ -1297,8 +1249,6 @@ struct proto rawv6_prot = {
.usersize = sizeof_field(struct raw6_sock, filter),
.h.raw_hash = &raw_v6_hashinfo,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_rawv6_setsockopt,
- .compat_getsockopt = compat_rawv6_getsockopt,
.compat_ioctl = compat_rawv6_ioctl,
#endif
.diag_destroy = raw_abort,
@@ -1378,8 +1328,6 @@ const struct proto_ops inet6_sockraw_ops = {
.sendpage = sock_no_sendpage,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet6_compat_ioctl,
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
#endif
};
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4c36bd0c7930..5e7e25e2523a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -61,6 +61,7 @@
#include <net/l3mdev.h>
#include <net/ip.h>
#include <linux/uaccess.h>
+#include <linux/btf_ids.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
@@ -1210,7 +1211,7 @@ fallback:
return nrt;
}
-static struct rt6_info *ip6_pol_route_lookup(struct net *net,
+INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6,
const struct sk_buff *skb,
@@ -2277,7 +2278,7 @@ out:
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
-static struct rt6_info *ip6_pol_route_input(struct net *net,
+INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_input(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6,
const struct sk_buff *skb,
@@ -2468,7 +2469,7 @@ void ip6_route_input(struct sk_buff *skb)
&fl6, skb, flags));
}
-static struct rt6_info *ip6_pol_route_output(struct net *net,
+INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6,
const struct sk_buff *skb,
@@ -2915,7 +2916,7 @@ struct ip6rd_flowi {
struct in6_addr gateway;
};
-static struct rt6_info *__ip6_route_redirect(struct net *net,
+INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6,
const struct sk_buff *skb,
@@ -6423,21 +6424,29 @@ void __init ip6_route_init_special_entries(void)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
-static const struct bpf_iter_reg ipv6_route_reg_info = {
- .target = "ipv6_route",
+BTF_ID_LIST(btf_fib6_info_id)
+BTF_ID(struct, fib6_info)
+
+static const struct bpf_iter_seq_info ipv6_route_seq_info = {
.seq_ops = &ipv6_route_seq_ops,
.init_seq_private = bpf_iter_init_seq_net,
.fini_seq_private = bpf_iter_fini_seq_net,
.seq_priv_size = sizeof(struct ipv6_route_iter),
+};
+
+static struct bpf_iter_reg ipv6_route_reg_info = {
+ .target = "ipv6_route",
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__ipv6_route, rt),
PTR_TO_BTF_ID_OR_NULL },
},
+ .seq_info = &ipv6_route_seq_info,
};
static int __init bpf_iter_register(void)
{
+ ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id;
return bpf_iter_reg_target(&ipv6_route_reg_info);
}
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index c3ececd7cfc1..5fdf3ebb953f 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -136,8 +136,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
oldhdr = ipv6_hdr(skb);
- buf = kzalloc(ipv6_rpl_srh_alloc_size(srh->segments_left - 1) * 2,
- GFP_ATOMIC);
+ buf = kcalloc(struct_size(srh, segments.addr, srh->segments_left), 2, GFP_ATOMIC);
if (!buf)
return -ENOMEM;
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index e0e9f48ab14f..897fa59c47de 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -27,6 +27,23 @@
#include <net/seg6_hmac.h>
#endif
+static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
+{
+ int head = 0;
+
+ switch (tuninfo->mode) {
+ case SEG6_IPTUN_MODE_INLINE:
+ break;
+ case SEG6_IPTUN_MODE_ENCAP:
+ head = sizeof(struct ipv6hdr);
+ break;
+ case SEG6_IPTUN_MODE_L2ENCAP:
+ return 0;
+ }
+
+ return ((tuninfo->srh->hdrlen + 1) << 3) + head;
+}
+
struct seg6_lwt {
struct dst_cache cache;
struct seg6_iptunnel_encap tuninfo[];
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 13235a012388..e796a64be308 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -170,7 +170,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false);
+ req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb);
if (!req)
goto out;
@@ -178,9 +178,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
treq = tcp_rsk(req);
treq->tfo_listener = false;
- if (IS_ENABLED(CONFIG_MPTCP))
- treq->is_mptcp = 0;
-
if (security_inet_conn_request(sk, skb, req))
goto out_free;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f67d45ff00b4..305870a72352 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -567,7 +567,7 @@ static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
}
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
- char __user *optval, int optlen)
+ sockptr_t optval, int optlen)
{
struct tcp_md5sig cmd;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
@@ -577,7 +577,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
if (optlen < sizeof(cmd))
return -EINVAL;
- if (copy_from_user(&cmd, optval, sizeof(cmd)))
+ if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
return -EFAULT;
if (sin6->sin6_family != AF_INET6)
@@ -1811,6 +1811,13 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_destructor = tcp_twsk_destructor,
};
+INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
+}
+
const struct inet_connection_sock_af_ops ipv6_specific = {
.queue_xmit = inet6_csk_xmit,
.send_check = tcp_v6_send_check,
@@ -1824,10 +1831,6 @@ const struct inet_connection_sock_af_ops ipv6_specific = {
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_ipv6_setsockopt,
- .compat_getsockopt = compat_ipv6_getsockopt,
-#endif
.mtu_reduced = tcp_v6_mtu_reduced,
};
@@ -1854,10 +1857,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_ipv6_setsockopt,
- .compat_getsockopt = compat_ipv6_getsockopt,
-#endif
.mtu_reduced = tcp_v4_mtu_reduced,
};
@@ -2115,10 +2114,6 @@ struct proto tcpv6_prot = {
.rsk_prot = &tcp6_request_sock_ops,
.h.hashinfo = &tcp_hashinfo,
.no_autobind = true,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_tcp_setsockopt,
- .compat_getsockopt = compat_tcp_getsockopt,
-#endif
.diag_destroy = tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 06c02ebe6b9b..00e8d8b1c9a7 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -155,6 +155,33 @@ drop:
return 0;
}
+#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL)
+static int tunnel6_rcv_cb(struct sk_buff *skb, u8 proto, int err)
+{
+ struct xfrm6_tunnel __rcu *head;
+ struct xfrm6_tunnel *handler;
+ int ret;
+
+ head = (proto == IPPROTO_IPV6) ? tunnel6_handlers : tunnel46_handlers;
+
+ for_each_tunnel_rcu(head, handler) {
+ if (handler->cb_handler) {
+ ret = handler->cb_handler(skb, err);
+ if (ret <= 0)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static const struct xfrm_input_afinfo tunnel6_input_afinfo = {
+ .family = AF_INET6,
+ .is_ipip = true,
+ .callback = tunnel6_rcv_cb,
+};
+#endif
+
static int tunnel46_rcv(struct sk_buff *skb)
{
struct xfrm6_tunnel *handler;
@@ -245,11 +272,25 @@ static int __init tunnel6_init(void)
inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP);
return -EAGAIN;
}
+#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL)
+ if (xfrm_input_register_afinfo(&tunnel6_input_afinfo)) {
+ pr_err("%s: can't add input afinfo\n", __func__);
+ inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6);
+ inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP);
+ if (xfrm6_tunnel_mpls_supported())
+ inet6_del_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS);
+ return -EAGAIN;
+ }
+#endif
return 0;
}
static void __exit tunnel6_fini(void)
{
+#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL)
+ if (xfrm_input_unregister_afinfo(&tunnel6_input_afinfo))
+ pr_err("%s: can't remove input afinfo\n", __func__);
+#endif
if (inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP))
pr_err("%s: can't remove protocol\n", __func__);
if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6))
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index a8d74f44056a..29d9691359b9 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -141,6 +141,24 @@ static int compute_score(struct sock *sk, struct net *net,
return score;
}
+static struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned int hnum)
+{
+ struct sock *reuse_sk = NULL;
+ u32 hash;
+
+ if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
+ hash = udp6_ehashfn(net, daddr, hnum, saddr, sport);
+ reuse_sk = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ }
+ return reuse_sk;
+}
+
/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -148,9 +166,8 @@ static struct sock *udp6_lib_lookup2(struct net *net,
int dif, int sdif, struct udp_hslot *hslot2,
struct sk_buff *skb)
{
- struct sock *sk, *result, *reuseport_result;
+ struct sock *sk, *result;
int score, badness;
- u32 hash = 0;
result = NULL;
badness = -1;
@@ -158,26 +175,44 @@ static struct sock *udp6_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- reuseport_result = NULL;
+ result = lookup_reuseport(net, sk, skb,
+ saddr, sport, daddr, hnum);
+ /* Fall back to scoring if group has connections */
+ if (result && !reuseport_has_conns(sk, false))
+ return result;
- if (sk->sk_reuseport &&
- sk->sk_state != TCP_ESTABLISHED) {
- hash = udp6_ehashfn(net, daddr, hnum,
- saddr, sport);
-
- reuseport_result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (reuseport_result && !reuseport_has_conns(sk, false))
- return reuseport_result;
- }
-
- result = reuseport_result ? : sk;
+ result = result ? : sk;
badness = score;
}
}
return result;
}
+static inline struct sock *udp6_lookup_run_bpf(struct net *net,
+ struct udp_table *udptable,
+ struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ u16 hnum)
+{
+ struct sock *sk, *reuse_sk;
+ bool no_reuseport;
+
+ if (udptable != &udp_table)
+ return NULL; /* only UDP is supported */
+
+ no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP,
+ saddr, sport, daddr, hnum, &sk);
+ if (no_reuseport || IS_ERR_OR_NULL(sk))
+ return sk;
+
+ reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
+ if (reuse_sk)
+ sk = reuse_sk;
+ return sk;
+}
+
/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -188,25 +223,42 @@ struct sock *__udp6_lib_lookup(struct net *net,
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2;
struct udp_hslot *hslot2;
- struct sock *result;
+ struct sock *result, *sk;
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
+ /* Lookup connected or non-wildcard sockets */
result = udp6_lib_lookup2(net, saddr, sport,
daddr, hnum, dif, sdif,
hslot2, skb);
- if (!result) {
- hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
- slot2 = hash2 & udptable->mask;
+ if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
+ goto done;
+
+ /* Lookup redirect from BPF */
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
+ sk = udp6_lookup_run_bpf(net, udptable, skb,
+ saddr, sport, daddr, hnum);
+ if (sk) {
+ result = sk;
+ goto done;
+ }
+ }
- hslot2 = &udptable->hash2[slot2];
+ /* Got non-wildcard socket or error on first lookup */
+ if (result)
+ goto done;
- result = udp6_lib_lookup2(net, saddr, sport,
- &in6addr_any, hnum, dif, sdif,
- hslot2, skb);
- }
+ /* Lookup wildcard sockets */
+ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+
+ result = udp6_lib_lookup2(net, saddr, sport,
+ &in6addr_any, hnum, dif, sdif,
+ hslot2, skb);
+done:
if (IS_ERR(result))
return NULL;
return result;
@@ -1062,6 +1114,9 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
* @sk: socket we are sending on
* @skb: sk_buff containing the filled-in UDP header
* (checksum field must be zeroed out)
+ * @saddr: source address
+ * @daddr: destination address
+ * @len: length of packet
*/
static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
const struct in6_addr *saddr,
@@ -1561,26 +1616,16 @@ void udpv6_destroy_sock(struct sock *sk)
/*
* Socket option code for UDP
*/
-int udpv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen)
{
if (level == SOL_UDP || level == SOL_UDPLITE)
- return udp_lib_setsockopt(sk, level, optname, optval, optlen,
+ return udp_lib_setsockopt(sk, level, optname,
+ optval, optlen,
udp_v6_push_pending_frames);
return ipv6_setsockopt(sk, level, optname, optval, optlen);
}
-#ifdef CONFIG_COMPAT
-int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- if (level == SOL_UDP || level == SOL_UDPLITE)
- return udp_lib_setsockopt(sk, level, optname, optval, optlen,
- udp_v6_push_pending_frames);
- return compat_ipv6_setsockopt(sk, level, optname, optval, optlen);
-}
-#endif
-
int udpv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -1589,16 +1634,6 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
-#ifdef CONFIG_COMPAT
-int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- if (level == SOL_UDP || level == SOL_UDPLITE)
- return udp_lib_getsockopt(sk, level, optname, optval, optlen);
- return compat_ipv6_getsockopt(sk, level, optname, optval, optlen);
-}
-#endif
-
/* thinking of making this const? Don't.
* early_demux can change based on sysctl.
*/
@@ -1681,10 +1716,6 @@ struct proto udpv6_prot = {
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
.h.udp_table = &udp_table,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_udpv6_setsockopt,
- .compat_getsockopt = compat_udpv6_getsockopt,
-#endif
.diag_destroy = udp_abort,
};
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 20e324b6f358..b2fcc46c1630 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -17,14 +17,8 @@ void udp_v6_rehash(struct sock *sk);
int udpv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
-int udpv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
-#ifdef CONFIG_COMPAT
-int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
-int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
-#endif
+int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen);
int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int flags, int *addr_len);
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index bf7a7acd39b1..fbb700d3f437 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -52,10 +52,6 @@ struct proto udplitev6_prot = {
.sysctl_mem = sysctl_udp_mem,
.obj_size = sizeof(struct udp6_sock),
.h.udp_table = &udplite_table,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_udpv6_setsockopt,
- .compat_getsockopt = compat_udpv6_getsockopt,
-#endif
};
static struct inet_protosw udplite6_protosw = {
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index ee0add15497d..6ee9851ac7c6 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1494,7 +1494,7 @@ static int iucv_sock_release(struct socket *sock)
/* getsockopt and setsockopt */
static int iucv_sock_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
@@ -1507,7 +1507,7 @@ static int iucv_sock_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *) optval))
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
return -EFAULT;
rc = 0;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 56fac24a627a..56dad9565bc9 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1265,7 +1265,7 @@ static void kcm_recv_enable(struct kcm_sock *kcm)
}
static int kcm_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct kcm_sock *kcm = kcm_sk(sock->sk);
int val, valbool;
@@ -1277,8 +1277,8 @@ static int kcm_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
- return -EINVAL;
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
+ return -EFAULT;
valbool = val ? 1 : 0;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index b67ed3a8486c..c12dbc51ef5f 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1849,6 +1849,13 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
+ if ((xfilter->sadb_x_filter_splen >=
+ (sizeof(xfrm_address_t) << 3)) ||
+ (xfilter->sadb_x_filter_dplen >=
+ (sizeof(xfrm_address_t) << 3))) {
+ mutex_unlock(&pfk->dump_lock);
+ return -EINVAL;
+ }
filter = kmalloc(sizeof(*filter), GFP_KERNEL);
if (filter == NULL) {
mutex_unlock(&pfk->dump_lock);
@@ -2400,7 +2407,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
return err;
}
- xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN,
+ xp = xfrm_policy_bysel_ctx(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN,
pol->sadb_x_policy_dir - 1, &sel, pol_ctx,
1, &err);
security_xfrm_policy_free(pol_ctx);
@@ -2651,7 +2658,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
return -EINVAL;
delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2);
- xp = xfrm_policy_byid(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN,
+ xp = xfrm_policy_byid(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN,
dir, pol->sadb_x_policy_id, delete, &err);
if (xp == NULL)
return -ENOENT;
@@ -3734,8 +3741,6 @@ static const struct proto_ops pfkey_ops = {
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 6434d17e6e8e..701fc72ad9f4 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/*
- * L2TP core.
+/* L2TP core.
*
* Copyright (c) 2008,2009,2010 Katalix Systems Ltd
*
@@ -94,7 +93,7 @@ struct l2tp_skb_cb {
unsigned long expires;
};
-#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)])
+#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *)&(skb)->cb[sizeof(struct inet_skb_parm)])
static struct workqueue_struct *l2tp_wq;
@@ -102,8 +101,10 @@ static struct workqueue_struct *l2tp_wq;
static unsigned int l2tp_net_id;
struct l2tp_net {
struct list_head l2tp_tunnel_list;
+ /* Lock for write access to l2tp_tunnel_list */
spinlock_t l2tp_tunnel_list_lock;
struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
+ /* Lock for write access to l2tp_session_hlist */
spinlock_t l2tp_session_hlist_lock;
};
@@ -134,7 +135,6 @@ static inline struct hlist_head *
l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id)
{
return &pn->l2tp_session_hlist[hash_32(session_id, L2TP_HASH_BITS_2)];
-
}
/* Session hash list.
@@ -149,12 +149,51 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)];
}
-void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
+static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
{
sock_put(tunnel->sock);
/* the tunnel is freed in the socket destructor */
}
-EXPORT_SYMBOL(l2tp_tunnel_free);
+
+static void l2tp_session_free(struct l2tp_session *session)
+{
+ struct l2tp_tunnel *tunnel = session->tunnel;
+
+ if (tunnel) {
+ if (WARN_ON(tunnel->magic != L2TP_TUNNEL_MAGIC))
+ goto out;
+ l2tp_tunnel_dec_refcount(tunnel);
+ }
+
+out:
+ kfree(session);
+}
+
+void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel)
+{
+ refcount_inc(&tunnel->ref_count);
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_inc_refcount);
+
+void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
+{
+ if (refcount_dec_and_test(&tunnel->ref_count))
+ l2tp_tunnel_free(tunnel);
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_dec_refcount);
+
+void l2tp_session_inc_refcount(struct l2tp_session *session)
+{
+ refcount_inc(&session->ref_count);
+}
+EXPORT_SYMBOL_GPL(l2tp_session_inc_refcount);
+
+void l2tp_session_dec_refcount(struct l2tp_session *session)
+{
+ if (refcount_dec_and_test(&session->ref_count))
+ l2tp_session_free(session);
+}
+EXPORT_SYMBOL_GPL(l2tp_session_dec_refcount);
/* Lookup a tunnel. A new reference is held on the returned tunnel. */
struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
@@ -412,7 +451,7 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *
}
/* call private receive handler */
- if (session->recv_skb != NULL)
+ if (session->recv_skb)
(*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length);
else
kfree_skb(skb);
@@ -621,8 +660,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
int length)
{
struct l2tp_tunnel *tunnel = session->tunnel;
+ u32 ns = 0, nr = 0;
int offset;
- u32 ns, nr;
/* Parse and check optional cookie */
if (session->peer_cookie_len > 0) {
@@ -644,13 +683,12 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
* the control of the LNS. If no sequence numbers present but
* we were expecting them, discard frame.
*/
- ns = nr = 0;
L2TP_SKB_CB(skb)->has_seq = 0;
if (tunnel->version == L2TP_HDR_VER_2) {
if (hdrflags & L2TP_HDRFLAG_S) {
- ns = ntohs(*(__be16 *) ptr);
+ ns = ntohs(*(__be16 *)ptr);
ptr += 2;
- nr = ntohs(*(__be16 *) ptr);
+ nr = ntohs(*(__be16 *)ptr);
ptr += 2;
/* Store L2TP info in the skb */
@@ -662,7 +700,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
session->name, ns, nr, session->nr);
}
} else if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
- u32 l2h = ntohl(*(__be32 *) ptr);
+ u32 l2h = ntohl(*(__be32 *)ptr);
if (l2h & 0x40000000) {
ns = l2h & 0x00ffffff;
@@ -679,11 +717,11 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
}
if (L2TP_SKB_CB(skb)->has_seq) {
- /* Received a packet with sequence numbers. If we're the LNS,
+ /* Received a packet with sequence numbers. If we're the LAC,
* check if we sre sending sequence numbers and if not,
* configure it so.
*/
- if ((!session->lns_mode) && (!session->send_seq)) {
+ if (!session->lns_mode && !session->send_seq) {
l2tp_info(session, L2TP_MSG_SEQ,
"%s: requested to enable seq numbers by LNS\n",
session->name);
@@ -707,7 +745,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
* If we're the LNS and we're sending sequence numbers, the
* LAC is broken. Discard the frame.
*/
- if ((!session->lns_mode) && (session->send_seq)) {
+ if (!session->lns_mode && session->send_seq) {
l2tp_info(session, L2TP_MSG_SEQ,
"%s: requested to disable seq numbers by LNS\n",
session->name);
@@ -770,20 +808,21 @@ discard:
atomic_long_inc(&session->stats.rx_errors);
kfree_skb(skb);
}
-EXPORT_SYMBOL(l2tp_recv_common);
+EXPORT_SYMBOL_GPL(l2tp_recv_common);
/* Drop skbs from the session's reorder_q
*/
-static int l2tp_session_queue_purge(struct l2tp_session *session)
+static void l2tp_session_queue_purge(struct l2tp_session *session)
{
struct sk_buff *skb = NULL;
- BUG_ON(!session);
- BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+
+ if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
+ return;
+
while ((skb = skb_dequeue(&session->reorder_q))) {
atomic_long_inc(&session->stats.rx_errors);
kfree_skb(skb);
}
- return 0;
}
/* Internal UDP receive frame. Do the real work of receiving an L2TP data frame
@@ -825,10 +864,11 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
}
/* Point to L2TP header */
- optr = ptr = skb->data;
+ optr = skb->data;
+ ptr = skb->data;
/* Get L2TP header flags */
- hdrflags = ntohs(*(__be16 *) ptr);
+ hdrflags = ntohs(*(__be16 *)ptr);
/* Check protocol version */
version = hdrflags & L2TP_HDR_VER_MASK;
@@ -859,14 +899,14 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
ptr += 2;
/* Extract tunnel and session ID */
- tunnel_id = ntohs(*(__be16 *) ptr);
+ tunnel_id = ntohs(*(__be16 *)ptr);
ptr += 2;
- session_id = ntohs(*(__be16 *) ptr);
+ session_id = ntohs(*(__be16 *)ptr);
ptr += 2;
} else {
ptr += 2; /* skip reserved bits */
tunnel_id = tunnel->tunnel_id;
- session_id = ntohl(*(__be32 *) ptr);
+ session_id = ntohl(*(__be32 *)ptr);
ptr += 4;
}
@@ -910,7 +950,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
struct l2tp_tunnel *tunnel;
tunnel = rcu_dereference_sk_user_data(sk);
- if (tunnel == NULL)
+ if (!tunnel)
goto pass_up;
l2tp_dbg(tunnel, L2TP_MSG_DATA, "%s: received %d bytes\n",
@@ -971,13 +1011,13 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
*/
if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
u16 flags = L2TP_HDR_VER_3;
- *((__be16 *) bufp) = htons(flags);
+ *((__be16 *)bufp) = htons(flags);
bufp += 2;
- *((__be16 *) bufp) = 0;
+ *((__be16 *)bufp) = 0;
bufp += 2;
}
- *((__be32 *) bufp) = htonl(session->peer_session_id);
+ *((__be32 *)bufp) = htonl(session->peer_session_id);
bufp += 4;
if (session->cookie_len) {
memcpy(bufp, &session->cookie[0], session->cookie_len);
@@ -1076,7 +1116,10 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
}
/* Setup L2TP header */
- session->build_header(session, __skb_push(skb, hdr_len));
+ if (tunnel->version == L2TP_HDR_VER_2)
+ l2tp_build_l2tpv2_header(session, __skb_push(skb, hdr_len));
+ else
+ l2tp_build_l2tpv3_header(session, __skb_push(skb, hdr_len));
/* Reset skb netfilter state */
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1121,8 +1164,8 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
&sk->sk_v6_daddr, udp_len);
else
#endif
- udp_set_csum(sk->sk_no_check_tx, skb, inet->inet_saddr,
- inet->inet_daddr, udp_len);
+ udp_set_csum(sk->sk_no_check_tx, skb, inet->inet_saddr,
+ inet->inet_daddr, udp_len);
break;
case L2TP_ENCAPTYPE_IP:
@@ -1149,7 +1192,7 @@ static void l2tp_tunnel_destruct(struct sock *sk)
{
struct l2tp_tunnel *tunnel = l2tp_tunnel(sk);
- if (tunnel == NULL)
+ if (!tunnel)
goto end;
l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name);
@@ -1179,6 +1222,30 @@ end:
return;
}
+/* Remove an l2tp session from l2tp_core's hash lists. */
+static void l2tp_session_unhash(struct l2tp_session *session)
+{
+ struct l2tp_tunnel *tunnel = session->tunnel;
+
+ /* Remove the session from core hashes */
+ if (tunnel) {
+ /* Remove from the per-tunnel hash */
+ write_lock_bh(&tunnel->hlist_lock);
+ hlist_del_init(&session->hlist);
+ write_unlock_bh(&tunnel->hlist_lock);
+
+ /* For L2TPv3 we have a per-net hash: remove from there, too */
+ if (tunnel->version != L2TP_HDR_VER_2) {
+ struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
+
+ spin_lock_bh(&pn->l2tp_session_hlist_lock);
+ hlist_del_init_rcu(&session->global_hlist);
+ spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+ synchronize_rcu();
+ }
+ }
+}
+
/* When the tunnel is closed, all the attached sessions need to go too.
*/
static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
@@ -1188,8 +1255,6 @@ static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
struct hlist_node *tmp;
struct l2tp_session *session;
- BUG_ON(tunnel == NULL);
-
l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing all sessions...\n",
tunnel->name);
@@ -1210,10 +1275,10 @@ again:
write_unlock_bh(&tunnel->hlist_lock);
- __l2tp_session_unhash(session);
+ l2tp_session_unhash(session);
l2tp_session_queue_purge(session);
- if (session->session_close != NULL)
+ if (session->session_close)
(*session->session_close)(session);
l2tp_session_dec_refcount(session);
@@ -1284,10 +1349,10 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
* exit hook.
*/
static int l2tp_tunnel_sock_create(struct net *net,
- u32 tunnel_id,
- u32 peer_tunnel_id,
- struct l2tp_tunnel_cfg *cfg,
- struct socket **sockp)
+ u32 tunnel_id,
+ u32 peer_tunnel_id,
+ struct l2tp_tunnel_cfg *cfg,
+ struct socket **sockp)
{
int err = -EINVAL;
struct socket *sock = NULL;
@@ -1305,9 +1370,9 @@ static int l2tp_tunnel_sock_create(struct net *net,
memcpy(&udp_conf.peer_ip6, cfg->peer_ip6,
sizeof(udp_conf.peer_ip6));
udp_conf.use_udp6_tx_checksums =
- ! cfg->udp6_zero_tx_checksums;
+ !cfg->udp6_zero_tx_checksums;
udp_conf.use_udp6_rx_checksums =
- ! cfg->udp6_zero_rx_checksums;
+ !cfg->udp6_zero_rx_checksums;
} else
#endif
{
@@ -1332,7 +1397,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
struct sockaddr_l2tpip6 ip6_addr = {0};
err = sock_create_kern(net, AF_INET6, SOCK_DGRAM,
- IPPROTO_L2TP, &sock);
+ IPPROTO_L2TP, &sock);
if (err < 0)
goto out;
@@ -1340,7 +1405,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6,
sizeof(ip6_addr.l2tp_addr));
ip6_addr.l2tp_conn_id = tunnel_id;
- err = kernel_bind(sock, (struct sockaddr *) &ip6_addr,
+ err = kernel_bind(sock, (struct sockaddr *)&ip6_addr,
sizeof(ip6_addr));
if (err < 0)
goto out;
@@ -1350,7 +1415,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
sizeof(ip6_addr.l2tp_addr));
ip6_addr.l2tp_conn_id = peer_tunnel_id;
err = kernel_connect(sock,
- (struct sockaddr *) &ip6_addr,
+ (struct sockaddr *)&ip6_addr,
sizeof(ip6_addr), 0);
if (err < 0)
goto out;
@@ -1360,14 +1425,14 @@ static int l2tp_tunnel_sock_create(struct net *net,
struct sockaddr_l2tpip ip_addr = {0};
err = sock_create_kern(net, AF_INET, SOCK_DGRAM,
- IPPROTO_L2TP, &sock);
+ IPPROTO_L2TP, &sock);
if (err < 0)
goto out;
ip_addr.l2tp_family = AF_INET;
ip_addr.l2tp_addr = cfg->local_ip;
ip_addr.l2tp_conn_id = tunnel_id;
- err = kernel_bind(sock, (struct sockaddr *) &ip_addr,
+ err = kernel_bind(sock, (struct sockaddr *)&ip_addr,
sizeof(ip_addr));
if (err < 0)
goto out;
@@ -1375,7 +1440,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
ip_addr.l2tp_family = AF_INET;
ip_addr.l2tp_addr = cfg->peer_ip;
ip_addr.l2tp_conn_id = peer_tunnel_id;
- err = kernel_connect(sock, (struct sockaddr *) &ip_addr,
+ err = kernel_connect(sock, (struct sockaddr *)&ip_addr,
sizeof(ip_addr), 0);
if (err < 0)
goto out;
@@ -1388,7 +1453,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
out:
*sockp = sock;
- if ((err < 0) && sock) {
+ if (err < 0 && sock) {
kernel_sock_shutdown(sock, SHUT_RDWR);
sock_release(sock);
*sockp = NULL;
@@ -1399,17 +1464,18 @@ out:
static struct lock_class_key l2tp_socket_class;
-int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
+int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
+ struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
{
struct l2tp_tunnel *tunnel = NULL;
int err;
enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
- if (cfg != NULL)
+ if (cfg)
encap = cfg->encap;
- tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL);
- if (tunnel == NULL) {
+ tunnel = kzalloc(sizeof(*tunnel), GFP_KERNEL);
+ if (!tunnel) {
err = -ENOMEM;
goto err;
}
@@ -1424,7 +1490,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
rwlock_init(&tunnel->hlist_lock);
tunnel->acpt_newsess = true;
- if (cfg != NULL)
+ if (cfg)
tunnel->debug = cfg->debug;
tunnel->encap = encap;
@@ -1557,67 +1623,17 @@ void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
-/* Really kill the session.
- */
-void l2tp_session_free(struct l2tp_session *session)
-{
- struct l2tp_tunnel *tunnel = session->tunnel;
-
- BUG_ON(refcount_read(&session->ref_count) != 0);
-
- if (tunnel) {
- BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
- l2tp_tunnel_dec_refcount(tunnel);
- }
-
- kfree(session);
-}
-EXPORT_SYMBOL_GPL(l2tp_session_free);
-
-/* Remove an l2tp session from l2tp_core's hash lists.
- * Provides a tidyup interface for pseudowire code which can't just route all
- * shutdown via. l2tp_session_delete and a pseudowire-specific session_close
- * callback.
- */
-void __l2tp_session_unhash(struct l2tp_session *session)
-{
- struct l2tp_tunnel *tunnel = session->tunnel;
-
- /* Remove the session from core hashes */
- if (tunnel) {
- /* Remove from the per-tunnel hash */
- write_lock_bh(&tunnel->hlist_lock);
- hlist_del_init(&session->hlist);
- write_unlock_bh(&tunnel->hlist_lock);
-
- /* For L2TPv3 we have a per-net hash: remove from there, too */
- if (tunnel->version != L2TP_HDR_VER_2) {
- struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
- spin_lock_bh(&pn->l2tp_session_hlist_lock);
- hlist_del_init_rcu(&session->global_hlist);
- spin_unlock_bh(&pn->l2tp_session_hlist_lock);
- synchronize_rcu();
- }
- }
-}
-EXPORT_SYMBOL_GPL(__l2tp_session_unhash);
-
-/* This function is used by the netlink SESSION_DELETE command and by
- pseudowire modules.
- */
-int l2tp_session_delete(struct l2tp_session *session)
+void l2tp_session_delete(struct l2tp_session *session)
{
if (test_and_set_bit(0, &session->dead))
- return 0;
+ return;
- __l2tp_session_unhash(session);
+ l2tp_session_unhash(session);
l2tp_session_queue_purge(session);
- if (session->session_close != NULL)
+ if (session->session_close)
(*session->session_close)(session);
l2tp_session_dec_refcount(session);
-
- return 0;
}
EXPORT_SYMBOL_GPL(l2tp_session_delete);
@@ -1636,16 +1652,16 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP)
session->hdr_len += 4;
}
-
}
EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
-struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id,
+ u32 peer_session_id, struct l2tp_session_cfg *cfg)
{
struct l2tp_session *session;
- session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL);
- if (session != NULL) {
+ session = kzalloc(sizeof(*session) + priv_size, GFP_KERNEL);
+ if (session) {
session->magic = L2TP_SESSION_MAGIC;
session->tunnel = tunnel;
@@ -1687,11 +1703,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len);
}
- if (tunnel->version == L2TP_HDR_VER_2)
- session->build_header = l2tp_build_l2tpv2_header;
- else
- session->build_header = l2tp_build_l2tpv3_header;
-
l2tp_session_set_header_len(session, tunnel->version);
refcount_set(&session->ref_count, 1);
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 10cf7c3dcbb3..3468d6b177a0 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -1,6 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * L2TP internal definitions.
+/* L2TP internal definitions.
*
* Copyright (c) 2008,2009 Katalix Systems Ltd
*/
@@ -16,17 +15,17 @@
#include <net/xfrm.h>
#endif
-/* Just some random numbers */
+/* Random numbers used for internal consistency checks of tunnel and session structures */
#define L2TP_TUNNEL_MAGIC 0x42114DDA
#define L2TP_SESSION_MAGIC 0x0C04EB7D
-/* Per tunnel, session hash table size */
+/* Per tunnel session hash table size */
#define L2TP_HASH_BITS 4
-#define L2TP_HASH_SIZE (1 << L2TP_HASH_BITS)
+#define L2TP_HASH_SIZE BIT(L2TP_HASH_BITS)
-/* System-wide, session hash table size */
+/* System-wide session hash table size */
#define L2TP_HASH_BITS_2 8
-#define L2TP_HASH_SIZE_2 (1 << L2TP_HASH_BITS_2)
+#define L2TP_HASH_SIZE_2 BIT(L2TP_HASH_BITS_2)
struct sk_buff;
@@ -44,37 +43,34 @@ struct l2tp_stats {
struct l2tp_tunnel;
-/* Describes a session. Contains information to determine incoming
- * packets and transmit outgoing ones.
- */
+/* L2TP session configuration */
struct l2tp_session_cfg {
enum l2tp_pwtype pw_type;
- unsigned int recv_seq:1; /* expect receive packets with
- * sequence numbers? */
- unsigned int send_seq:1; /* send packets with sequence
- * numbers? */
- unsigned int lns_mode:1; /* behave as LNS? LAC enables
- * sequence numbers under
- * control of LNS. */
- int debug; /* bitmask of debug message
- * categories */
+ unsigned int recv_seq:1; /* expect receive packets with sequence numbers? */
+ unsigned int send_seq:1; /* send packets with sequence numbers? */
+ unsigned int lns_mode:1; /* behave as LNS?
+ * LAC enables sequence numbers under LNS control.
+ */
+ int debug; /* bitmask of debug message categories */
u16 l2specific_type; /* Layer 2 specific type */
u8 cookie[8]; /* optional cookie */
int cookie_len; /* 0, 4 or 8 bytes */
u8 peer_cookie[8]; /* peer's cookie */
int peer_cookie_len; /* 0, 4 or 8 bytes */
- int reorder_timeout; /* configured reorder timeout
- * (in jiffies) */
+ int reorder_timeout; /* configured reorder timeout (in jiffies) */
char *ifname;
};
+/* Represents a session (pseudowire) instance.
+ * Tracks runtime state including cookies, dataplane packet sequencing, and IO statistics.
+ * Is linked into a per-tunnel session hashlist; and in the case of an L2TPv3 session into
+ * an additional per-net ("global") hashlist.
+ */
struct l2tp_session {
- int magic; /* should be
- * L2TP_SESSION_MAGIC */
+ int magic; /* should be L2TP_SESSION_MAGIC */
long dead;
- struct l2tp_tunnel *tunnel; /* back pointer to tunnel
- * context */
+ struct l2tp_tunnel *tunnel; /* back pointer to tunnel context */
u32 session_id;
u32 peer_session_id;
u8 cookie[8];
@@ -89,42 +85,53 @@ struct l2tp_session {
u32 nr_max; /* max NR. Depends on tunnel */
u32 nr_window_size; /* NR window size */
u32 nr_oos; /* NR of last OOS packet */
- int nr_oos_count; /* For OOS recovery */
+ int nr_oos_count; /* for OOS recovery */
int nr_oos_count_max;
- struct hlist_node hlist; /* Hash list node */
+ struct hlist_node hlist; /* hash list node */
refcount_t ref_count;
char name[32]; /* for logging */
char ifname[IFNAMSIZ];
- unsigned int recv_seq:1; /* expect receive packets with
- * sequence numbers? */
- unsigned int send_seq:1; /* send packets with sequence
- * numbers? */
- unsigned int lns_mode:1; /* behave as LNS? LAC enables
- * sequence numbers under
- * control of LNS. */
- int debug; /* bitmask of debug message
- * categories */
- int reorder_timeout; /* configured reorder timeout
- * (in jiffies) */
+ unsigned int recv_seq:1; /* expect receive packets with sequence numbers? */
+ unsigned int send_seq:1; /* send packets with sequence numbers? */
+ unsigned int lns_mode:1; /* behave as LNS?
+ * LAC enables sequence numbers under LNS control.
+ */
+ int debug; /* bitmask of debug message categories */
+ int reorder_timeout; /* configured reorder timeout (in jiffies) */
int reorder_skip; /* set if skip to next nr */
enum l2tp_pwtype pwtype;
struct l2tp_stats stats;
- struct hlist_node global_hlist; /* Global hash list node */
+ struct hlist_node global_hlist; /* global hash list node */
- int (*build_header)(struct l2tp_session *session, void *buf);
+ /* Session receive handler for data packets.
+ * Each pseudowire implementation should implement this callback in order to
+ * handle incoming packets. Packets are passed to the pseudowire handler after
+ * reordering, if data sequence numbers are enabled for the session.
+ */
void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len);
+
+ /* Session close handler.
+ * Each pseudowire implementation may implement this callback in order to carry
+ * out pseudowire-specific shutdown actions.
+ * The callback is called by core after unhashing the session and purging its
+ * reorder queue.
+ */
void (*session_close)(struct l2tp_session *session);
+
+ /* Session show handler.
+ * Pseudowire-specific implementation of debugfs session rendering.
+ * The callback is called by l2tp_debugfs.c after rendering core session
+ * information.
+ */
void (*show)(struct seq_file *m, void *priv);
- u8 priv[]; /* private data */
+
+ u8 priv[]; /* private data */
};
-/* Describes the tunnel. It contains info to track all the associated
- * sessions so incoming packets can be sorted out
- */
+/* L2TP tunnel configuration */
struct l2tp_tunnel_cfg {
- int debug; /* bitmask of debug message
- * categories */
+ int debug; /* bitmask of debug message categories */
enum l2tp_encap_type encap;
/* Used only for kernel-created sockets */
@@ -141,6 +148,12 @@ struct l2tp_tunnel_cfg {
udp6_zero_rx_checksums:1;
};
+/* Represents a tunnel instance.
+ * Tracks runtime state including IO statistics.
+ * Holds the tunnel socket (either passed from userspace or directly created by the kernel).
+ * Maintains a hashlist of sessions belonging to the tunnel instance.
+ * Is linked into a per-net list of tunnels.
+ */
struct l2tp_tunnel {
int magic; /* Should be L2TP_TUNNEL_MAGIC */
@@ -148,40 +161,51 @@ struct l2tp_tunnel {
struct rcu_head rcu;
rwlock_t hlist_lock; /* protect session_hlist */
- bool acpt_newsess; /* Indicates whether this
- * tunnel accepts new sessions.
- * Protected by hlist_lock.
+ bool acpt_newsess; /* indicates whether this tunnel accepts
+ * new sessions. Protected by hlist_lock.
*/
struct hlist_head session_hlist[L2TP_HASH_SIZE];
- /* hashed list of sessions,
- * hashed by id */
+ /* hashed list of sessions, hashed by id */
u32 tunnel_id;
u32 peer_tunnel_id;
int version; /* 2=>L2TPv2, 3=>L2TPv3 */
char name[20]; /* for logging */
- int debug; /* bitmask of debug message
- * categories */
+ int debug; /* bitmask of debug message categories */
enum l2tp_encap_type encap;
struct l2tp_stats stats;
- struct list_head list; /* Keep a list of all tunnels */
+ struct list_head list; /* list node on per-namespace list of tunnels */
struct net *l2tp_net; /* the net we belong to */
refcount_t ref_count;
- void (*old_sk_destruct)(struct sock *);
- struct sock *sock; /* Parent socket */
- int fd; /* Parent fd, if tunnel socket
- * was created by userspace */
+ void (*old_sk_destruct)(struct sock *sk);
+ struct sock *sock; /* parent socket */
+ int fd; /* parent fd, if tunnel socket was created
+ * by userspace
+ */
struct work_struct del_work;
};
+/* Pseudowire ops callbacks for use with the l2tp genetlink interface */
struct l2tp_nl_cmd_ops {
+ /* The pseudowire session create callback is responsible for creating a session
+ * instance for a specific pseudowire type.
+ * It must call l2tp_session_create and l2tp_session_register to register the
+ * session instance, as well as carry out any pseudowire-specific initialisation.
+ * It must return >= 0 on success, or an appropriate negative errno value on failure.
+ */
int (*session_create)(struct net *net, struct l2tp_tunnel *tunnel,
u32 session_id, u32 peer_session_id,
struct l2tp_session_cfg *cfg);
- int (*session_delete)(struct l2tp_session *session);
+
+ /* The pseudowire session delete callback is responsible for initiating the deletion
+ * of a session instance.
+ * It must call l2tp_session_delete, as well as carry out any pseudowire-specific
+ * teardown actions.
+ */
+ void (*session_delete)(struct l2tp_session *session);
};
static inline void *l2tp_session_priv(struct l2tp_session *session)
@@ -189,73 +213,68 @@ static inline void *l2tp_session_priv(struct l2tp_session *session)
return &session->priv[0];
}
+/* Tunnel and session refcounts */
+void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel);
+void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel);
+void l2tp_session_inc_refcount(struct l2tp_session *session);
+void l2tp_session_dec_refcount(struct l2tp_session *session);
+
+/* Tunnel and session lookup.
+ * These functions take a reference on the instances they return, so
+ * the caller must ensure that the reference is dropped appropriately.
+ */
struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id);
struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth);
struct l2tp_session *l2tp_tunnel_get_session(struct l2tp_tunnel *tunnel,
u32 session_id);
-void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
-
struct l2tp_session *l2tp_session_get(const struct net *net, u32 session_id);
struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth);
struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
const char *ifname);
+/* Tunnel and session lifetime management.
+ * Creation of a new instance is a two-step process: create, then register.
+ * Destruction is triggered using the *_delete functions, and completes asynchronously.
+ */
int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
struct l2tp_tunnel **tunnelp);
int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
struct l2tp_tunnel_cfg *cfg);
-
void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
+
struct l2tp_session *l2tp_session_create(int priv_size,
struct l2tp_tunnel *tunnel,
u32 session_id, u32 peer_session_id,
struct l2tp_session_cfg *cfg);
int l2tp_session_register(struct l2tp_session *session,
struct l2tp_tunnel *tunnel);
+void l2tp_session_delete(struct l2tp_session *session);
-void __l2tp_session_unhash(struct l2tp_session *session);
-int l2tp_session_delete(struct l2tp_session *session);
-void l2tp_session_free(struct l2tp_session *session);
+/* Receive path helpers. If data sequencing is enabled for the session these
+ * functions handle queuing and reordering prior to passing packets to the
+ * pseudowire code to be passed to userspace.
+ */
void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
unsigned char *ptr, unsigned char *optr, u16 hdrflags,
int length);
int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
-void l2tp_session_set_header_len(struct l2tp_session *session, int version);
+/* Transmit path helpers for sending packets over the tunnel socket. */
+void l2tp_session_set_header_len(struct l2tp_session *session, int version);
int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb,
int hdr_len);
-int l2tp_nl_register_ops(enum l2tp_pwtype pw_type,
- const struct l2tp_nl_cmd_ops *ops);
-void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
-int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg);
-
-static inline void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel)
-{
- refcount_inc(&tunnel->ref_count);
-}
-
-static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
-{
- if (refcount_dec_and_test(&tunnel->ref_count))
- l2tp_tunnel_free(tunnel);
-}
-
-/* Session reference counts. Incremented when code obtains a reference
- * to a session.
+/* Pseudowire management.
+ * Pseudowires should register with l2tp core on module init, and unregister
+ * on module exit.
*/
-static inline void l2tp_session_inc_refcount(struct l2tp_session *session)
-{
- refcount_inc(&session->ref_count);
-}
+int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops);
+void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
-static inline void l2tp_session_dec_refcount(struct l2tp_session *session)
-{
- if (refcount_dec_and_test(&session->ref_count))
- l2tp_session_free(session);
-}
+/* IOCTL helper for IP encap modules. */
+int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg);
static inline int l2tp_get_l2specific_len(struct l2tp_session *session)
{
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 35bb4f3bdbe0..96cb9601c21b 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * L2TP subsystem debugfs
+/* L2TP subsystem debugfs
*
* Copyright (c) 2010 Katalix Systems Ltd
*/
@@ -59,11 +58,10 @@ static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
pd->session_idx++;
- if (pd->session == NULL) {
+ if (!pd->session) {
pd->session_idx = 0;
l2tp_dfs_next_tunnel(pd);
}
-
}
static void *l2tp_dfs_seq_start(struct seq_file *m, loff_t *offs)
@@ -74,23 +72,25 @@ static void *l2tp_dfs_seq_start(struct seq_file *m, loff_t *offs)
if (!pos)
goto out;
- BUG_ON(m->private == NULL);
+ if (WARN_ON(!m->private)) {
+ pd = NULL;
+ goto out;
+ }
pd = m->private;
- if (pd->tunnel == NULL)
+ if (!pd->tunnel)
l2tp_dfs_next_tunnel(pd);
else
l2tp_dfs_next_session(pd);
/* NULL tunnel and session indicates end of list */
- if ((pd->tunnel == NULL) && (pd->session == NULL))
+ if (!pd->tunnel && !pd->session)
pd = NULL;
out:
return pd;
}
-
static void *l2tp_dfs_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
(*pos)++;
@@ -148,11 +148,13 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
const struct ipv6_pinfo *np = inet6_sk(tunnel->sock);
seq_printf(m, " from %pI6c to %pI6c\n",
- &np->saddr, &tunnel->sock->sk_v6_daddr);
- } else
+ &np->saddr, &tunnel->sock->sk_v6_daddr);
+ }
#endif
- seq_printf(m, " from %pI4 to %pI4\n",
- &inet->inet_saddr, &inet->inet_daddr);
+ if (tunnel->sock->sk_family == AF_INET)
+ seq_printf(m, " from %pI4 to %pI4\n",
+ &inet->inet_saddr, &inet->inet_daddr);
+
if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
seq_printf(m, " source port %hu, dest port %hu\n",
ntohs(inet->inet_sport), ntohs(inet->inet_dport));
@@ -202,7 +204,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
seq_printf(m, "%02x%02x%02x%02x",
session->cookie[4], session->cookie[5],
session->cookie[6], session->cookie[7]);
- seq_printf(m, "\n");
+ seq_puts(m, "\n");
}
if (session->peer_cookie_len) {
seq_printf(m, " peer cookie %02x%02x%02x%02x",
@@ -212,7 +214,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
seq_printf(m, "%02x%02x%02x%02x",
session->peer_cookie[4], session->peer_cookie[5],
session->peer_cookie[6], session->peer_cookie[7]);
- seq_printf(m, "\n");
+ seq_puts(m, "\n");
}
seq_printf(m, " %hu/%hu tx %ld/%ld/%ld rx %ld/%ld/%ld\n",
@@ -224,7 +226,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
atomic_long_read(&session->stats.rx_bytes),
atomic_long_read(&session->stats.rx_errors));
- if (session->show != NULL)
+ if (session->show)
session->show(m, session);
}
@@ -271,7 +273,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
int rc = -ENOMEM;
pd = kzalloc(sizeof(*pd), GFP_KERNEL);
- if (pd == NULL)
+ if (!pd)
goto out;
/* Derive the network namespace from the pid opening the
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index fd5ac2788e45..7ed2b4eced94 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * L2TPv3 ethernet pseudowire driver
+/* L2TPv3 ethernet pseudowire driver
*
* Copyright (c) 2008,2009,2010 Katalix Systems Ltd
*/
@@ -51,7 +50,6 @@ struct l2tp_eth_sess {
struct net_device __rcu *dev;
};
-
static int l2tp_eth_dev_init(struct net_device *dev)
{
eth_hw_addr_random(dev);
@@ -73,7 +71,7 @@ static void l2tp_eth_dev_uninit(struct net_device *dev)
*/
}
-static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct l2tp_eth *priv = netdev_priv(dev);
struct l2tp_session *session = priv->session;
@@ -94,13 +92,12 @@ static void l2tp_eth_get_stats64(struct net_device *dev,
{
struct l2tp_eth *priv = netdev_priv(dev);
- stats->tx_bytes = (unsigned long) atomic_long_read(&priv->tx_bytes);
- stats->tx_packets = (unsigned long) atomic_long_read(&priv->tx_packets);
- stats->tx_dropped = (unsigned long) atomic_long_read(&priv->tx_dropped);
- stats->rx_bytes = (unsigned long) atomic_long_read(&priv->rx_bytes);
- stats->rx_packets = (unsigned long) atomic_long_read(&priv->rx_packets);
- stats->rx_errors = (unsigned long) atomic_long_read(&priv->rx_errors);
-
+ stats->tx_bytes = (unsigned long)atomic_long_read(&priv->tx_bytes);
+ stats->tx_packets = (unsigned long)atomic_long_read(&priv->tx_packets);
+ stats->tx_dropped = (unsigned long)atomic_long_read(&priv->tx_dropped);
+ stats->rx_bytes = (unsigned long)atomic_long_read(&priv->rx_bytes);
+ stats->rx_packets = (unsigned long)atomic_long_read(&priv->rx_packets);
+ stats->rx_errors = (unsigned long)atomic_long_read(&priv->rx_errors);
}
static const struct net_device_ops l2tp_eth_netdev_ops = {
@@ -348,13 +345,11 @@ err:
return rc;
}
-
static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = {
.session_create = l2tp_eth_create,
.session_delete = l2tp_session_delete,
};
-
static int __init l2tp_eth_init(void)
{
int err = 0;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 955662a6dee7..df2a35b5714a 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * L2TPv3 IP encapsulation support
+/* L2TPv3 IP encapsulation support
*
* Copyright (c) 2008,2009,2010 Katalix Systems Ltd
*/
@@ -125,8 +124,9 @@ static int l2tp_ip_recv(struct sk_buff *skb)
goto discard;
/* Point to L2TP header */
- optr = ptr = skb->data;
- session_id = ntohl(*((__be32 *) ptr));
+ optr = skb->data;
+ ptr = skb->data;
+ session_id = ntohl(*((__be32 *)ptr));
ptr += 4;
/* RFC3931: L2TP/IP packets have the first 4 bytes containing
@@ -154,7 +154,8 @@ static int l2tp_ip_recv(struct sk_buff *skb)
goto discard_sess;
/* Point to L2TP header */
- optr = ptr = skb->data;
+ optr = skb->data;
+ ptr = skb->data;
ptr += 4;
pr_debug("%s: ip recv\n", tunnel->name);
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
@@ -176,7 +177,7 @@ pass_up:
if ((skb->data[0] & 0xc0) != 0xc0)
goto discard;
- tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
+ tunnel_id = ntohl(*(__be32 *)&skb->data[4]);
iph = (struct iphdr *)skb_network_header(skb);
read_lock_bh(&l2tp_ip_lock);
@@ -260,7 +261,7 @@ static void l2tp_ip_destroy_sock(struct sock *sk)
static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct inet_sock *inet = inet_sk(sk);
- struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr;
+ struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *)uaddr;
struct net *net = sock_net(sk);
int ret;
int chk_addr_ret;
@@ -285,8 +286,10 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
goto out;
- if (addr->l2tp_addr.s_addr)
- inet->inet_rcv_saddr = inet->inet_saddr = addr->l2tp_addr.s_addr;
+ if (addr->l2tp_addr.s_addr) {
+ inet->inet_rcv_saddr = addr->l2tp_addr.s_addr;
+ inet->inet_saddr = addr->l2tp_addr.s_addr;
+ }
if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
inet->inet_saddr = 0; /* Use device */
@@ -316,7 +319,7 @@ out:
static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
- struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr;
+ struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr;
int rc;
if (addr_len < sizeof(*lsa))
@@ -375,6 +378,7 @@ static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
lsa->l2tp_addr.s_addr = inet->inet_daddr;
} else {
__be32 addr = inet->inet_rcv_saddr;
+
if (!addr)
addr = inet->inet_saddr;
lsa->l2tp_conn_id = lsk->conn_id;
@@ -422,6 +426,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/* Get and verify the address. */
if (msg->msg_name) {
DECLARE_SOCKADDR(struct sockaddr_l2tpip *, lip, msg->msg_name);
+
rc = -EINVAL;
if (msg->msg_namelen < sizeof(*lip))
goto out;
@@ -456,7 +461,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
skb_reset_transport_header(skb);
/* Insert 0 session_id */
- *((__be32 *) skb_put(skb, 4)) = 0;
+ *((__be32 *)skb_put(skb, 4)) = 0;
/* Copy user data into skb */
rc = memcpy_from_msg(skb_put(skb, len), msg, len);
@@ -467,10 +472,10 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl4 = &inet->cork.fl.u.ip4;
if (connected)
- rt = (struct rtable *) __sk_dst_check(sk, 0);
+ rt = (struct rtable *)__sk_dst_check(sk, 0);
rcu_read_lock();
- if (rt == NULL) {
+ if (!rt) {
const struct ip_options_rcu *inet_opt;
inet_opt = rcu_dereference(inet->inet_opt);
@@ -592,7 +597,7 @@ int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg)
return put_user(amount, (int __user *)arg);
}
-EXPORT_SYMBOL(l2tp_ioctl);
+EXPORT_SYMBOL_GPL(l2tp_ioctl);
static struct proto l2tp_ip_prot = {
.name = "L2TP/IP",
@@ -612,10 +617,6 @@ static struct proto l2tp_ip_prot = {
.hash = l2tp_ip_hash,
.unhash = l2tp_ip_unhash,
.obj_size = sizeof(struct l2tp_ip_sock),
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_ip_setsockopt,
- .compat_getsockopt = compat_ip_getsockopt,
-#endif
};
static const struct proto_ops l2tp_ip_ops = {
@@ -638,10 +639,6 @@ static const struct proto_ops l2tp_ip_ops = {
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
-#endif
};
static struct inet_protosw l2tp_ip_protosw = {
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 526ed2c24dd5..bc757bc7e264 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * L2TPv3 IP encapsulation support for IPv6
+/* L2TPv3 IP encapsulation support for IPv6
*
* Copyright (c) 2012 Katalix Systems Ltd
*/
@@ -38,7 +37,8 @@ struct l2tp_ip6_sock {
u32 peer_conn_id;
/* ipv6_pinfo has to be the last member of l2tp_ip6_sock, see
- inet6_sk_generic */
+ * inet6_sk_generic
+ */
struct ipv6_pinfo inet6;
};
@@ -137,8 +137,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
goto discard;
/* Point to L2TP header */
- optr = ptr = skb->data;
- session_id = ntohl(*((__be32 *) ptr));
+ optr = skb->data;
+ ptr = skb->data;
+ session_id = ntohl(*((__be32 *)ptr));
ptr += 4;
/* RFC3931: L2TP/IP packets have the first 4 bytes containing
@@ -166,7 +167,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
goto discard_sess;
/* Point to L2TP header */
- optr = ptr = skb->data;
+ optr = skb->data;
+ ptr = skb->data;
ptr += 4;
pr_debug("%s: ip recv\n", tunnel->name);
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
@@ -188,7 +190,7 @@ pass_up:
if ((skb->data[0] & 0xc0) != 0xc0)
goto discard;
- tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
+ tunnel_id = ntohl(*(__be32 *)&skb->data[4]);
iph = ipv6_hdr(skb);
read_lock_bh(&l2tp_ip6_lock);
@@ -276,7 +278,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct sockaddr_l2tpip6 *addr = (struct sockaddr_l2tpip6 *) uaddr;
+ struct sockaddr_l2tpip6 *addr = (struct sockaddr_l2tpip6 *)uaddr;
struct net *net = sock_net(sk);
__be32 v4addr = 0;
int bound_dev_if;
@@ -375,8 +377,8 @@ out_unlock:
static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
- struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *) uaddr;
- struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
+ struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)uaddr;
+ struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
struct in6_addr *daddr;
int addr_type;
int rc;
@@ -486,7 +488,7 @@ static int l2tp_ip6_push_pending_frames(struct sock *sk)
int err = 0;
skb = skb_peek(&sk->sk_write_queue);
- if (skb == NULL)
+ if (!skb)
goto out;
transhdr = (__be32 *)skb_transport_header(skb);
@@ -519,7 +521,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int err;
/* Rough check on arithmetic overflow,
- better check is made in ip6_append_data().
+ * better check is made in ip6_append_data().
*/
if (len > INT_MAX)
return -EMSGSIZE;
@@ -528,9 +530,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
- /*
- * Get and verify the address.
- */
+ /* Get and verify the address */
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_mark = sk->sk_mark;
@@ -548,15 +548,14 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
daddr = &lsa->l2tp_addr;
if (np->sndflow) {
fl6.flowlabel = lsa->l2tp_flowinfo & IPV6_FLOWINFO_MASK;
- if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
+ if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (IS_ERR(flowlabel))
return -EINVAL;
}
}
- /*
- * Otherwise it will be difficult to maintain
+ /* Otherwise it will be difficult to maintain
* sk->sk_dst_cache.
*/
if (sk->sk_state == TCP_ESTABLISHED &&
@@ -594,7 +593,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (IS_ERR(flowlabel))
return -EINVAL;
}
- if (!(opt->opt_nflen|opt->opt_flen))
+ if (!(opt->opt_nflen | opt->opt_flen))
opt = NULL;
}
@@ -745,10 +744,6 @@ static struct proto l2tp_ip6_prot = {
.hash = l2tp_ip6_hash,
.unhash = l2tp_ip6_unhash,
.obj_size = sizeof(struct l2tp_ip6_sock),
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_ipv6_setsockopt,
- .compat_getsockopt = compat_ipv6_getsockopt,
-#endif
};
static const struct proto_ops l2tp_ip6_ops = {
@@ -773,8 +768,6 @@ static const struct proto_ops l2tp_ip6_ops = {
.sendpage = sock_no_sendpage,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet6_compat_ioctl,
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
#endif
};
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index ebb381c3f1b9..def78eebca4c 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/*
- * L2TP netlink layer, for management
+/* L2TP netlink layer, for management
*
* Copyright (c) 2008,2009,2010 Katalix Systems Ltd
*
@@ -27,7 +26,6 @@
#include "l2tp_core.h"
-
static struct genl_family l2tp_nl_family;
static const struct genl_multicast_group l2tp_multicast_group[] = {
@@ -157,81 +155,82 @@ static int l2tp_session_notify(struct genl_family *family,
return ret;
}
+static int l2tp_nl_cmd_tunnel_create_get_addr(struct nlattr **attrs, struct l2tp_tunnel_cfg *cfg)
+{
+ if (attrs[L2TP_ATTR_UDP_SPORT])
+ cfg->local_udp_port = nla_get_u16(attrs[L2TP_ATTR_UDP_SPORT]);
+ if (attrs[L2TP_ATTR_UDP_DPORT])
+ cfg->peer_udp_port = nla_get_u16(attrs[L2TP_ATTR_UDP_DPORT]);
+ cfg->use_udp_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_CSUM]);
+
+ /* Must have either AF_INET or AF_INET6 address for source and destination */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (attrs[L2TP_ATTR_IP6_SADDR] && attrs[L2TP_ATTR_IP6_DADDR]) {
+ cfg->local_ip6 = nla_data(attrs[L2TP_ATTR_IP6_SADDR]);
+ cfg->peer_ip6 = nla_data(attrs[L2TP_ATTR_IP6_DADDR]);
+ cfg->udp6_zero_tx_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]);
+ cfg->udp6_zero_rx_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]);
+ return 0;
+ }
+#endif
+ if (attrs[L2TP_ATTR_IP_SADDR] && attrs[L2TP_ATTR_IP_DADDR]) {
+ cfg->local_ip.s_addr = nla_get_in_addr(attrs[L2TP_ATTR_IP_SADDR]);
+ cfg->peer_ip.s_addr = nla_get_in_addr(attrs[L2TP_ATTR_IP_DADDR]);
+ return 0;
+ }
+ return -EINVAL;
+}
+
static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info)
{
u32 tunnel_id;
u32 peer_tunnel_id;
int proto_version;
- int fd;
+ int fd = -1;
int ret = 0;
struct l2tp_tunnel_cfg cfg = { 0, };
struct l2tp_tunnel *tunnel;
struct net *net = genl_info_net(info);
+ struct nlattr **attrs = info->attrs;
- if (!info->attrs[L2TP_ATTR_CONN_ID]) {
+ if (!attrs[L2TP_ATTR_CONN_ID]) {
ret = -EINVAL;
goto out;
}
- tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+ tunnel_id = nla_get_u32(attrs[L2TP_ATTR_CONN_ID]);
- if (!info->attrs[L2TP_ATTR_PEER_CONN_ID]) {
+ if (!attrs[L2TP_ATTR_PEER_CONN_ID]) {
ret = -EINVAL;
goto out;
}
- peer_tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_CONN_ID]);
+ peer_tunnel_id = nla_get_u32(attrs[L2TP_ATTR_PEER_CONN_ID]);
- if (!info->attrs[L2TP_ATTR_PROTO_VERSION]) {
+ if (!attrs[L2TP_ATTR_PROTO_VERSION]) {
ret = -EINVAL;
goto out;
}
- proto_version = nla_get_u8(info->attrs[L2TP_ATTR_PROTO_VERSION]);
+ proto_version = nla_get_u8(attrs[L2TP_ATTR_PROTO_VERSION]);
- if (!info->attrs[L2TP_ATTR_ENCAP_TYPE]) {
+ if (!attrs[L2TP_ATTR_ENCAP_TYPE]) {
ret = -EINVAL;
goto out;
}
- cfg.encap = nla_get_u16(info->attrs[L2TP_ATTR_ENCAP_TYPE]);
-
- fd = -1;
- if (info->attrs[L2TP_ATTR_FD]) {
- fd = nla_get_u32(info->attrs[L2TP_ATTR_FD]);
+ cfg.encap = nla_get_u16(attrs[L2TP_ATTR_ENCAP_TYPE]);
+
+ /* Managed tunnels take the tunnel socket from userspace.
+ * Unmanaged tunnels must call out the source and destination addresses
+ * for the kernel to create the tunnel socket itself.
+ */
+ if (attrs[L2TP_ATTR_FD]) {
+ fd = nla_get_u32(attrs[L2TP_ATTR_FD]);
} else {
-#if IS_ENABLED(CONFIG_IPV6)
- if (info->attrs[L2TP_ATTR_IP6_SADDR] &&
- info->attrs[L2TP_ATTR_IP6_DADDR]) {
- cfg.local_ip6 = nla_data(
- info->attrs[L2TP_ATTR_IP6_SADDR]);
- cfg.peer_ip6 = nla_data(
- info->attrs[L2TP_ATTR_IP6_DADDR]);
- } else
-#endif
- if (info->attrs[L2TP_ATTR_IP_SADDR] &&
- info->attrs[L2TP_ATTR_IP_DADDR]) {
- cfg.local_ip.s_addr = nla_get_in_addr(
- info->attrs[L2TP_ATTR_IP_SADDR]);
- cfg.peer_ip.s_addr = nla_get_in_addr(
- info->attrs[L2TP_ATTR_IP_DADDR]);
- } else {
- ret = -EINVAL;
+ ret = l2tp_nl_cmd_tunnel_create_get_addr(attrs, &cfg);
+ if (ret < 0)
goto out;
- }
- if (info->attrs[L2TP_ATTR_UDP_SPORT])
- cfg.local_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_SPORT]);
- if (info->attrs[L2TP_ATTR_UDP_DPORT])
- cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]);
- cfg.use_udp_checksums = nla_get_flag(
- info->attrs[L2TP_ATTR_UDP_CSUM]);
-
-#if IS_ENABLED(CONFIG_IPV6)
- cfg.udp6_zero_tx_checksums = nla_get_flag(
- info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]);
- cfg.udp6_zero_rx_checksums = nla_get_flag(
- info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]);
-#endif
}
- if (info->attrs[L2TP_ATTR_DEBUG])
- cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
+ if (attrs[L2TP_ATTR_DEBUG])
+ cfg.debug = nla_get_u32(attrs[L2TP_ATTR_DEBUG]);
ret = -EINVAL;
switch (cfg.encap) {
@@ -320,16 +319,79 @@ out:
return ret;
}
+#if IS_ENABLED(CONFIG_IPV6)
+static int l2tp_nl_tunnel_send_addr6(struct sk_buff *skb, struct sock *sk,
+ enum l2tp_encap_type encap)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ switch (encap) {
+ case L2TP_ENCAPTYPE_UDP:
+ if (udp_get_no_check6_tx(sk) &&
+ nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_TX))
+ return -1;
+ if (udp_get_no_check6_rx(sk) &&
+ nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_RX))
+ return -1;
+ if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
+ nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)))
+ return -1;
+ fallthrough;
+ case L2TP_ENCAPTYPE_IP:
+ if (nla_put_in6_addr(skb, L2TP_ATTR_IP6_SADDR, &np->saddr) ||
+ nla_put_in6_addr(skb, L2TP_ATTR_IP6_DADDR, &sk->sk_v6_daddr))
+ return -1;
+ break;
+ }
+ return 0;
+}
+#endif
+
+static int l2tp_nl_tunnel_send_addr4(struct sk_buff *skb, struct sock *sk,
+ enum l2tp_encap_type encap)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ switch (encap) {
+ case L2TP_ENCAPTYPE_UDP:
+ if (nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx) ||
+ nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
+ nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)))
+ return -1;
+ fallthrough;
+ case L2TP_ENCAPTYPE_IP:
+ if (nla_put_in_addr(skb, L2TP_ATTR_IP_SADDR, inet->inet_saddr) ||
+ nla_put_in_addr(skb, L2TP_ATTR_IP_DADDR, inet->inet_daddr))
+ return -1;
+ break;
+ }
+
+ return 0;
+}
+
+/* Append attributes for the tunnel address, handling the different attribute types
+ * used for different tunnel encapsulation and AF_INET v.s. AF_INET6.
+ */
+static int l2tp_nl_tunnel_send_addr(struct sk_buff *skb, struct l2tp_tunnel *tunnel)
+{
+ struct sock *sk = tunnel->sock;
+
+ if (!sk)
+ return 0;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ return l2tp_nl_tunnel_send_addr6(skb, sk, tunnel->encap);
+#endif
+ return l2tp_nl_tunnel_send_addr4(skb, sk, tunnel->encap);
+}
+
static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags,
struct l2tp_tunnel *tunnel, u8 cmd)
{
void *hdr;
struct nlattr *nest;
- struct sock *sk = NULL;
- struct inet_sock *inet;
-#if IS_ENABLED(CONFIG_IPV6)
- struct ipv6_pinfo *np = NULL;
-#endif
hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd);
if (!hdr)
@@ -343,7 +405,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
goto nla_put_failure;
nest = nla_nest_start_noflag(skb, L2TP_ATTR_STATS);
- if (nest == NULL)
+ if (!nest)
goto nla_put_failure;
if (nla_put_u64_64bit(skb, L2TP_ATTR_TX_PACKETS,
@@ -373,58 +435,9 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
goto nla_put_failure;
nla_nest_end(skb, nest);
- sk = tunnel->sock;
- if (!sk)
- goto out;
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- np = inet6_sk(sk);
-#endif
-
- inet = inet_sk(sk);
-
- switch (tunnel->encap) {
- case L2TP_ENCAPTYPE_UDP:
- switch (sk->sk_family) {
- case AF_INET:
- if (nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx))
- goto nla_put_failure;
- break;
-#if IS_ENABLED(CONFIG_IPV6)
- case AF_INET6:
- if (udp_get_no_check6_tx(sk) &&
- nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_TX))
- goto nla_put_failure;
- if (udp_get_no_check6_rx(sk) &&
- nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_RX))
- goto nla_put_failure;
- break;
-#endif
- }
- if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
- nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)))
- goto nla_put_failure;
- /* fall through */
- case L2TP_ENCAPTYPE_IP:
-#if IS_ENABLED(CONFIG_IPV6)
- if (np) {
- if (nla_put_in6_addr(skb, L2TP_ATTR_IP6_SADDR,
- &np->saddr) ||
- nla_put_in6_addr(skb, L2TP_ATTR_IP6_DADDR,
- &sk->sk_v6_daddr))
- goto nla_put_failure;
- } else
-#endif
- if (nla_put_in_addr(skb, L2TP_ATTR_IP_SADDR,
- inet->inet_saddr) ||
- nla_put_in_addr(skb, L2TP_ATTR_IP_DADDR,
- inet->inet_daddr))
- goto nla_put_failure;
- break;
- }
+ if (l2tp_nl_tunnel_send_addr(skb, tunnel))
+ goto nla_put_failure;
-out:
genlmsg_end(skb, hdr);
return 0;
@@ -485,7 +498,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback
for (;;) {
tunnel = l2tp_tunnel_get_nth(net, ti);
- if (tunnel == NULL)
+ if (!tunnel)
goto out;
if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid,
@@ -570,6 +583,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
if (info->attrs[L2TP_ATTR_COOKIE]) {
u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]);
+
if (len > 8) {
ret = -EINVAL;
goto out_tunnel;
@@ -579,6 +593,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
}
if (info->attrs[L2TP_ATTR_PEER_COOKIE]) {
u16 len = nla_len(info->attrs[L2TP_ATTR_PEER_COOKIE]);
+
if (len > 8) {
ret = -EINVAL;
goto out_tunnel;
@@ -606,14 +621,13 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
cfg.reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]);
#ifdef CONFIG_MODULES
- if (l2tp_nl_cmd_ops[cfg.pw_type] == NULL) {
+ if (!l2tp_nl_cmd_ops[cfg.pw_type]) {
genl_unlock();
request_module("net-l2tp-type-%u", cfg.pw_type);
genl_lock();
}
#endif
- if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) ||
- (l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) {
+ if (!l2tp_nl_cmd_ops[cfg.pw_type] || !l2tp_nl_cmd_ops[cfg.pw_type]->session_create) {
ret = -EPROTONOSUPPORT;
goto out_tunnel;
}
@@ -645,7 +659,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
u16 pw_type;
session = l2tp_nl_session_get(info);
- if (session == NULL) {
+ if (!session) {
ret = -ENODEV;
goto out;
}
@@ -656,7 +670,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
pw_type = session->pwtype;
if (pw_type < __L2TP_PWTYPE_MAX)
if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
- ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session);
+ l2tp_nl_cmd_ops[pw_type]->session_delete(session);
l2tp_session_dec_refcount(session);
@@ -670,7 +684,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
struct l2tp_session *session;
session = l2tp_nl_session_get(info);
- if (session == NULL) {
+ if (!session) {
ret = -ENODEV;
goto out;
}
@@ -715,8 +729,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
if (nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) ||
nla_put_u32(skb, L2TP_ATTR_SESSION_ID, session->session_id) ||
nla_put_u32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id) ||
- nla_put_u32(skb, L2TP_ATTR_PEER_SESSION_ID,
- session->peer_session_id) ||
+ nla_put_u32(skb, L2TP_ATTR_PEER_SESSION_ID, session->peer_session_id) ||
nla_put_u32(skb, L2TP_ATTR_DEBUG, session->debug) ||
nla_put_u16(skb, L2TP_ATTR_PW_TYPE, session->pwtype))
goto nla_put_failure;
@@ -724,11 +737,9 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
if ((session->ifname[0] &&
nla_put_string(skb, L2TP_ATTR_IFNAME, session->ifname)) ||
(session->cookie_len &&
- nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len,
- &session->cookie[0])) ||
+ nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len, session->cookie)) ||
(session->peer_cookie_len &&
- nla_put(skb, L2TP_ATTR_PEER_COOKIE, session->peer_cookie_len,
- &session->peer_cookie[0])) ||
+ nla_put(skb, L2TP_ATTR_PEER_COOKIE, session->peer_cookie_len, session->peer_cookie)) ||
nla_put_u8(skb, L2TP_ATTR_RECV_SEQ, session->recv_seq) ||
nla_put_u8(skb, L2TP_ATTR_SEND_SEQ, session->send_seq) ||
nla_put_u8(skb, L2TP_ATTR_LNS_MODE, session->lns_mode) ||
@@ -740,7 +751,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
goto nla_put_failure;
nest = nla_nest_start_noflag(skb, L2TP_ATTR_STATS);
- if (nest == NULL)
+ if (!nest)
goto nla_put_failure;
if (nla_put_u64_64bit(skb, L2TP_ATTR_TX_PACKETS,
@@ -785,7 +796,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
int ret;
session = l2tp_nl_session_get(info);
- if (session == NULL) {
+ if (!session) {
ret = -ENODEV;
goto err;
}
@@ -824,14 +835,14 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
int si = cb->args[1];
for (;;) {
- if (tunnel == NULL) {
+ if (!tunnel) {
tunnel = l2tp_tunnel_get_nth(net, ti);
- if (tunnel == NULL)
+ if (!tunnel)
goto out;
}
session = l2tp_session_get_nth(tunnel, si);
- if (session == NULL) {
+ if (!session) {
ti++;
l2tp_tunnel_dec_refcount(tunnel);
tunnel = NULL;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index c54cb59593ef..13c3153b40d6 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -117,8 +117,7 @@ struct pppol2tp_session {
int owner; /* pid that opened the socket */
struct mutex sk_lock; /* Protects .sk */
- struct sock __rcu *sk; /* Pointer to the session
- * PPPoX socket */
+ struct sock __rcu *sk; /* Pointer to the session PPPoX socket */
struct sock *__sk; /* Copy of .sk, for cleanup */
struct rcu_head rcu; /* For asynchronous release */
};
@@ -155,17 +154,20 @@ static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
{
struct l2tp_session *session;
- if (sk == NULL)
+ if (!sk)
return NULL;
sock_hold(sk);
session = (struct l2tp_session *)(sk->sk_user_data);
- if (session == NULL) {
+ if (!session) {
+ sock_put(sk);
+ goto out;
+ }
+ if (WARN_ON(session->magic != L2TP_SESSION_MAGIC)) {
+ session = NULL;
sock_put(sk);
goto out;
}
-
- BUG_ON(session->magic != L2TP_SESSION_MAGIC);
out:
return session;
@@ -218,7 +220,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
*/
rcu_read_lock();
sk = rcu_dereference(ps->sk);
- if (sk == NULL)
+ if (!sk)
goto no_sock;
/* If the first two bytes are 0xFF03, consider that it is the PPP's
@@ -286,7 +288,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
/* Get session and tunnel contexts */
error = -EBADF;
session = pppol2tp_sock_to_session(sk);
- if (session == NULL)
+ if (!session)
goto error;
tunnel = session->tunnel;
@@ -351,7 +353,7 @@ error:
*/
static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
{
- struct sock *sk = (struct sock *) chan->private;
+ struct sock *sk = (struct sock *)chan->private;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
int uhlen, headroom;
@@ -361,7 +363,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
/* Get session and tunnel contexts from the socket */
session = pppol2tp_sock_to_session(sk);
- if (session == NULL)
+ if (!session)
goto abort;
tunnel = session->tunnel;
@@ -420,7 +422,8 @@ static void pppol2tp_session_destruct(struct sock *sk)
if (session) {
sk->sk_user_data = NULL;
- BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+ if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
+ return;
l2tp_session_dec_refcount(session);
}
}
@@ -704,7 +707,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
* tunnel id.
*/
if (!info.session_id && !info.peer_session_id) {
- if (tunnel == NULL) {
+ if (!tunnel) {
struct l2tp_tunnel_cfg tcfg = {
.encap = L2TP_ENCAPTYPE_UDP,
.debug = 0,
@@ -739,11 +742,11 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
} else {
/* Error if we can't find the tunnel */
error = -ENOENT;
- if (tunnel == NULL)
+ if (!tunnel)
goto end;
/* Error if socket is not prepped */
- if (tunnel->sock == NULL)
+ if (!tunnel->sock)
goto end;
}
@@ -803,8 +806,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
* the internal context for use by ioctl() and sockopt()
* handlers.
*/
- if ((session->session_id == 0) &&
- (session->peer_session_id == 0)) {
+ if (session->session_id == 0 && session->peer_session_id == 0) {
error = 0;
goto out_no_ppp;
}
@@ -912,22 +914,23 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
struct pppol2tp_session *pls;
error = -ENOTCONN;
- if (sk == NULL)
+ if (!sk)
goto end;
if (!(sk->sk_state & PPPOX_CONNECTED))
goto end;
error = -EBADF;
session = pppol2tp_sock_to_session(sk);
- if (session == NULL)
+ if (!session)
goto end;
pls = l2tp_session_priv(session);
tunnel = session->tunnel;
inet = inet_sk(tunnel->sock);
- if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET)) {
+ if (tunnel->version == 2 && tunnel->sock->sk_family == AF_INET) {
struct sockaddr_pppol2tp sp;
+
len = sizeof(sp);
memset(&sp, 0, len);
sp.sa_family = AF_PPPOX;
@@ -943,8 +946,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
memcpy(uaddr, &sp, len);
#if IS_ENABLED(CONFIG_IPV6)
- } else if ((tunnel->version == 2) &&
- (tunnel->sock->sk_family == AF_INET6)) {
+ } else if (tunnel->version == 2 && tunnel->sock->sk_family == AF_INET6) {
struct sockaddr_pppol2tpin6 sp;
len = sizeof(sp);
@@ -962,8 +964,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
memcpy(&sp.pppol2tp.addr.sin6_addr, &tunnel->sock->sk_v6_daddr,
sizeof(tunnel->sock->sk_v6_daddr));
memcpy(uaddr, &sp, len);
- } else if ((tunnel->version == 3) &&
- (tunnel->sock->sk_family == AF_INET6)) {
+ } else if (tunnel->version == 3 && tunnel->sock->sk_family == AF_INET6) {
struct sockaddr_pppol2tpv3in6 sp;
len = sizeof(sp);
@@ -984,6 +985,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
#endif
} else if (tunnel->version == 3) {
struct sockaddr_pppol2tpv3 sp;
+
len = sizeof(sp);
memset(&sp, 0, len);
sp.sa_family = AF_PPPOX;
@@ -1178,7 +1180,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
switch (optname) {
case PPPOL2TP_SO_RECVSEQ:
- if ((val != 0) && (val != 1)) {
+ if (val != 0 && val != 1) {
err = -EINVAL;
break;
}
@@ -1189,7 +1191,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
break;
case PPPOL2TP_SO_SENDSEQ:
- if ((val != 0) && (val != 1)) {
+ if (val != 0 && val != 1) {
err = -EINVAL;
break;
}
@@ -1207,7 +1209,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
break;
case PPPOL2TP_SO_LNSMODE:
- if ((val != 0) && (val != 1)) {
+ if (val != 0 && val != 1) {
err = -EINVAL;
break;
}
@@ -1244,7 +1246,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
* session or the special tunnel type.
*/
static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct l2tp_session *session;
@@ -1258,23 +1260,22 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
return -EFAULT;
err = -ENOTCONN;
- if (sk->sk_user_data == NULL)
+ if (!sk->sk_user_data)
goto end;
/* Get session context from the socket */
err = -EBADF;
session = pppol2tp_sock_to_session(sk);
- if (session == NULL)
+ if (!session)
goto end;
/* Special case: if session_id == 0x0000, treat as operation on tunnel
*/
- if ((session->session_id == 0) &&
- (session->peer_session_id == 0)) {
+ if (session->session_id == 0 && session->peer_session_id == 0) {
tunnel = session->tunnel;
err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
} else {
@@ -1343,7 +1344,7 @@ static int pppol2tp_session_getsockopt(struct sock *sk,
break;
case PPPOL2TP_SO_REORDERTO:
- *val = (int) jiffies_to_msecs(session->reorder_timeout);
+ *val = (int)jiffies_to_msecs(session->reorder_timeout);
l2tp_info(session, L2TP_MSG_CONTROL,
"%s: get reorder_timeout=%d\n", session->name, *val);
break;
@@ -1381,18 +1382,17 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
err = -ENOTCONN;
- if (sk->sk_user_data == NULL)
+ if (!sk->sk_user_data)
goto end;
/* Get the session context */
err = -EBADF;
session = pppol2tp_sock_to_session(sk);
- if (session == NULL)
+ if (!session)
goto end;
/* Special case: if session_id == 0x0000, treat as operation on tunnel */
- if ((session->session_id == 0) &&
- (session->peer_session_id == 0)) {
+ if (session->session_id == 0 && session->peer_session_id == 0) {
tunnel = session->tunnel;
err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
if (err)
@@ -1407,7 +1407,7 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
if (put_user(len, optlen))
goto end_put_sess;
- if (copy_to_user((void __user *) optval, &val, len))
+ if (copy_to_user((void __user *)optval, &val, len))
goto end_put_sess;
err = 0;
@@ -1463,7 +1463,7 @@ static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
pd->session_idx++;
- if (pd->session == NULL) {
+ if (!pd->session) {
pd->session_idx = 0;
pppol2tp_next_tunnel(net, pd);
}
@@ -1478,17 +1478,21 @@ static void *pppol2tp_seq_start(struct seq_file *m, loff_t *offs)
if (!pos)
goto out;
- BUG_ON(m->private == NULL);
+ if (WARN_ON(!m->private)) {
+ pd = NULL;
+ goto out;
+ }
+
pd = m->private;
net = seq_file_net(m);
- if (pd->tunnel == NULL)
+ if (!pd->tunnel)
pppol2tp_next_tunnel(net, pd);
else
pppol2tp_next_session(net, pd);
/* NULL tunnel and session indicates end of list */
- if ((pd->tunnel == NULL) && (pd->session == NULL))
+ if (!pd->tunnel && !pd->session)
pd = NULL;
out:
@@ -1551,6 +1555,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
if (tunnel->sock) {
struct inet_sock *inet = inet_sk(tunnel->sock);
+
ip = ntohl(inet->inet_saddr);
port = ntohs(inet->inet_sport);
}
@@ -1564,8 +1569,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
user_data_ok = 'N';
}
- seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> "
- "%04X/%04X %d %c\n",
+ seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> %04X/%04X %d %c\n",
session->name, ip, port,
tunnel->tunnel_id,
session->session_id,
@@ -1604,8 +1608,7 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v)
seq_puts(m, "PPPoL2TP driver info, " PPPOL2TP_DRV_VERSION "\n");
seq_puts(m, "TUNNEL name, user-data-ok session-count\n");
seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
- seq_puts(m, " SESSION name, addr/port src-tid/sid "
- "dest-tid/sid state user-data-ok\n");
+ seq_puts(m, " SESSION name, addr/port src-tid/sid dest-tid/sid state user-data-ok\n");
seq_puts(m, " mtu/mru/rcvseq/sendseq/lns debug reorderto\n");
seq_puts(m, " nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
goto out;
@@ -1638,7 +1641,7 @@ static __net_init int pppol2tp_init_net(struct net *net)
int err = 0;
pde = proc_create_net("pppol2tp", 0444, net->proc_net,
- &pppol2tp_seq_ops, sizeof(struct pppol2tp_seq_data));
+ &pppol2tp_seq_ops, sizeof(struct pppol2tp_seq_data));
if (!pde) {
err = -ENOMEM;
goto out;
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index f35899d45a9a..e71ca5aec684 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -9,6 +9,99 @@
#include <net/fib_rules.h>
#include <net/l3mdev.h>
+static DEFINE_SPINLOCK(l3mdev_lock);
+
+struct l3mdev_handler {
+ lookup_by_table_id_t dev_lookup;
+};
+
+static struct l3mdev_handler l3mdev_handlers[L3MDEV_TYPE_MAX + 1];
+
+static int l3mdev_check_type(enum l3mdev_type l3type)
+{
+ if (l3type <= L3MDEV_TYPE_UNSPEC || l3type > L3MDEV_TYPE_MAX)
+ return -EINVAL;
+
+ return 0;
+}
+
+int l3mdev_table_lookup_register(enum l3mdev_type l3type,
+ lookup_by_table_id_t fn)
+{
+ struct l3mdev_handler *hdlr;
+ int res;
+
+ res = l3mdev_check_type(l3type);
+ if (res)
+ return res;
+
+ hdlr = &l3mdev_handlers[l3type];
+
+ spin_lock(&l3mdev_lock);
+
+ if (hdlr->dev_lookup) {
+ res = -EBUSY;
+ goto unlock;
+ }
+
+ hdlr->dev_lookup = fn;
+ res = 0;
+
+unlock:
+ spin_unlock(&l3mdev_lock);
+
+ return res;
+}
+EXPORT_SYMBOL_GPL(l3mdev_table_lookup_register);
+
+void l3mdev_table_lookup_unregister(enum l3mdev_type l3type,
+ lookup_by_table_id_t fn)
+{
+ struct l3mdev_handler *hdlr;
+
+ if (l3mdev_check_type(l3type))
+ return;
+
+ hdlr = &l3mdev_handlers[l3type];
+
+ spin_lock(&l3mdev_lock);
+
+ if (hdlr->dev_lookup == fn)
+ hdlr->dev_lookup = NULL;
+
+ spin_unlock(&l3mdev_lock);
+}
+EXPORT_SYMBOL_GPL(l3mdev_table_lookup_unregister);
+
+int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type,
+ struct net *net, u32 table_id)
+{
+ lookup_by_table_id_t lookup;
+ struct l3mdev_handler *hdlr;
+ int ifindex = -EINVAL;
+ int res;
+
+ res = l3mdev_check_type(l3type);
+ if (res)
+ return res;
+
+ hdlr = &l3mdev_handlers[l3type];
+
+ spin_lock(&l3mdev_lock);
+
+ lookup = hdlr->dev_lookup;
+ if (!lookup)
+ goto unlock;
+
+ ifindex = lookup(net, table_id);
+
+unlock:
+ spin_unlock(&l3mdev_lock);
+
+ return ifindex;
+}
+EXPORT_SYMBOL_GPL(l3mdev_ifindex_lookup_by_table_id);
+
/**
* l3mdev_master_ifindex - get index of L3 master device
* @dev: targeted interface
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 6e53e43c1907..7180979114e4 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -984,7 +984,6 @@ out:
* llc_ui_getname - return the address info of a socket
* @sock: Socket to get address of.
* @uaddr: Address structure to return information.
- * @uaddrlen: Length of address structure.
* @peer: Does user want local or remote address information.
*
* Return the address information of a socket.
@@ -1054,7 +1053,7 @@ static int llc_ui_ioctl(struct socket *sock, unsigned int cmd,
* Set various connection specific parameters.
*/
static int llc_ui_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
@@ -1064,7 +1063,7 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname,
lock_sock(sk);
if (unlikely(level != SOL_LLC || optlen != sizeof(int)))
goto out;
- rc = get_user(opt, (int __user *)optval);
+ rc = copy_from_sockptr(&opt, optval, sizeof(opt));
if (rc)
goto out;
rc = -EINVAL;
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 7b620acaca9e..1144cda2a0fc 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -284,8 +284,8 @@ out:;
/**
* llc_conn_remove_acked_pdus - Removes acknowledged pdus from tx queue
* @sk: active connection
- * nr: NR
- * how_many_unacked: size of pdu_unack_q after removing acked pdus
+ * @nr: NR
+ * @how_many_unacked: size of pdu_unack_q after removing acked pdus
*
* Removes acknowledged pdus from transmit queue (pdu_unack_q). Returns
* the number of pdus that removed from queue.
@@ -906,6 +906,7 @@ static void llc_sk_init(struct sock *sk)
/**
* llc_sk_alloc - Allocates LLC sock
+ * @net: network namespace
* @family: upper layer protocol family
* @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
*
@@ -951,7 +952,7 @@ void llc_sk_stop_all_timers(struct sock *sk, bool sync)
/**
* llc_sk_free - Frees a LLC socket
- * @sk - socket to free
+ * @sk: - socket to free
*
* Frees a LLC socket
*/
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index 82cb93f66b9b..c309b72a5877 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -144,6 +144,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb)
* @skb: received pdu
* @dev: device that receive pdu
* @pt: packet type
+ * @orig_dev: the original receive net device
*
* When the system receives a 802.2 frame this function is called. It
* checks SAP and connection of received pdu and passes frame to
diff --git a/net/llc/llc_pdu.c b/net/llc/llc_pdu.c
index 2e6cb79196bb..792d195c8bae 100644
--- a/net/llc/llc_pdu.c
+++ b/net/llc/llc_pdu.c
@@ -25,7 +25,7 @@ void llc_pdu_set_cmd_rsp(struct sk_buff *skb, u8 pdu_type)
/**
* pdu_set_pf_bit - sets poll/final bit in LLC header
- * @pdu_frame: input frame that p/f bit must be set into it.
+ * @skb: Frame to set bit in
* @bit_value: poll/final bit (0 or 1).
*
* This function sets poll/final bit in LLC header (based on type of PDU).
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index be419062e19a..6805ce43a055 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -37,6 +37,7 @@ static int llc_mac_header_len(unsigned short devtype)
/**
* llc_alloc_frame - allocates sk_buff for frame
+ * @sk: socket to allocate frame to
* @dev: network device this skb will be sent over
* @type: pdu type to allocate
* @data_size: data size to allocate
@@ -273,6 +274,7 @@ void llc_build_and_send_xid_pkt(struct llc_sap *sap, struct sk_buff *skb,
* llc_sap_rcv - sends received pdus to the sap state machine
* @sap: current sap component structure.
* @skb: received frame.
+ * @sk: socket to associate to frame
*
* Sends received pdus to the sap state machine.
*/
@@ -379,6 +381,7 @@ static void llc_do_mcast(struct llc_sap *sap, struct sk_buff *skb,
* llc_sap_mcast - Deliver multicast PDU's to all matching datagram sockets.
* @sap: SAP
* @laddr: address of local LLC (MAC + SAP)
+ * @skb: PDU to deliver
*
* Search socket list of the SAP and finds connections with same sap.
* Deliver clone to each.
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 7f245e9f114c..313ba97acae3 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -477,7 +477,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
size_t len)
{
u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num;
- struct ieee802_11_elems elems = { 0 };
+ struct ieee802_11_elems elems = { };
u8 dialog_token;
int ies_len;
diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c
index 9fc2968856c0..366f76c9003d 100644
--- a/net/mac80211/airtime.c
+++ b/net/mac80211/airtime.c
@@ -551,7 +551,7 @@ EXPORT_SYMBOL_GPL(ieee80211_calc_tx_airtime);
u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_sta *pubsta,
- int len)
+ int len, bool ampdu)
{
struct ieee80211_supported_band *sband;
struct ieee80211_chanctx_conf *conf;
@@ -572,10 +572,26 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
if (pubsta) {
struct sta_info *sta = container_of(pubsta, struct sta_info,
sta);
-
- return ieee80211_calc_tx_airtime_rate(hw,
- &sta->tx_stats.last_rate,
- band, len);
+ struct ieee80211_tx_rate *rate = &sta->tx_stats.last_rate;
+ u32 airtime;
+
+ if (!(rate->flags & (IEEE80211_TX_RC_VHT_MCS |
+ IEEE80211_TX_RC_MCS)))
+ ampdu = false;
+
+ /*
+ * Assume that HT/VHT transmission on any AC except VO will
+ * use aggregation. Since we don't have reliable reporting
+ * of aggregation length, assume an average of 16.
+ * This will not be very accurate, but much better than simply
+ * assuming un-aggregated tx.
+ */
+ airtime = ieee80211_calc_tx_airtime_rate(hw, rate, band,
+ ampdu ? len * 16 : len);
+ if (ampdu)
+ airtime /= 16;
+
+ return airtime;
}
if (!conf)
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 9b360544ad6f..87fddd84c621 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -608,12 +608,12 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
case WLAN_CIPHER_SUITE_BIP_CMAC_256:
BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) !=
offsetof(typeof(kseq), aes_cmac));
- /* fall through */
+ fallthrough;
case WLAN_CIPHER_SUITE_BIP_GMAC_128:
case WLAN_CIPHER_SUITE_BIP_GMAC_256:
BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) !=
offsetof(typeof(kseq), aes_gmac));
- /* fall through */
+ fallthrough;
case WLAN_CIPHER_SUITE_GCMP:
case WLAN_CIPHER_SUITE_GCMP_256:
BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) !=
@@ -991,9 +991,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
BSS_CHANGED_SSID |
BSS_CHANGED_P2P_PS |
BSS_CHANGED_TXPOWER |
- BSS_CHANGED_TWT |
- BSS_CHANGED_HE_OBSS_PD |
- BSS_CHANGED_HE_BSS_COLOR;
+ BSS_CHANGED_TWT;
int i, err;
int prev_beacon_int;
@@ -1019,6 +1017,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
sdata->vif.bss_conf.frame_time_rts_th =
le32_get_bits(params->he_oper->he_oper_params,
IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK);
+ changed |= BSS_CHANGED_HE_OBSS_PD;
+
+ if (params->he_bss_color.enabled)
+ changed |= BSS_CHANGED_HE_BSS_COLOR;
}
mutex_lock(&local->mtx);
@@ -2126,6 +2128,11 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy,
if (_chg_mesh_attr(NL80211_MESHCONF_CONNECTED_TO_GATE, mask))
conf->dot11MeshConnectedToMeshGate =
nconf->dot11MeshConnectedToMeshGate;
+ if (_chg_mesh_attr(NL80211_MESHCONF_NOLEARN, mask))
+ conf->dot11MeshNolearn = nconf->dot11MeshNolearn;
+ if (_chg_mesh_attr(NL80211_MESHCONF_CONNECTED_TO_AS, mask))
+ conf->dot11MeshConnectedToAuthServer =
+ nconf->dot11MeshConnectedToAuthServer;
ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON);
return 0;
}
@@ -2166,6 +2173,7 @@ static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev)
ieee80211_stop_mesh(sdata);
mutex_lock(&sdata->local->mtx);
ieee80211_vif_release_channel(sdata);
+ kfree(sdata->u.mesh.ie);
mutex_unlock(&sdata->local->mtx);
return 0;
@@ -2336,7 +2344,7 @@ static int ieee80211_scan(struct wiphy *wiphy,
* for now fall through to allow scanning only when
* beaconing hasn't been configured yet
*/
- /* fall through */
+ fallthrough;
case NL80211_IFTYPE_AP:
/*
* If the scan has been forced (and the driver supports
@@ -3583,7 +3591,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
}
local_bh_disable();
- ieee80211_xmit(sdata, sta, skb, 0);
+ ieee80211_xmit(sdata, sta, skb);
local_bh_enable();
ret = 0;
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index e6e192f53e4e..bdc0f29dc6cd 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -313,9 +313,14 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
lockdep_assert_held(&local->chanctx_mtx);
- /* don't optimize 5MHz, 10MHz, and radar_enabled confs */
+ /* don't optimize non-20MHz based and radar_enabled confs */
if (ctx->conf.def.width == NL80211_CHAN_WIDTH_5 ||
ctx->conf.def.width == NL80211_CHAN_WIDTH_10 ||
+ ctx->conf.def.width == NL80211_CHAN_WIDTH_1 ||
+ ctx->conf.def.width == NL80211_CHAN_WIDTH_2 ||
+ ctx->conf.def.width == NL80211_CHAN_WIDTH_4 ||
+ ctx->conf.def.width == NL80211_CHAN_WIDTH_8 ||
+ ctx->conf.def.width == NL80211_CHAN_WIDTH_16 ||
ctx->conf.radar_enabled) {
ctx->conf.min_def = ctx->conf.def;
return;
@@ -743,7 +748,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
default:
WARN_ONCE(1, "Invalid SMPS mode %d\n",
sdata->smps_mode);
- /* fall through */
+ fallthrough;
case IEEE80211_SMPS_OFF:
needed_static = sdata->needed_rx_chains;
needed_dynamic = sdata->needed_rx_chains;
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index d7e955127d5c..fe8a7a87e513 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -638,6 +638,9 @@ IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration,
u.mesh.mshcfg.dot11MeshAwakeWindowDuration, DEC);
IEEE80211_IF_FILE(dot11MeshConnectedToMeshGate,
u.mesh.mshcfg.dot11MeshConnectedToMeshGate, DEC);
+IEEE80211_IF_FILE(dot11MeshNolearn, u.mesh.mshcfg.dot11MeshNolearn, DEC);
+IEEE80211_IF_FILE(dot11MeshConnectedToAuthServer,
+ u.mesh.mshcfg.dot11MeshConnectedToAuthServer, DEC);
#endif
#define DEBUGFS_ADD_MODE(name, mode) \
@@ -762,6 +765,8 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata)
MESHPARAMS_ADD(power_mode);
MESHPARAMS_ADD(dot11MeshAwakeWindowDuration);
MESHPARAMS_ADD(dot11MeshConnectedToMeshGate);
+ MESHPARAMS_ADD(dot11MeshNolearn);
+ MESHPARAMS_ADD(dot11MeshConnectedToAuthServer);
#undef MESHPARAMS_ADD
}
#endif
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index de69fc9c4f07..41d495d73d3a 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -12,12 +12,11 @@
#include "ieee80211_i.h"
#include "trace.h"
-static inline bool check_sdata_in_driver(struct ieee80211_sub_if_data *sdata)
-{
- return !WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER),
- "%s: Failed check-sdata-in-driver check, flags: 0x%x\n",
- sdata->dev ? sdata->dev->name : sdata->name, sdata->flags);
-}
+#define check_sdata_in_driver(sdata) ({ \
+ !WARN_ONCE(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER), \
+ "%s: Failed check-sdata-in-driver check, flags: 0x%x\n", \
+ sdata->dev ? sdata->dev->name : sdata->name, sdata->flags); \
+})
static inline struct ieee80211_sub_if_data *
get_bss_sdata(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index e32906202575..3d62a80b5790 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -250,7 +250,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
switch (sdata->vif.bss_conf.chandef.width) {
default:
WARN_ON_ONCE(1);
- /* fall through */
+ fallthrough;
case NL80211_CHAN_WIDTH_20_NOHT:
case NL80211_CHAN_WIDTH_20:
bw = IEEE80211_STA_RX_BW_20;
@@ -517,7 +517,7 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
case IEEE80211_SMPS_AUTOMATIC:
case IEEE80211_SMPS_NUM_MODES:
WARN_ON(1);
- /* fall through */
+ fallthrough;
case IEEE80211_SMPS_OFF:
action_frame->u.action.u.ht_smps.smps_control =
WLAN_HT_SMPS_CONTROL_DISABLED;
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 81d26fef41e9..53632c2f5217 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -791,7 +791,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
case NL80211_CHAN_WIDTH_10:
case NL80211_CHAN_WIDTH_20_NOHT:
sta_flags |= IEEE80211_STA_DISABLE_HT;
- /* fall through */
+ fallthrough;
case NL80211_CHAN_WIDTH_20:
sta_flags |= IEEE80211_STA_DISABLE_40MHZ;
break;
@@ -1401,7 +1401,7 @@ ieee80211_ibss_setup_scan_channels(struct wiphy *wiphy,
break;
case NL80211_CHAN_WIDTH_80P80:
cf2 = chandef->center_freq2;
- /* fall through */
+ fallthrough;
case NL80211_CHAN_WIDTH_80:
width = 80;
break;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ec1a71ac65f2..0b1eaec6649f 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -164,7 +164,6 @@ typedef unsigned __bitwise ieee80211_tx_result;
#define TX_DROP ((__force ieee80211_tx_result) 1u)
#define TX_QUEUED ((__force ieee80211_tx_result) 2u)
-#define IEEE80211_TX_NO_SEQNO BIT(0)
#define IEEE80211_TX_UNICAST BIT(1)
#define IEEE80211_TX_PS_BUFFERED BIT(2)
@@ -218,7 +217,7 @@ enum ieee80211_rx_flags {
};
struct ieee80211_rx_data {
- struct napi_struct *napi;
+ struct list_head *list;
struct sk_buff *skb;
struct ieee80211_local *local;
struct ieee80211_sub_if_data *sdata;
@@ -1967,12 +1966,11 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
bool bss_notify, bool enable_qos);
void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta, struct sk_buff *skb,
- u32 txdata_flags);
+ struct sta_info *sta, struct sk_buff *skb);
void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, int tid,
- enum nl80211_band band, u32 txdata_flags);
+ enum nl80211_band band);
/* sta_out needs to be checked for ERR_PTR() before using */
int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata,
@@ -1982,10 +1980,10 @@ int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata,
static inline void
ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, int tid,
- enum nl80211_band band, u32 txdata_flags)
+ enum nl80211_band band)
{
rcu_read_lock();
- __ieee80211_tx_skb_tid_band(sdata, skb, tid, band, txdata_flags);
+ __ieee80211_tx_skb_tid_band(sdata, skb, tid, band);
rcu_read_unlock();
}
@@ -2003,7 +2001,7 @@ static inline void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata,
}
__ieee80211_tx_skb_tid_band(sdata, skb, tid,
- chanctx_conf->def.chan->band, 0);
+ chanctx_conf->def.chan->band);
rcu_read_unlock();
}
@@ -2290,7 +2288,7 @@ extern const struct ethtool_ops ieee80211_ethtool_ops;
u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_sta *pubsta,
- int len);
+ int len, bool ampdu);
#ifdef CONFIG_MAC80211_NOINLINE
#define debug_noinline noinline
#else
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index f900c84fb40f..9740ae8fa697 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -978,7 +978,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
case NL80211_IFTYPE_P2P_DEVICE:
/* relies on synchronize_rcu() below */
RCU_INIT_POINTER(local->p2p_sdata, NULL);
- /* fall through */
+ fallthrough;
default:
cancel_work_sync(&sdata->work);
/*
@@ -1048,7 +1048,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))
break;
- /* fall through */
+ fallthrough;
default:
if (going_down)
drv_remove_interface(local, sdata);
@@ -1183,17 +1183,24 @@ static u16 ieee80211_monitor_select_queue(struct net_device *dev,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_hdr *hdr;
- struct ieee80211_radiotap_header *rtap = (void *)skb->data;
+ int len_rthdr;
if (local->hw.queues < IEEE80211_NUM_ACS)
return 0;
- if (skb->len < 4 ||
- skb->len < le16_to_cpu(rtap->it_len) + 2 /* frame control */)
+ /* reset flags and info before parsing radiotap header */
+ memset(info, 0, sizeof(*info));
+
+ if (!ieee80211_parse_tx_radiotap(skb, dev))
return 0; /* doesn't matter, frame will be dropped */
- hdr = (void *)((u8 *)skb->data + le16_to_cpu(rtap->it_len));
+ len_rthdr = ieee80211_get_radiotap_len(skb->data);
+ hdr = (struct ieee80211_hdr *)(skb->data + len_rthdr);
+ if (skb->len < len_rthdr + 2 ||
+ skb->len < len_rthdr + ieee80211_hdrlen(hdr->frame_control))
+ return 0; /* doesn't matter, frame will be dropped */
return ieee80211_select_queue_80211(sdata, skb, hdr);
}
@@ -1497,7 +1504,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
type = NL80211_IFTYPE_AP;
sdata->vif.type = type;
sdata->vif.p2p = true;
- /* fall through */
+ fallthrough;
case NL80211_IFTYPE_AP:
skb_queue_head_init(&sdata->u.ap.ps.bc_buf);
INIT_LIST_HEAD(&sdata->u.ap.vlans);
@@ -1507,7 +1514,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
type = NL80211_IFTYPE_STATION;
sdata->vif.type = type;
sdata->vif.p2p = true;
- /* fall through */
+ fallthrough;
case NL80211_IFTYPE_STATION:
sdata->vif.bss_conf.bssid = sdata->u.mgd.bssid;
ieee80211_sta_setup_sdata(sdata);
@@ -1703,7 +1710,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
goto out_unlock;
}
}
- /* fall through */
+ fallthrough;
default:
/* assign a new address if possible -- try n_addresses first */
for (i = 0; i < local->hw.wiphy->n_addresses; i++) {
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 8f403c1bb908..9c2888004878 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -225,7 +225,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
*/
if (sdata->hw_80211_encap)
return -EINVAL;
- /* Fall through */
+ fallthrough;
case WLAN_CIPHER_SUITE_AES_CMAC:
case WLAN_CIPHER_SUITE_BIP_CMAC_256:
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 5f1ca25b6c97..7ecd801a943b 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -260,6 +260,7 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata,
bool is_connected_to_gate = ifmsh->num_gates > 0 ||
ifmsh->mshcfg.dot11MeshGateAnnouncementProtocol ||
ifmsh->mshcfg.dot11MeshConnectedToMeshGate;
+ bool is_connected_to_as = ifmsh->mshcfg.dot11MeshConnectedToAuthServer;
if (skb_tailroom(skb) < 2 + meshconf_len)
return -ENOMEM;
@@ -284,7 +285,9 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata,
/* Mesh Formation Info - number of neighbors */
neighbors = atomic_read(&ifmsh->estab_plinks);
neighbors = min_t(int, neighbors, IEEE80211_MAX_MESH_PEERINGS);
- *pos++ = (neighbors << 1) | is_connected_to_gate;
+ *pos++ = (is_connected_to_as << 7) |
+ (neighbors << 1) |
+ is_connected_to_gate;
/* Mesh capability */
*pos = 0x00;
*pos |= ifmsh->mshcfg.dot11MeshForwarding ?
@@ -617,6 +620,19 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
+ struct ieee80211_supported_band *sband;
+ const struct ieee80211_sband_iftype_data *iftd;
+
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return -EINVAL;
+
+ iftd = ieee80211_get_sband_iftype_data(sband,
+ NL80211_IFTYPE_MESH_POINT);
+ /* The device doesn't support HE in mesh mode or at all */
+ if (!iftd)
+ return 0;
+
ieee80211_ie_build_he_6ghz_cap(sdata, skb);
return 0;
}
@@ -1094,10 +1110,10 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
switch (sdata->vif.bss_conf.chandef.width) {
case NL80211_CHAN_WIDTH_20_NOHT:
sta_flags |= IEEE80211_STA_DISABLE_HT;
- /* fall through */
+ fallthrough;
case NL80211_CHAN_WIDTH_20:
sta_flags |= IEEE80211_STA_DISABLE_40MHZ;
- /* fall through */
+ fallthrough;
case NL80211_CHAN_WIDTH_40:
sta_flags |= IEEE80211_STA_DISABLE_VHT;
break;
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 02cde0fd08fe..bec23d2eee7a 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -1173,6 +1173,40 @@ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata,
}
/**
+ * mesh_nexthop_lookup_nolearn - try to set next hop without path discovery
+ * @skb: 802.11 frame to be sent
+ * @sdata: network subif the frame will be sent through
+ *
+ * Check if the meshDA (addr3) of a unicast frame is a direct neighbor.
+ * And if so, set the RA (addr1) to it to transmit to this node directly,
+ * avoiding PREQ/PREP path discovery.
+ *
+ * Returns: 0 if the next hop was found and -ENOENT otherwise.
+ */
+static int mesh_nexthop_lookup_nolearn(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ struct sta_info *sta;
+
+ if (is_multicast_ether_addr(hdr->addr1))
+ return -ENOENT;
+
+ rcu_read_lock();
+ sta = sta_info_get(sdata, hdr->addr3);
+
+ if (!sta || sta->mesh->plink_state != NL80211_PLINK_ESTAB) {
+ rcu_read_unlock();
+ return -ENOENT;
+ }
+ rcu_read_unlock();
+
+ memcpy(hdr->addr1, hdr->addr3, ETH_ALEN);
+ memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+ return 0;
+}
+
+/**
* mesh_nexthop_lookup - put the appropriate next hop on a mesh frame. Calling
* this function is considered "using" the associated mpath, so preempt a path
* refresh if this mpath expires soon.
@@ -1185,11 +1219,16 @@ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata,
int mesh_nexthop_lookup(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
+ struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct mesh_path *mpath;
struct sta_info *next_hop;
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
u8 *target_addr = hdr->addr3;
+ if (ifmsh->mshcfg.dot11MeshNolearn &&
+ !mesh_nexthop_lookup_nolearn(sdata, skb))
+ return 0;
+
mpath = mesh_path_lookup(sdata, target_addr);
if (!mpath || !(mpath->flags & MESH_PATH_ACTIVE))
return -ENOENT;
@@ -1266,7 +1305,7 @@ void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata)
break;
case IEEE80211_PROACTIVE_PREQ_WITH_PREP:
flags |= IEEE80211_PREQ_PROACTIVE_PREP_FLAG;
- /* fall through */
+ fallthrough;
case IEEE80211_PROACTIVE_PREQ_NO_PREP:
interval = ifmsh->mshcfg.dot11MeshHWMPactivePathToRootTimeout;
target_flags |= IEEE80211_PREQ_TO_FLAG |
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 117519bf33d6..48f31ac9233c 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -72,7 +72,6 @@ static void mesh_table_free(struct mesh_table *tbl)
}
/**
- *
* mesh_path_assign_nexthop - update mesh path next hop
*
* @mpath: mesh path to update
@@ -140,7 +139,6 @@ static void prepare_for_gate(struct sk_buff *skb, char *dst_addr,
}
/**
- *
* mesh_path_move_to_queue - Move or copy frames from one mpath queue to another
*
* This function is used to transfer or copy frames from an unresolved mpath to
@@ -152,7 +150,7 @@ static void prepare_for_gate(struct sk_buff *skb, char *dst_addr,
*
* The gate mpath must be an active mpath with a valid mpath->next_hop.
*
- * @mpath: An active mpath the frames will be sent to (i.e. the gate)
+ * @gate_mpath: An active mpath the frames will be sent to (i.e. the gate)
* @from_mpath: The failed mpath
* @copy: When true, copy all the frames to the new mpath queue. When false,
* move them.
@@ -521,6 +519,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl,
del_timer_sync(&mpath->timer);
atomic_dec(&sdata->u.mesh.mpaths);
atomic_dec(&tbl->entries);
+ mesh_path_flush_pending(mpath);
kfree_rcu(mpath, rcu);
}
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 798e4b6b383f..15f2fc658f70 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -699,7 +699,7 @@ void mesh_plink_timer(struct timer_list *t)
break;
}
reason = WLAN_REASON_MESH_MAX_RETRIES;
- /* fall through */
+ fallthrough;
case NL80211_PLINK_CNF_RCVD:
/* confirm timer */
if (!reason)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b2a9d47cf86d..ac870309b911 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -533,7 +533,7 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata,
case IEEE80211_SMPS_AUTOMATIC:
case IEEE80211_SMPS_NUM_MODES:
WARN_ON(1);
- /* fall through */
+ fallthrough;
case IEEE80211_SMPS_OFF:
cap |= WLAN_HT_CAP_SM_PS_DISABLED <<
IEEE80211_HT_CAP_SM_PS_SHIFT;
@@ -1529,7 +1529,7 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata,
switch (channel->band) {
default:
WARN_ON_ONCE(1);
- /* fall through */
+ fallthrough;
case NL80211_BAND_2GHZ:
case NL80211_BAND_60GHZ:
chan_increment = 1;
@@ -2988,7 +2988,10 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
if (auth_alg == WLAN_AUTH_SAE &&
- status_code == WLAN_STATUS_ANTI_CLOG_REQUIRED)
+ (status_code == WLAN_STATUS_ANTI_CLOG_REQUIRED ||
+ (auth_transaction == 1 &&
+ (status_code == WLAN_STATUS_SAE_HASH_TO_ELEMENT ||
+ status_code == WLAN_STATUS_SAE_PK))))
return;
sdata_info(sdata, "%pM denied authentication (status %d)\n",
@@ -3460,10 +3463,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
bss_conf->he_bss_color.partial =
le32_get_bits(elems->he_operation->he_oper_params,
IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR);
- bss_conf->he_bss_color.disabled =
- le32_get_bits(elems->he_operation->he_oper_params,
- IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED);
- changed |= BSS_CHANGED_HE_BSS_COLOR;
+ bss_conf->he_bss_color.enabled =
+ !le32_get_bits(elems->he_operation->he_oper_params,
+ IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED);
+
+ if (bss_conf->he_bss_color.enabled)
+ changed |= BSS_CHANGED_HE_BSS_COLOR;
bss_conf->htc_trig_based_pkt_ext =
le32_get_bits(elems->he_operation->he_oper_params,
@@ -4558,6 +4563,9 @@ static void ieee80211_sta_bcn_mon_timer(struct timer_list *t)
if (sdata->vif.csa_active && !ifmgd->csa_waiting_bcn)
return;
+ if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)
+ return;
+
sdata->u.mgd.connection_loss = false;
ieee80211_queue_work(&sdata->local->hw,
&sdata->u.mgd.beacon_connection_loss_work);
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index db3b8bf75656..f470d1a7ce9b 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -264,7 +264,7 @@ static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc,
if (roc->mgmt_tx_cookie) {
if (!WARN_ON(!roc->frame)) {
ieee80211_tx_skb_tid_band(roc->sdata, roc->frame, 7,
- roc->chan->band, 0);
+ roc->chan->band);
roc->frame = NULL;
}
} else {
@@ -808,13 +808,13 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
if (!sdata->vif.bss_conf.ibss_joined)
need_offchan = true;
#ifdef CONFIG_MAC80211_MESH
- /* fall through */
+ fallthrough;
case NL80211_IFTYPE_MESH_POINT:
if (ieee80211_vif_is_mesh(&sdata->vif) &&
!sdata->u.mesh.mesh_id_len)
need_offchan = true;
#endif
- /* fall through */
+ fallthrough;
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_P2P_GO:
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5c5af4b5fc08..836cde516a18 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2578,8 +2578,8 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
memset(skb->cb, 0, sizeof(skb->cb));
/* deliver to local stack */
- if (rx->napi)
- napi_gro_receive(rx->napi, skb);
+ if (rx->list)
+ list_add_tail(&skb->list, rx->list);
else
netif_receive_skb(skb);
}
@@ -3591,7 +3591,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
}
__ieee80211_tx_skb_tid_band(rx->sdata, nskb, 7,
- status->band, 0);
+ status->band);
}
dev_kfree_skb(rx->skb);
return RX_QUEUED;
@@ -3736,7 +3736,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
if (rx->sta)
rx->sta->rx_stats.dropped++;
- /* fall through */
+ fallthrough;
case RX_CONTINUE: {
struct ieee80211_rate *rate = NULL;
struct ieee80211_supported_band *sband;
@@ -3869,7 +3869,6 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
/* This is OK -- must be QoS data frame */
.security_idx = tid,
.seqno_idx = tid,
- .napi = NULL, /* must be NULL to not have races */
};
struct tid_ampdu_rx *tid_agg_rx;
@@ -4479,8 +4478,8 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
/* deliver to local stack */
skb->protocol = eth_type_trans(skb, fast_rx->dev);
memset(skb->cb, 0, sizeof(skb->cb));
- if (rx->napi)
- napi_gro_receive(rx->napi, skb);
+ if (rx->list)
+ list_add_tail(&skb->list, rx->list);
else
netif_receive_skb(skb);
@@ -4547,7 +4546,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
struct ieee80211_sta *pubsta,
struct sk_buff *skb,
- struct napi_struct *napi)
+ struct list_head *list)
{
struct ieee80211_local *local = hw_to_local(hw);
struct ieee80211_sub_if_data *sdata;
@@ -4562,7 +4561,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
memset(&rx, 0, sizeof(rx));
rx.skb = skb;
rx.local = local;
- rx.napi = napi;
+ rx.list = list;
if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc))
I802_DEBUG_INC(local->dot11ReceivedFragmentCount);
@@ -4670,8 +4669,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
* This is the receive path handler. It is called by a low level driver when an
* 802.11 MPDU is received from the hardware.
*/
-void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
- struct sk_buff *skb, struct napi_struct *napi)
+void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
+ struct sk_buff *skb, struct list_head *list)
{
struct ieee80211_local *local = hw_to_local(hw);
struct ieee80211_rate *rate = NULL;
@@ -4752,7 +4751,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
break;
default:
WARN_ON_ONCE(1);
- /* fall through */
+ fallthrough;
case RX_ENC_LEGACY:
if (WARN_ON(status->rate_idx >= sband->n_bitrates))
goto drop;
@@ -4763,36 +4762,53 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
status->rx_flags = 0;
/*
- * key references and virtual interfaces are protected using RCU
- * and this requires that we are in a read-side RCU section during
- * receive processing
- */
- rcu_read_lock();
-
- /*
* Frames with failed FCS/PLCP checksum are not returned,
* all other frames are returned without radiotap header
* if it was previously present.
* Also, frames with less than 16 bytes are dropped.
*/
skb = ieee80211_rx_monitor(local, skb, rate);
- if (!skb) {
- rcu_read_unlock();
+ if (!skb)
return;
- }
ieee80211_tpt_led_trig_rx(local,
((struct ieee80211_hdr *)skb->data)->frame_control,
skb->len);
- __ieee80211_rx_handle_packet(hw, pubsta, skb, napi);
-
- rcu_read_unlock();
+ __ieee80211_rx_handle_packet(hw, pubsta, skb, list);
return;
drop:
kfree_skb(skb);
}
+EXPORT_SYMBOL(ieee80211_rx_list);
+
+void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
+ struct sk_buff *skb, struct napi_struct *napi)
+{
+ struct sk_buff *tmp;
+ LIST_HEAD(list);
+
+
+ /*
+ * key references and virtual interfaces are protected using RCU
+ * and this requires that we are in a read-side RCU section during
+ * receive processing
+ */
+ rcu_read_lock();
+ ieee80211_rx_list(hw, pubsta, skb, &list);
+ rcu_read_unlock();
+
+ if (!napi) {
+ netif_receive_skb_list(&list);
+ return;
+ }
+
+ list_for_each_entry_safe(skb, tmp, &list, list) {
+ skb_list_del_init(skb);
+ napi_gro_receive(napi, skb);
+ }
+}
EXPORT_SYMBOL(ieee80211_rx_napi);
/* This is a version of the rx handler that can be called from hard irq
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index ad90bbe57457..5ac2785cdc7b 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -591,7 +591,6 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel *channel)
{
struct sk_buff *skb;
- u32 txdata_flags = 0;
skb = ieee80211_build_probe_req(sdata, src, dst, ratemask, channel,
ssid, ssid_len,
@@ -600,15 +599,15 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata,
if (skb) {
if (flags & IEEE80211_PROBE_FLAG_RANDOM_SN) {
struct ieee80211_hdr *hdr = (void *)skb->data;
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
u16 sn = get_random_u32();
- txdata_flags |= IEEE80211_TX_NO_SEQNO;
+ info->control.flags |= IEEE80211_TX_CTRL_NO_SEQNO;
hdr->seq_ctrl =
cpu_to_le16(IEEE80211_SN_TO_SEQ(sn));
}
IEEE80211_SKB_CB(skb)->flags |= tx_flags;
- ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band,
- txdata_flags);
+ ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band);
}
}
@@ -913,6 +912,7 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
case NL80211_BSS_CHAN_WIDTH_10:
local->scan_chandef.width = NL80211_CHAN_WIDTH_10;
break;
+ default:
case NL80211_BSS_CHAN_WIDTH_20:
/* If scanning on oper channel, use whatever channel-type
* is currently in use.
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index cd8487bc6fc2..f2840d1d95cf 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1050,7 +1050,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
might_sleep();
lockdep_assert_held(&local->sta_mtx);
- while (sta->sta_state == IEEE80211_STA_AUTHORIZED) {
+ if (sta->sta_state == IEEE80211_STA_AUTHORIZED) {
ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC);
WARN_ON_ONCE(ret);
}
@@ -1455,7 +1455,7 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid,
}
info->band = chanctx_conf->def.chan->band;
- ieee80211_xmit(sdata, sta, skb, 0);
+ ieee80211_xmit(sdata, sta, skb);
rcu_read_unlock();
}
@@ -1923,9 +1923,7 @@ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
if (sta) {
tx_pending = atomic_sub_return(tx_airtime,
&sta->airtime[ac].aql_tx_pending);
- if (WARN_ONCE(tx_pending < 0,
- "STA %pM AC %d txq pending airtime underflow: %u, %u",
- sta->addr, ac, tx_pending, tx_airtime))
+ if (tx_pending < 0)
atomic_cmpxchg(&sta->airtime[ac].aql_tx_pending,
tx_pending, 0);
}
@@ -2426,7 +2424,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
BIT_ULL(NL80211_STA_INFO_LOCAL_PM) |
BIT_ULL(NL80211_STA_INFO_PEER_PM) |
BIT_ULL(NL80211_STA_INFO_NONPEER_PM) |
- BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_GATE);
+ BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_GATE) |
+ BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_AS);
sinfo->llid = sta->mesh->llid;
sinfo->plid = sta->mesh->plid;
@@ -2439,6 +2438,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
sinfo->peer_pm = sta->mesh->peer_pm;
sinfo->nonpeer_pm = sta->mesh->nonpeer_pm;
sinfo->connected_to_gate = sta->mesh->connected_to_gate;
+ sinfo->connected_to_as = sta->mesh->connected_to_as;
#endif
}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 49728047dfad..9d398c9daa4c 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -385,6 +385,7 @@ DECLARE_EWMA(mesh_tx_rate_avg, 8, 16)
* @processed_beacon: set to true after peer rates and capabilities are
* processed
* @connected_to_gate: true if mesh STA has a path to a mesh gate
+ * @connected_to_as: true if mesh STA has a path to a authentication server
* @fail_avg: moving percentage of failed MSDUs
* @tx_rate_avg: moving average of tx bitrate
*/
@@ -404,6 +405,7 @@ struct mesh_sta {
bool processed_beacon;
bool connected_to_gate;
+ bool connected_to_as;
enum nl80211_plink_state plink_state;
u32 plink_timeout;
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index cbc40b358ba2..adb1d30ce06e 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -799,7 +799,6 @@ static int ieee80211_tx_get_rates(struct ieee80211_hw *hw,
struct ieee80211_tx_info *info,
int *retry_count)
{
- int rates_idx = -1;
int count = -1;
int i;
@@ -821,13 +820,12 @@ static int ieee80211_tx_get_rates(struct ieee80211_hw *hw,
count += info->status.rates[i].count;
}
- rates_idx = i - 1;
if (count < 0)
count = 0;
*retry_count = count;
- return rates_idx;
+ return i - 1;
}
void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 4b0cff4a07bd..e01e4daeb8cd 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -239,7 +239,7 @@ static enum ieee80211_ac_numbers ieee80211_ac_from_wmm(int ac)
switch (ac) {
default:
WARN_ON_ONCE(1);
- /* fall through */
+ fallthrough;
case 0:
return IEEE80211_AC_BE;
case 1:
@@ -952,7 +952,7 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev,
set_sta_flag(sta, WLAN_STA_TDLS_INITIATOR);
sta->sta.tdls_initiator = false;
}
- /* fall-through */
+ fallthrough;
case WLAN_TDLS_SETUP_CONFIRM:
case WLAN_TDLS_DISCOVERY_REQUEST:
initiator = true;
@@ -967,7 +967,7 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev,
clear_sta_flag(sta, WLAN_STA_TDLS_INITIATOR);
sta->sta.tdls_initiator = true;
}
- /* fall-through */
+ fallthrough;
case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
initiator = false;
break;
@@ -1222,7 +1222,7 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
* by the AP.
*/
drv_mgd_protect_tdls_discover(sdata->local, sdata);
- /* fall-through */
+ fallthrough;
case WLAN_TDLS_SETUP_CONFIRM:
case WLAN_PUB_ACTION_TDLS_DISCOVER_RES:
/* no special handling */
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 1b4709694d2a..50ab5b9d8eab 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -22,7 +22,8 @@
#define LOCAL_PR_ARG __entry->wiphy_name
#define STA_ENTRY __array(char, sta_addr, ETH_ALEN)
-#define STA_ASSIGN (sta ? memcpy(__entry->sta_addr, sta->addr, ETH_ALEN) : memset(__entry->sta_addr, 0, ETH_ALEN))
+#define STA_ASSIGN (sta ? memcpy(__entry->sta_addr, sta->addr, ETH_ALEN) : \
+ eth_zero_addr(__entry->sta_addr))
#define STA_NAMED_ASSIGN(s) memcpy(__entry->sta_addr, (s)->addr, ETH_ALEN)
#define STA_PR_FMT " sta:%pM"
#define STA_PR_ARG __entry->sta_addr
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1a2941e5244f..dca01d7e6e3e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -166,6 +166,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
if (r->flags & IEEE80211_RATE_MANDATORY_A)
mrate = r->bitrate;
break;
+ case NL80211_BAND_S1GHZ:
case NL80211_BAND_60GHZ:
/* TODO, for now fall through */
case NUM_NL80211_BANDS:
@@ -824,6 +825,9 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
if (ieee80211_is_qos_nullfunc(hdr->frame_control))
return TX_CONTINUE;
+ if (info->control.flags & IEEE80211_TX_CTRL_NO_SEQNO)
+ return TX_CONTINUE;
+
/*
* Anything but QoS data that has a sequence number field
* (is long enough) gets a sequence number from the global
@@ -832,8 +836,6 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
*/
if (!ieee80211_is_data_qos(hdr->frame_control) ||
is_multicast_ether_addr(hdr->addr1)) {
- if (tx->flags & IEEE80211_TX_NO_SEQNO)
- return TX_CONTINUE;
/* driver should assign sequence number */
info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
/* for pure STA mode without beacons, we can do it */
@@ -1739,7 +1741,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
case NL80211_IFTYPE_AP_VLAN:
sdata = container_of(sdata->bss,
struct ieee80211_sub_if_data, u.ap);
- /* fall through */
+ fallthrough;
default:
vif = &sdata->vif;
break;
@@ -1890,7 +1892,7 @@ EXPORT_SYMBOL(ieee80211_tx_prepare_skb);
*/
static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, struct sk_buff *skb,
- bool txpending, u32 txdata_flags)
+ bool txpending)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_tx_data tx;
@@ -1908,8 +1910,6 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
led_len = skb->len;
res_prepare = ieee80211_tx_prepare(sdata, &tx, sta, skb);
- tx.flags |= txdata_flags;
-
if (unlikely(res_prepare == TX_DROP)) {
ieee80211_free_txskb(&local->hw, skb);
return true;
@@ -1977,8 +1977,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
}
void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta, struct sk_buff *skb,
- u32 txdata_flags)
+ struct sta_info *sta, struct sk_buff *skb)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
@@ -2013,12 +2012,13 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
}
ieee80211_set_qos_hdr(sdata, skb);
- ieee80211_tx(sdata, sta, skb, false, txdata_flags);
+ ieee80211_tx(sdata, sta, skb, false);
}
-static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local,
- struct sk_buff *skb)
+bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
+ struct net_device *dev)
{
+ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
struct ieee80211_radiotap_iterator iterator;
struct ieee80211_radiotap_header *rthdr =
(struct ieee80211_radiotap_header *) skb->data;
@@ -2037,6 +2037,18 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local,
u8 vht_mcs = 0, vht_nss = 0;
int i;
+ /* check for not even having the fixed radiotap header part */
+ if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header)))
+ return false; /* too short to be possibly valid */
+
+ /* is it a header version we can trust to find length from? */
+ if (unlikely(rthdr->it_version))
+ return false; /* only version 0 is supported */
+
+ /* does the skb contain enough to deliver on the alleged length? */
+ if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data)))
+ return false; /* skb too short for claimed rt header extent */
+
info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
IEEE80211_TX_CTL_DONTFRAG;
@@ -2084,6 +2096,8 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local,
txflags = get_unaligned_le16(iterator.this_arg);
if (txflags & IEEE80211_RADIOTAP_F_TX_NOACK)
info->flags |= IEEE80211_TX_CTL_NO_ACK;
+ if (txflags & IEEE80211_RADIOTAP_F_TX_NOSEQNO)
+ info->control.flags |= IEEE80211_TX_CTRL_NO_SEQNO;
break;
case IEEE80211_RADIOTAP_RATE:
@@ -2188,13 +2202,6 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local,
local->hw.max_rate_tries);
}
- /*
- * remove the radiotap header
- * iterator->_max_length was sanity-checked against
- * skb->len by iterator init
- */
- skb_pull(skb, iterator._max_length);
-
return true;
}
@@ -2203,8 +2210,6 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
{
struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
struct ieee80211_chanctx_conf *chanctx_conf;
- struct ieee80211_radiotap_header *prthdr =
- (struct ieee80211_radiotap_header *)skb->data;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_hdr *hdr;
struct ieee80211_sub_if_data *tmp_sdata, *sdata;
@@ -2212,21 +2217,17 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
u16 len_rthdr;
int hdrlen;
- /* check for not even having the fixed radiotap header part */
- if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header)))
- goto fail; /* too short to be possibly valid */
+ memset(info, 0, sizeof(*info));
+ info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
+ IEEE80211_TX_CTL_INJECTED;
- /* is it a header version we can trust to find length from? */
- if (unlikely(prthdr->it_version))
- goto fail; /* only version 0 is supported */
+ /* Sanity-check and process the injection radiotap header */
+ if (!ieee80211_parse_tx_radiotap(skb, dev))
+ goto fail;
- /* then there must be a radiotap header with a length we can use */
+ /* we now know there is a radiotap header with a length we can use */
len_rthdr = ieee80211_get_radiotap_len(skb->data);
- /* does the skb contain enough to deliver on the alleged length? */
- if (unlikely(skb->len < len_rthdr))
- goto fail; /* skb too short for claimed rt header extent */
-
/*
* fix up the pointers accounting for the radiotap
* header still being in there. We are being given
@@ -2272,11 +2273,6 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
skb->priority = *p & IEEE80211_QOS_CTL_TAG1D_MASK;
}
- memset(info, 0, sizeof(*info));
-
- info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
- IEEE80211_TX_CTL_INJECTED;
-
rcu_read_lock();
/*
@@ -2342,11 +2338,10 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
info->band = chandef->chan->band;
- /* process and remove the injection radiotap header */
- if (!ieee80211_parse_tx_radiotap(local, skb))
- goto fail_rcu;
+ /* remove the injection radiotap header */
+ skb_pull(skb, len_rthdr);
- ieee80211_xmit(sdata, NULL, skb, 0);
+ ieee80211_xmit(sdata, NULL, skb);
rcu_read_unlock();
return NETDEV_TX_OK;
@@ -2382,7 +2377,7 @@ int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata,
} else if (sdata->wdev.use_4addr) {
return -ENOLINK;
}
- /* fall through */
+ fallthrough;
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_OCB:
case NL80211_IFTYPE_ADHOC:
@@ -2552,7 +2547,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
band = chanctx_conf->def.chan->band;
if (sdata->wdev.use_4addr)
break;
- /* fall through */
+ fallthrough;
case NL80211_IFTYPE_AP:
if (sdata->vif.type == NL80211_IFTYPE_AP)
chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
@@ -2999,7 +2994,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
build.hdr_len = 30;
break;
}
- /* fall through */
+ fallthrough;
case NL80211_IFTYPE_AP:
fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
/* DA BSSID SA */
@@ -3618,7 +3613,7 @@ begin:
tx.skb = skb;
tx.sdata = vif_to_sdata(info->control.vif);
- if (txq->sta) {
+ if (txq->sta && !(info->flags & IEEE80211_TX_CTL_INJECTED)) {
tx.sta = container_of(txq->sta, struct sta_info, sta);
/*
* Drop unicast frames to unauthorised stations unless they are
@@ -3710,7 +3705,7 @@ begin:
case NL80211_IFTYPE_AP_VLAN:
tx.sdata = container_of(tx.sdata->bss,
struct ieee80211_sub_if_data, u.ap);
- /* fall through */
+ fallthrough;
default:
vif = &tx.sdata->vif;
break;
@@ -3721,10 +3716,11 @@ encap_out:
if (vif &&
wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
+ bool ampdu = txq->ac != IEEE80211_AC_VO;
u32 airtime;
airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta,
- skb->len);
+ skb->len, ampdu);
if (airtime) {
airtime = ieee80211_info_set_tx_time_est(info, airtime);
ieee80211_sta_update_pending_airtime(local, tx.sta,
@@ -3950,6 +3946,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
if (local->ops->wake_tx_queue) {
u16 queue = __ieee80211_select_queue(sdata, sta, skb);
skb_set_queue_mapping(skb, queue);
+ skb_get_hash(skb);
}
if (sta) {
@@ -4008,7 +4005,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
ieee80211_tx_stats(dev, skb->len);
- ieee80211_xmit(sdata, sta, skb, 0);
+ ieee80211_xmit(sdata, sta, skb);
}
goto out;
out_free:
@@ -4049,7 +4046,7 @@ static bool ieee80211_multicast_to_unicast(struct sk_buff *skb,
return false;
if (sdata->wdev.use_4addr)
return false;
- /* fall through */
+ fallthrough;
case NL80211_IFTYPE_AP:
/* check runtime toggle for this bss */
if (!sdata->bss->multicast_to_unicast)
@@ -4230,11 +4227,12 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))
goto out_free;
+ memset(info, 0, sizeof(*info));
+
if (unlikely(!multicast && skb->sk &&
skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS))
- ieee80211_store_ack_skb(local, skb, &info->flags, NULL);
-
- memset(info, 0, sizeof(*info));
+ info->ack_frame_id = ieee80211_store_ack_skb(local, skb,
+ &info->flags, NULL);
if (unlikely(sdata->control_port_protocol == ehdr->h_proto)) {
if (sdata->control_port_no_encrypt)
@@ -4374,7 +4372,7 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local,
return true;
}
info->band = chanctx_conf->def.chan->band;
- result = ieee80211_tx(sdata, NULL, skb, true, 0);
+ result = ieee80211_tx(sdata, NULL, skb, true);
} else if (info->control.flags & IEEE80211_TX_CTRL_HW_80211_ENCAP) {
if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) {
dev_kfree_skb(skb);
@@ -5333,7 +5331,7 @@ EXPORT_SYMBOL(ieee80211_unreserve_tid);
void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, int tid,
- enum nl80211_band band, u32 txdata_flags)
+ enum nl80211_band band)
{
int ac = ieee80211_ac_from_tid(tid);
@@ -5350,7 +5348,7 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
*/
local_bh_disable();
IEEE80211_SKB_CB(skb)->band = band;
- ieee80211_xmit(sdata, NULL, skb, txdata_flags);
+ ieee80211_xmit(sdata, NULL, skb);
local_bh_enable();
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 21c94094a699..c8504ffc71a1 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2347,10 +2347,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
case NL80211_IFTYPE_ADHOC:
if (sdata->vif.bss_conf.ibss_joined)
WARN_ON(drv_join_ibss(local, sdata));
- /* fall through */
+ fallthrough;
default:
ieee80211_reconfig_stations(sdata);
- /* fall through */
+ fallthrough;
case NL80211_IFTYPE_AP: /* AP stations are handled later */
for (i = 0; i < IEEE80211_NUM_ACS; i++)
drv_conf_tx(local, sdata, i,
@@ -2399,7 +2399,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
break;
case NL80211_IFTYPE_ADHOC:
changed |= BSS_CHANGED_IBSS;
- /* fall through */
+ fallthrough;
case NL80211_IFTYPE_AP:
changed |= BSS_CHANGED_SSID | BSS_CHANGED_P2P_PS;
@@ -2414,8 +2414,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
if (rcu_access_pointer(sdata->u.ap.beacon))
drv_start_ap(local, sdata);
}
-
- /* fall through */
+ fallthrough;
case NL80211_IFTYPE_MESH_POINT:
if (sdata->vif.bss_conf.enable_beacon) {
changed |= BSS_CHANGED_BEACON |
@@ -2878,6 +2877,10 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
if (WARN_ON(!iftd))
return;
+ /* Check for device HE 6 GHz capability before adding element */
+ if (!iftd->he_6ghz_capa.capa)
+ return;
+
cap = le16_to_cpu(iftd->he_6ghz_capa.capa);
cap &= ~IEEE80211_HE_6GHZ_CAP_SM_PS;
@@ -2885,7 +2888,7 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
case IEEE80211_SMPS_AUTOMATIC:
case IEEE80211_SMPS_NUM_MODES:
WARN_ON(1);
- /* fall through */
+ fallthrough;
case IEEE80211_SMPS_OFF:
cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_DISABLED,
IEEE80211_HE_6GHZ_CAP_SM_PS);
@@ -3196,7 +3199,7 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info,
break;
case 0x01:
support_80_80 = false;
- /* fall through */
+ fallthrough;
case 0x02:
case 0x03:
ccf1 = ccfs2;
@@ -3566,7 +3569,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
break;
default:
WARN_ON(1);
- /* fall through */
+ fallthrough;
case RX_ENC_LEGACY: {
struct ieee80211_supported_band *sband;
int shift = 0;
@@ -3730,6 +3733,11 @@ u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c)
c->width = NL80211_CHAN_WIDTH_20_NOHT;
ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
break;
+ case NL80211_CHAN_WIDTH_1:
+ case NL80211_CHAN_WIDTH_2:
+ case NL80211_CHAN_WIDTH_4:
+ case NL80211_CHAN_WIDTH_8:
+ case NL80211_CHAN_WIDTH_16:
case NL80211_CHAN_WIDTH_5:
case NL80211_CHAN_WIDTH_10:
WARN_ON_ONCE(1);
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 72920d82928c..2fb99325135a 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -198,7 +198,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
sta = rcu_dereference(sdata->u.vlan.sta);
if (sta)
break;
- /* fall through */
+ fallthrough;
case NL80211_IFTYPE_AP:
ra = skb->data;
break;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index fd30ea61336e..6fdd0c9f865a 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1584,21 +1584,10 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
unsigned int flags;
if (event == NETDEV_REGISTER) {
+ mdev = mpls_add_dev(dev);
+ if (IS_ERR(mdev))
+ return notifier_from_errno(PTR_ERR(mdev));
- /* For now just support Ethernet, IPGRE, IP6GRE, SIT and
- * IPIP devices
- */
- if (dev->type == ARPHRD_ETHER ||
- dev->type == ARPHRD_LOOPBACK ||
- dev->type == ARPHRD_IPGRE ||
- dev->type == ARPHRD_IP6GRE ||
- dev->type == ARPHRD_SIT ||
- dev->type == ARPHRD_TUNNEL ||
- dev->type == ARPHRD_TUNNEL6) {
- mdev = mpls_add_dev(dev);
- if (IS_ERR(mdev))
- return notifier_from_errno(PTR_ERR(mdev));
- }
return NOTIFY_OK;
}
diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
index a9ed3bf1d93f..698bc3525160 100644
--- a/net/mptcp/Kconfig
+++ b/net/mptcp/Kconfig
@@ -13,17 +13,29 @@ config MPTCP
if MPTCP
+config INET_MPTCP_DIAG
+ depends on INET_DIAG
+ def_tristate INET_DIAG
+
config MPTCP_IPV6
bool "MPTCP: IPv6 support for Multipath TCP"
select IPV6
default y
-config MPTCP_HMAC_TEST
- bool "Tests for MPTCP HMAC implementation"
+endif
+
+config MPTCP_KUNIT_TESTS
+ tristate "This builds the MPTCP KUnit tests" if !KUNIT_ALL_TESTS
+ select MPTCP
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
help
- This option enable boot time self-test for the HMAC implementation
- used by the MPTCP code
+ Currently covers the MPTCP crypto and token helpers.
+ Only useful for kernel devs running KUnit test harness and are not
+ for inclusion into a production build.
- Say N if you are unsure.
+ For more information on KUnit and unit tests in general please refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+ If unsure, say N.
-endif
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index baa0640527c7..a611968be4d7 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -3,3 +3,10 @@ obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
mib.o pm_netlink.o
+
+obj-$(CONFIG_SYN_COOKIES) += syncookies.o
+obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
+
+mptcp_crypto_test-objs := crypto_test.o
+mptcp_token_test-objs := token_test.o
+obj-$(CONFIG_MPTCP_KUNIT_TESTS) += mptcp_crypto_test.o mptcp_token_test.o
diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c
index 3d980713a9e2..6c4ea979dfd4 100644
--- a/net/mptcp/crypto.c
+++ b/net/mptcp/crypto.c
@@ -87,65 +87,6 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
sha256_final(&state, (u8 *)hmac);
}
-#ifdef CONFIG_MPTCP_HMAC_TEST
-struct test_cast {
- char *key;
- char *msg;
- char *result;
-};
-
-/* we can't reuse RFC 4231 test vectors, as we have constraint on the
- * input and key size.
- */
-static struct test_cast tests[] = {
- {
- .key = "0b0b0b0b0b0b0b0b",
- .msg = "48692054",
- .result = "8385e24fb4235ac37556b6b886db106284a1da671699f46db1f235ec622dcafa",
- },
- {
- .key = "aaaaaaaaaaaaaaaa",
- .msg = "dddddddd",
- .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492984e1eb71aff9022f71046e9",
- },
- {
- .key = "0102030405060708",
- .msg = "cdcdcdcd",
- .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6f23b4d8c4da736a5dbbc6e7d",
- },
-};
-
-static int __init test_mptcp_crypto(void)
-{
- char hmac[32], hmac_hex[65];
- u32 nonce1, nonce2;
- u64 key1, key2;
- u8 msg[8];
- int i, j;
-
- for (i = 0; i < ARRAY_SIZE(tests); ++i) {
- /* mptcp hmap will convert to be before computing the hmac */
- key1 = be64_to_cpu(*((__be64 *)&tests[i].key[0]));
- key2 = be64_to_cpu(*((__be64 *)&tests[i].key[8]));
- nonce1 = be32_to_cpu(*((__be32 *)&tests[i].msg[0]));
- nonce2 = be32_to_cpu(*((__be32 *)&tests[i].msg[4]));
-
- put_unaligned_be32(nonce1, &msg[0]);
- put_unaligned_be32(nonce2, &msg[4]);
-
- mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
- for (j = 0; j < 32; ++j)
- sprintf(&hmac_hex[j << 1], "%02x", hmac[j] & 0xff);
- hmac_hex[64] = 0;
-
- if (memcmp(hmac_hex, tests[i].result, 64))
- pr_err("test %d failed, got %s expected %s", i,
- hmac_hex, tests[i].result);
- else
- pr_info("test %d [ ok ]", i);
- }
- return 0;
-}
-
-late_initcall(test_mptcp_crypto);
+#if IS_MODULE(CONFIG_MPTCP_KUNIT_TESTS)
+EXPORT_SYMBOL_GPL(mptcp_crypto_hmac_sha);
#endif
diff --git a/net/mptcp/crypto_test.c b/net/mptcp/crypto_test.c
new file mode 100644
index 000000000000..017248dea038
--- /dev/null
+++ b/net/mptcp/crypto_test.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <kunit/test.h>
+
+#include "protocol.h"
+
+struct test_case {
+ char *key;
+ char *msg;
+ char *result;
+};
+
+/* we can't reuse RFC 4231 test vectors, as we have constraint on the
+ * input and key size.
+ */
+static struct test_case tests[] = {
+ {
+ .key = "0b0b0b0b0b0b0b0b",
+ .msg = "48692054",
+ .result = "8385e24fb4235ac37556b6b886db106284a1da671699f46db1f235ec622dcafa",
+ },
+ {
+ .key = "aaaaaaaaaaaaaaaa",
+ .msg = "dddddddd",
+ .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492984e1eb71aff9022f71046e9",
+ },
+ {
+ .key = "0102030405060708",
+ .msg = "cdcdcdcd",
+ .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6f23b4d8c4da736a5dbbc6e7d",
+ },
+};
+
+static void mptcp_crypto_test_basic(struct kunit *test)
+{
+ char hmac[32], hmac_hex[65];
+ u32 nonce1, nonce2;
+ u64 key1, key2;
+ u8 msg[8];
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+ /* mptcp hmap will convert to be before computing the hmac */
+ key1 = be64_to_cpu(*((__be64 *)&tests[i].key[0]));
+ key2 = be64_to_cpu(*((__be64 *)&tests[i].key[8]));
+ nonce1 = be32_to_cpu(*((__be32 *)&tests[i].msg[0]));
+ nonce2 = be32_to_cpu(*((__be32 *)&tests[i].msg[4]));
+
+ put_unaligned_be32(nonce1, &msg[0]);
+ put_unaligned_be32(nonce2, &msg[4]);
+
+ mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
+ for (j = 0; j < 32; ++j)
+ sprintf(&hmac_hex[j << 1], "%02x", hmac[j] & 0xff);
+ hmac_hex[64] = 0;
+
+ KUNIT_EXPECT_STREQ(test, &hmac_hex[0], tests[i].result);
+ }
+}
+
+static struct kunit_case mptcp_crypto_test_cases[] = {
+ KUNIT_CASE(mptcp_crypto_test_basic),
+ {}
+};
+
+static struct kunit_suite mptcp_crypto_suite = {
+ .name = "mptcp-crypto",
+ .test_cases = mptcp_crypto_test_cases,
+};
+
+kunit_test_suite(mptcp_crypto_suite);
+
+MODULE_LICENSE("GPL");
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 8e39585d37f3..54b888f94009 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -112,6 +112,7 @@ static struct pernet_operations mptcp_pernet_ops = {
void __init mptcp_init(void)
{
+ mptcp_join_cookie_init();
mptcp_proto_init();
if (register_pernet_subsys(&mptcp_pernet_ops) < 0)
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
new file mode 100644
index 000000000000..5f390a97f556
--- /dev/null
+++ b/net/mptcp/mptcp_diag.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+/* MPTCP socket monitoring support
+ *
+ * Copyright (c) 2020 Red Hat
+ *
+ * Author: Paolo Abeni <pabeni@redhat.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/net.h>
+#include <linux/inet_diag.h>
+#include <net/netlink.h>
+#include <uapi/linux/mptcp.h>
+#include "protocol.h"
+
+static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
+ struct netlink_callback *cb,
+ const struct inet_diag_req_v2 *req,
+ struct nlattr *bc, bool net_admin)
+{
+ if (!inet_diag_bc_sk(bc, sk))
+ return 0;
+
+ return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, req, NLM_F_MULTI,
+ net_admin);
+}
+
+static int mptcp_diag_dump_one(struct netlink_callback *cb,
+ const struct inet_diag_req_v2 *req)
+{
+ struct sk_buff *in_skb = cb->skb;
+ struct mptcp_sock *msk = NULL;
+ struct sk_buff *rep;
+ int err = -ENOENT;
+ struct net *net;
+ struct sock *sk;
+
+ net = sock_net(in_skb->sk);
+ msk = mptcp_token_get_sock(req->id.idiag_cookie[0]);
+ if (!msk)
+ goto out_nosk;
+
+ err = -ENOMEM;
+ sk = (struct sock *)msk;
+ rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) +
+ inet_diag_msg_attrs_size() +
+ nla_total_size(sizeof(struct mptcp_info)) +
+ nla_total_size(sizeof(struct inet_diag_meminfo)) + 64,
+ GFP_KERNEL);
+ if (!rep)
+ goto out;
+
+ err = inet_sk_diag_fill(sk, inet_csk(sk), rep, cb, req, 0,
+ netlink_net_capable(in_skb, CAP_NET_ADMIN));
+ if (err < 0) {
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(rep);
+ goto out;
+ }
+ err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
+ MSG_DONTWAIT);
+ if (err > 0)
+ err = 0;
+out:
+ sock_put(sk);
+
+out_nosk:
+ return err;
+}
+
+static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ const struct inet_diag_req_v2 *r)
+{
+ bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+ struct net *net = sock_net(skb->sk);
+ struct inet_diag_dump_data *cb_data;
+ struct mptcp_sock *msk;
+ struct nlattr *bc;
+
+ cb_data = cb->data;
+ bc = cb_data->inet_diag_nla_bc;
+
+ while ((msk = mptcp_token_iter_next(net, &cb->args[0], &cb->args[1])) !=
+ NULL) {
+ struct inet_sock *inet = (struct inet_sock *)msk;
+ struct sock *sk = (struct sock *)msk;
+ int ret = 0;
+
+ if (!(r->idiag_states & (1 << sk->sk_state)))
+ goto next;
+ if (r->sdiag_family != AF_UNSPEC &&
+ sk->sk_family != r->sdiag_family)
+ goto next;
+ if (r->id.idiag_sport != inet->inet_sport &&
+ r->id.idiag_sport)
+ goto next;
+ if (r->id.idiag_dport != inet->inet_dport &&
+ r->id.idiag_dport)
+ goto next;
+
+ ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);
+next:
+ sock_put(sk);
+ if (ret < 0) {
+ /* will retry on the same position */
+ cb->args[1]--;
+ break;
+ }
+ cond_resched();
+ }
+}
+
+static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+ void *_info)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ struct mptcp_info *info = _info;
+ u32 flags = 0;
+ bool slow;
+ u8 val;
+
+ r->idiag_rqueue = sk_rmem_alloc_get(sk);
+ r->idiag_wqueue = sk_wmem_alloc_get(sk);
+ if (!info)
+ return;
+
+ slow = lock_sock_fast(sk);
+ info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
+ info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
+ info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
+ info->mptcpi_subflows_max = READ_ONCE(msk->pm.subflows_max);
+ val = READ_ONCE(msk->pm.add_addr_signal_max);
+ info->mptcpi_add_addr_signal_max = val;
+ val = READ_ONCE(msk->pm.add_addr_accept_max);
+ info->mptcpi_add_addr_accepted_max = val;
+ if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags))
+ flags |= MPTCP_INFO_FLAG_FALLBACK;
+ if (READ_ONCE(msk->can_ack))
+ flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
+ info->mptcpi_flags = flags;
+ info->mptcpi_token = READ_ONCE(msk->token);
+ info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
+ info->mptcpi_snd_una = atomic64_read(&msk->snd_una);
+ info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
+ unlock_sock_fast(sk, slow);
+}
+
+static const struct inet_diag_handler mptcp_diag_handler = {
+ .dump = mptcp_diag_dump,
+ .dump_one = mptcp_diag_dump_one,
+ .idiag_get_info = mptcp_diag_get_info,
+ .idiag_type = IPPROTO_MPTCP,
+ .idiag_info_size = sizeof(struct mptcp_info),
+};
+
+static int __init mptcp_diag_init(void)
+{
+ return inet_diag_register(&mptcp_diag_handler);
+}
+
+static void __exit mptcp_diag_exit(void)
+{
+ inet_diag_unregister(&mptcp_diag_handler);
+}
+
+module_init(mptcp_diag_init);
+module_exit(mptcp_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-262 /* AF_INET - IPPROTO_MPTCP */);
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 8f940be42f98..7fa822b55c34 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -451,6 +451,8 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
struct sk_buff *skb, struct mptcp_ext *ext)
{
+ u64 data_fin_tx_seq = READ_ONCE(mptcp_sk(subflow->conn)->write_seq);
+
if (!ext->use_map || !skb->len) {
/* RFC6824 requires a DSS mapping with specific values
* if DATA_FIN is set but no data payload is mapped
@@ -458,10 +460,13 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
ext->data_fin = 1;
ext->use_map = 1;
ext->dsn64 = 1;
- ext->data_seq = subflow->data_fin_tx_seq;
+ /* The write_seq value has already been incremented, so
+ * the actual sequence number for the DATA_FIN is one less.
+ */
+ ext->data_seq = data_fin_tx_seq - 1;
ext->subflow_seq = 0;
ext->data_len = 1;
- } else if (ext->data_seq + ext->data_len == subflow->data_fin_tx_seq) {
+ } else if (ext->data_seq + ext->data_len == data_fin_tx_seq) {
/* If there's an existing DSS mapping and it is the
* final mapping, DATA_FIN consumes 1 additional byte of
* mapping space.
@@ -477,22 +482,17 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
unsigned int dss_size = 0;
+ u64 snd_data_fin_enable;
struct mptcp_ext *mpext;
- struct mptcp_sock *msk;
unsigned int ack_size;
bool ret = false;
- u8 tcp_fin;
- if (skb) {
- mpext = mptcp_get_ext(skb);
- tcp_fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
- } else {
- mpext = NULL;
- tcp_fin = 0;
- }
+ mpext = skb ? mptcp_get_ext(skb) : NULL;
+ snd_data_fin_enable = READ_ONCE(msk->snd_data_fin_enable);
- if (!skb || (mpext && mpext->use_map) || tcp_fin) {
+ if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
unsigned int map_size;
map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
@@ -502,7 +502,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
if (mpext)
opts->ext_copy = *mpext;
- if (skb && tcp_fin && subflow->data_fin_tx_enable)
+ if (skb && snd_data_fin_enable)
mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
ret = true;
}
@@ -511,7 +511,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
* if the first subflow may have the already the remote key handy
*/
opts->ext_copy.use_ack = 0;
- msk = mptcp_sk(subflow->conn);
if (!READ_ONCE(msk->can_ack)) {
*size = ALIGN(dss_size, 4);
return ret;
@@ -624,6 +623,9 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
opts->suboptions = 0;
+ if (unlikely(mptcp_check_fallback(sk)))
+ return false;
+
if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
ret = true;
else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
@@ -706,6 +708,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
* additional ack.
*/
subflow->fully_established = 1;
+ WRITE_ONCE(msk->fully_established, true);
goto fully_established;
}
@@ -714,15 +717,14 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
*/
if (!mp_opt->mp_capable) {
subflow->mp_capable = 0;
- tcp_sk(sk)->is_mptcp = 0;
+ pr_fallback(msk);
+ __mptcp_do_fallback(msk);
return false;
}
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
- subflow->fully_established = 1;
- subflow->remote_key = mp_opt->sndr_key;
- subflow->can_ack = 1;
+ mptcp_subflow_fully_established(subflow, mp_opt);
fully_established:
if (likely(subflow->pm_notified))
@@ -780,6 +782,22 @@ static void update_una(struct mptcp_sock *msk,
}
}
+bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq)
+{
+ /* Skip if DATA_FIN was already received.
+ * If updating simultaneously with the recvmsg loop, values
+ * should match. If they mismatch, the peer is misbehaving and
+ * we will prefer the most recent information.
+ */
+ if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first))
+ return false;
+
+ WRITE_ONCE(msk->rcv_data_fin_seq, data_fin_seq);
+ WRITE_ONCE(msk->rcv_data_fin, 1);
+
+ return true;
+}
+
static bool add_addr_hmac_valid(struct mptcp_sock *msk,
struct mptcp_options_received *mp_opt)
{
@@ -814,6 +832,9 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
struct mptcp_options_received mp_opt;
struct mptcp_ext *mpext;
+ if (__mptcp_check_fallback(msk))
+ return;
+
mptcp_get_options(skb, &mp_opt);
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return;
@@ -847,6 +868,20 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
if (mp_opt.use_ack)
update_una(msk, &mp_opt);
+ /* Zero-data-length packets are dropped by the caller and not
+ * propagated to the MPTCP layer, so the skb extension does not
+ * need to be allocated or populated. DATA_FIN information, if
+ * present, needs to be updated here before the skb is freed.
+ */
+ if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
+ if (mp_opt.data_fin && mp_opt.data_len == 1 &&
+ mptcp_update_rcv_data_fin(msk, mp_opt.data_seq) &&
+ schedule_work(&msk->work))
+ sock_hold(subflow->conn);
+
+ return;
+ }
+
mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
if (!mpext)
return;
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 977d9c8b1453..a8ad20559aaa 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -10,8 +10,6 @@
#include <net/mptcp.h>
#include "protocol.h"
-static struct workqueue_struct *pm_wq;
-
/* path manager command handlers */
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
@@ -78,7 +76,7 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
return false;
msk->pm.status |= BIT(new_status);
- if (queue_work(pm_wq, &msk->pm.work))
+ if (schedule_work(&msk->work))
sock_hold((struct sock *)msk);
return true;
}
@@ -181,35 +179,6 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
return mptcp_pm_nl_get_local_id(msk, skc);
}
-static void pm_worker(struct work_struct *work)
-{
- struct mptcp_pm_data *pm = container_of(work, struct mptcp_pm_data,
- work);
- struct mptcp_sock *msk = container_of(pm, struct mptcp_sock, pm);
- struct sock *sk = (struct sock *)msk;
-
- lock_sock(sk);
- spin_lock_bh(&msk->pm.lock);
-
- pr_debug("msk=%p status=%x", msk, pm->status);
- if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) {
- pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
- mptcp_pm_nl_add_addr_received(msk);
- }
- if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
- pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
- mptcp_pm_nl_fully_established(msk);
- }
- if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
- pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
- mptcp_pm_nl_subflow_established(msk);
- }
-
- spin_unlock_bh(&msk->pm.lock);
- release_sock(sk);
- sock_put(sk);
-}
-
void mptcp_pm_data_init(struct mptcp_sock *msk)
{
msk->pm.add_addr_signaled = 0;
@@ -223,22 +192,11 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
msk->pm.status = 0;
spin_lock_init(&msk->pm.lock);
- INIT_WORK(&msk->pm.work, pm_worker);
mptcp_pm_nl_data_init(msk);
}
-void mptcp_pm_close(struct mptcp_sock *msk)
-{
- if (cancel_work_sync(&msk->pm.work))
- sock_put((struct sock *)msk);
-}
-
-void mptcp_pm_init(void)
+void __init mptcp_pm_init(void)
{
- pm_wq = alloc_workqueue("pm_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 8);
- if (!pm_wq)
- panic("Failed to allocate workqueue");
-
mptcp_pm_nl_init();
}
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index b78edf237ba0..c8820c4156e6 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -851,7 +851,7 @@ static struct pernet_operations mptcp_pm_pernet_ops = {
.size = sizeof(struct pm_nl_pernet),
};
-void mptcp_pm_nl_init(void)
+void __init mptcp_pm_nl_init(void)
{
if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0)
panic("Failed to register MPTCP PM pernet subsystem.\n");
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c0abe738e7d3..8c1d1a595701 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -16,6 +16,7 @@
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
+#include <net/tcp_states.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/transp_v6.h>
#endif
@@ -52,18 +53,10 @@ static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
return msk->subflow;
}
-static bool __mptcp_needs_tcp_fallback(const struct mptcp_sock *msk)
-{
- return msk->first && !sk_is_mptcp(msk->first);
-}
-
-static struct socket *mptcp_is_tcpsk(struct sock *sk)
+static bool mptcp_is_tcpsk(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
- if (sock->sk != sk)
- return NULL;
-
if (unlikely(sk->sk_prot == &tcp_prot)) {
/* we are being invoked after mptcp_accept() has
* accepted a non-mp-capable flow: sk is a tcp_sk,
@@ -73,59 +66,37 @@ static struct socket *mptcp_is_tcpsk(struct sock *sk)
* bypass mptcp.
*/
sock->ops = &inet_stream_ops;
- return sock;
+ return true;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
} else if (unlikely(sk->sk_prot == &tcpv6_prot)) {
sock->ops = &inet6_stream_ops;
- return sock;
+ return true;
#endif
}
- return NULL;
+ return false;
}
-static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk)
+static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
- struct socket *sock;
-
sock_owned_by_me((const struct sock *)msk);
- sock = mptcp_is_tcpsk((struct sock *)msk);
- if (unlikely(sock))
- return sock;
-
- if (likely(!__mptcp_needs_tcp_fallback(msk)))
+ if (likely(!__mptcp_check_fallback(msk)))
return NULL;
- return msk->subflow;
-}
-
-static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
-{
- return !msk->first;
+ return msk->first;
}
-static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
+static int __mptcp_socket_create(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
struct socket *ssock;
int err;
- ssock = __mptcp_tcp_fallback(msk);
- if (unlikely(ssock))
- return ssock;
-
- ssock = __mptcp_nmpc_socket(msk);
- if (ssock)
- goto set_state;
-
- if (!__mptcp_can_create_subflow(msk))
- return ERR_PTR(-EINVAL);
-
err = mptcp_subflow_create_socket(sk, &ssock);
if (err)
- return ERR_PTR(err);
+ return err;
msk->first = ssock->sk;
msk->subflow = ssock;
@@ -133,10 +104,12 @@ static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
list_add(&subflow->node, &msk->conn_list);
subflow->request_mptcp = 1;
-set_state:
- if (state != MPTCP_SAME_STATE)
- inet_sk_state_store(sk, state);
- return ssock;
+ /* accept() will wait on first subflow sk_wq, and we always wakes up
+ * via msk->sk_socket
+ */
+ RCU_INIT_POINTER(msk->first->sk_wq, &sk->sk_socket->wq);
+
+ return 0;
}
static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
@@ -170,6 +143,14 @@ static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
MPTCP_SKB_CB(skb)->offset = offset;
}
+static void mptcp_stop_timer(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
+ mptcp_sk(sk)->timer_ival = 0;
+}
+
/* both sockets must be locked */
static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk,
struct sock *ssk)
@@ -191,6 +172,139 @@ static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk,
return mptcp_subflow_data_available(ssk);
}
+static void mptcp_check_data_fin_ack(struct sock *sk)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ if (__mptcp_check_fallback(msk))
+ return;
+
+ /* Look for an acknowledged DATA_FIN */
+ if (((1 << sk->sk_state) &
+ (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) &&
+ msk->write_seq == atomic64_read(&msk->snd_una)) {
+ mptcp_stop_timer(sk);
+
+ WRITE_ONCE(msk->snd_data_fin_enable, 0);
+
+ switch (sk->sk_state) {
+ case TCP_FIN_WAIT1:
+ inet_sk_state_store(sk, TCP_FIN_WAIT2);
+ sk->sk_state_change(sk);
+ break;
+ case TCP_CLOSING:
+ fallthrough;
+ case TCP_LAST_ACK:
+ inet_sk_state_store(sk, TCP_CLOSE);
+ sk->sk_state_change(sk);
+ break;
+ }
+
+ if (sk->sk_shutdown == SHUTDOWN_MASK ||
+ sk->sk_state == TCP_CLOSE)
+ sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
+ else
+ sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+ }
+}
+
+static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ if (READ_ONCE(msk->rcv_data_fin) &&
+ ((1 << sk->sk_state) &
+ (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2))) {
+ u64 rcv_data_fin_seq = READ_ONCE(msk->rcv_data_fin_seq);
+
+ if (msk->ack_seq == rcv_data_fin_seq) {
+ if (seq)
+ *seq = rcv_data_fin_seq;
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
+{
+ long tout = ssk && inet_csk(ssk)->icsk_pending ?
+ inet_csk(ssk)->icsk_timeout - jiffies : 0;
+
+ if (tout <= 0)
+ tout = mptcp_sk(sk)->timer_ival;
+ mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
+}
+
+static void mptcp_check_data_fin(struct sock *sk)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ u64 rcv_data_fin_seq;
+
+ if (__mptcp_check_fallback(msk) || !msk->first)
+ return;
+
+ /* Need to ack a DATA_FIN received from a peer while this side
+ * of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2.
+ * msk->rcv_data_fin was set when parsing the incoming options
+ * at the subflow level and the msk lock was not held, so this
+ * is the first opportunity to act on the DATA_FIN and change
+ * the msk state.
+ *
+ * If we are caught up to the sequence number of the incoming
+ * DATA_FIN, send the DATA_ACK now and do state transition. If
+ * not caught up, do nothing and let the recv code send DATA_ACK
+ * when catching up.
+ */
+
+ if (mptcp_pending_data_fin(sk, &rcv_data_fin_seq)) {
+ struct mptcp_subflow_context *subflow;
+
+ msk->ack_seq++;
+ WRITE_ONCE(msk->rcv_data_fin, 0);
+
+ sk->sk_shutdown |= RCV_SHUTDOWN;
+ smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
+ set_bit(MPTCP_DATA_READY, &msk->flags);
+
+ switch (sk->sk_state) {
+ case TCP_ESTABLISHED:
+ inet_sk_state_store(sk, TCP_CLOSE_WAIT);
+ break;
+ case TCP_FIN_WAIT1:
+ inet_sk_state_store(sk, TCP_CLOSING);
+ break;
+ case TCP_FIN_WAIT2:
+ inet_sk_state_store(sk, TCP_CLOSE);
+ // @@ Close subflows now?
+ break;
+ default:
+ /* Other states not expected */
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ mptcp_set_timeout(sk, NULL);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ lock_sock(ssk);
+ tcp_send_ack(ssk);
+ release_sock(ssk);
+ }
+
+ sk->sk_state_change(sk);
+
+ if (sk->sk_shutdown == SHUTDOWN_MASK ||
+ sk->sk_state == TCP_CLOSE)
+ sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
+ else
+ sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+ }
+}
+
static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
struct sock *ssk,
unsigned int *bytes)
@@ -207,13 +321,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
return false;
}
- if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int rcvbuf = max(ssk->sk_rcvbuf, sk->sk_rcvbuf);
-
- if (rcvbuf > sk->sk_rcvbuf)
- sk->sk_rcvbuf = rcvbuf;
- }
-
tp = tcp_sk(ssk);
do {
u32 map_remaining, offset;
@@ -229,6 +336,15 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
if (!skb)
break;
+ if (__mptcp_check_fallback(msk)) {
+ /* if we are running under the workqueue, TCP could have
+ * collapsed skbs between dummy map creation and now
+ * be sure to adjust the size
+ */
+ map_remaining = skb->len;
+ subflow->map_data_len = skb->len;
+ }
+
offset = seq - TCP_SKB_CB(skb)->seq;
fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
if (fin) {
@@ -265,6 +381,15 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
*bytes = moved;
+ /* If the moves have caught up with the DATA_FIN sequence number
+ * it's time to ack the DATA_FIN and change socket state, but
+ * this is not a good place to change state. Let the workqueue
+ * do it.
+ */
+ if (mptcp_pending_data_fin(sk, NULL) &&
+ schedule_work(&msk->work))
+ sock_hold(sk);
+
return done;
}
@@ -329,16 +454,6 @@ static void __mptcp_flush_join_list(struct mptcp_sock *msk)
spin_unlock_bh(&msk->join_list_lock);
}
-static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
-{
- long tout = ssk && inet_csk(ssk)->icsk_pending ?
- inet_csk(ssk)->icsk_timeout - jiffies : 0;
-
- if (tout <= 0)
- tout = mptcp_sk(sk)->timer_ival;
- mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
-}
-
static bool mptcp_timer_pending(struct sock *sk)
{
return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
@@ -360,7 +475,8 @@ void mptcp_data_acked(struct sock *sk)
{
mptcp_reset_timer(sk);
- if (!sk_stream_is_writeable(sk) &&
+ if ((!sk_stream_is_writeable(sk) ||
+ (inet_sk_state_load(sk) != TCP_ESTABLISHED)) &&
schedule_work(&mptcp_sk(sk)->work))
sock_hold(sk);
}
@@ -395,14 +511,6 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk)
}
}
-static void mptcp_stop_timer(struct sock *sk)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
- mptcp_sk(sk)->timer_ival = 0;
-}
-
static bool mptcp_ext_cache_refill(struct mptcp_sock *msk)
{
const struct sock *sk = (const struct sock *)msk;
@@ -466,8 +574,15 @@ static void mptcp_clean_una(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_data_frag *dtmp, *dfrag;
- u64 snd_una = atomic64_read(&msk->snd_una);
bool cleaned = false;
+ u64 snd_una;
+
+ /* on fallback we just need to ignore snd_una, as this is really
+ * plain TCP
+ */
+ if (__mptcp_check_fallback(msk))
+ atomic64_set(&msk->snd_una, msk->write_seq);
+ snd_una = atomic64_read(&msk->snd_una);
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) {
if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
@@ -479,15 +594,20 @@ static void mptcp_clean_una(struct sock *sk)
dfrag = mptcp_rtx_head(sk);
if (dfrag && after64(snd_una, dfrag->data_seq)) {
- u64 delta = dfrag->data_seq + dfrag->data_len - snd_una;
+ u64 delta = snd_una - dfrag->data_seq;
+
+ if (WARN_ON_ONCE(delta > dfrag->data_len))
+ goto out;
dfrag->data_seq += delta;
+ dfrag->offset += delta;
dfrag->data_len -= delta;
dfrag_uncharge(sk, delta);
cleaned = true;
}
+out:
if (cleaned) {
sk_mem_reclaim_partial(sk);
@@ -673,7 +793,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
out:
if (!retransmission)
pfrag->offset += frag_truesize;
- *write_seq += ret;
+ WRITE_ONCE(*write_seq, *write_seq + ret);
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
return ret;
@@ -740,7 +860,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int mss_now = 0, size_goal = 0, ret = 0;
struct mptcp_sock *msk = mptcp_sk(sk);
struct page_frag *pfrag;
- struct socket *ssock;
size_t copied = 0;
struct sock *ssk;
bool tx_ok;
@@ -759,19 +878,15 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out;
}
-fallback:
- ssock = __mptcp_tcp_fallback(msk);
- if (unlikely(ssock)) {
- release_sock(sk);
- pr_debug("fallback passthrough");
- ret = sock_sendmsg(ssock, msg);
- return ret >= 0 ? ret + copied : (copied ? copied : ret);
- }
-
pfrag = sk_page_frag(sk);
restart:
mptcp_clean_una(sk);
+ if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) {
+ ret = -EPIPE;
+ goto out;
+ }
+
wait_for_sndbuf:
__mptcp_flush_join_list(msk);
ssk = mptcp_subflow_get_send(msk);
@@ -819,17 +934,6 @@ wait_for_sndbuf:
}
break;
}
- if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) {
- /* Can happen for passive sockets:
- * 3WHS negotiated MPTCP, but first packet after is
- * plain TCP (e.g. due to middlebox filtering unknown
- * options).
- *
- * Fall back to TCP.
- */
- release_sock(ssk);
- goto fallback;
- }
copied += ret;
@@ -880,7 +984,6 @@ wait_for_sndbuf:
mptcp_set_timeout(sk, ssk);
if (copied) {
- ret = copied;
tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle,
size_goal);
@@ -893,7 +996,7 @@ wait_for_sndbuf:
release_sock(ssk);
out:
release_sock(sk);
- return ret;
+ return copied ? : ret;
}
static void mptcp_wait_data(struct sock *sk, long *timeo)
@@ -949,6 +1052,100 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
return copied;
}
+/* receive buffer autotuning. See tcp_rcv_space_adjust for more information.
+ *
+ * Only difference: Use highest rtt estimate of the subflows in use.
+ */
+static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ u32 time, advmss = 1;
+ u64 rtt_us, mstamp;
+
+ sock_owned_by_me(sk);
+
+ if (copied <= 0)
+ return;
+
+ msk->rcvq_space.copied += copied;
+
+ mstamp = div_u64(tcp_clock_ns(), NSEC_PER_USEC);
+ time = tcp_stamp_us_delta(mstamp, msk->rcvq_space.time);
+
+ rtt_us = msk->rcvq_space.rtt_us;
+ if (rtt_us && time < (rtt_us >> 3))
+ return;
+
+ rtt_us = 0;
+ mptcp_for_each_subflow(msk, subflow) {
+ const struct tcp_sock *tp;
+ u64 sf_rtt_us;
+ u32 sf_advmss;
+
+ tp = tcp_sk(mptcp_subflow_tcp_sock(subflow));
+
+ sf_rtt_us = READ_ONCE(tp->rcv_rtt_est.rtt_us);
+ sf_advmss = READ_ONCE(tp->advmss);
+
+ rtt_us = max(sf_rtt_us, rtt_us);
+ advmss = max(sf_advmss, advmss);
+ }
+
+ msk->rcvq_space.rtt_us = rtt_us;
+ if (time < (rtt_us >> 3) || rtt_us == 0)
+ return;
+
+ if (msk->rcvq_space.copied <= msk->rcvq_space.space)
+ goto new_measure;
+
+ if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+ int rcvmem, rcvbuf;
+ u64 rcvwin, grow;
+
+ rcvwin = ((u64)msk->rcvq_space.copied << 1) + 16 * advmss;
+
+ grow = rcvwin * (msk->rcvq_space.copied - msk->rcvq_space.space);
+
+ do_div(grow, msk->rcvq_space.space);
+ rcvwin += (grow << 1);
+
+ rcvmem = SKB_TRUESIZE(advmss + MAX_TCP_HEADER);
+ while (tcp_win_from_space(sk, rcvmem) < advmss)
+ rcvmem += 128;
+
+ do_div(rcvwin, advmss);
+ rcvbuf = min_t(u64, rcvwin * rcvmem,
+ sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+
+ if (rcvbuf > sk->sk_rcvbuf) {
+ u32 window_clamp;
+
+ window_clamp = tcp_win_from_space(sk, rcvbuf);
+ WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
+
+ /* Make subflows follow along. If we do not do this, we
+ * get drops at subflow level if skbs can't be moved to
+ * the mptcp rx queue fast enough (announced rcv_win can
+ * exceed ssk->sk_rcvbuf).
+ */
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf);
+ tcp_sk(ssk)->window_clamp = window_clamp;
+ }
+ }
+ }
+
+ msk->rcvq_space.space = msk->rcvq_space.copied;
+new_measure:
+ msk->rcvq_space.copied = 0;
+ msk->rcvq_space.time = mstamp;
+}
+
static bool __mptcp_move_skbs(struct mptcp_sock *msk)
{
unsigned int moved = 0;
@@ -972,7 +1169,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
int copied = 0;
int target;
long timeo;
@@ -981,16 +1177,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return -EOPNOTSUPP;
lock_sock(sk);
- ssock = __mptcp_tcp_fallback(msk);
- if (unlikely(ssock)) {
-fallback:
- release_sock(sk);
- pr_debug("fallback-read subflow=%p",
- mptcp_subflow_ctx(ssock->sk));
- copied = sock_recvmsg(ssock, msg, flags);
- return copied;
- }
-
timeo = sock_rcvtimeo(sk, nonblock);
len = min_t(size_t, len, INT_MAX);
@@ -1056,9 +1242,6 @@ fallback:
pr_debug("block timeout %ld", timeo);
mptcp_wait_data(sk, &timeo);
- ssock = __mptcp_tcp_fallback(msk);
- if (unlikely(ssock))
- goto fallback;
}
if (skb_queue_empty(&sk->sk_receive_queue)) {
@@ -1075,6 +1258,8 @@ fallback:
set_bit(MPTCP_DATA_READY, &msk->flags);
}
out_err:
+ mptcp_rcv_space_adjust(msk, copied);
+
release_sock(sk);
return copied;
}
@@ -1083,7 +1268,7 @@ static void mptcp_retransmit_handler(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- if (atomic64_read(&msk->snd_una) == msk->write_seq) {
+ if (atomic64_read(&msk->snd_una) == READ_ONCE(msk->write_seq)) {
mptcp_stop_timer(sk);
} else {
set_bit(MPTCP_WORK_RTX, &msk->flags);
@@ -1172,6 +1357,29 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
return 0;
}
+static void pm_work(struct mptcp_sock *msk)
+{
+ struct mptcp_pm_data *pm = &msk->pm;
+
+ spin_lock_bh(&msk->pm.lock);
+
+ pr_debug("msk=%p status=%x", msk, pm->status);
+ if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) {
+ pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
+ mptcp_pm_nl_add_addr_received(msk);
+ }
+ if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
+ pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
+ mptcp_pm_nl_fully_established(msk);
+ }
+ if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
+ pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
+ mptcp_pm_nl_subflow_established(msk);
+ }
+
+ spin_unlock_bh(&msk->pm.lock);
+}
+
static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
@@ -1185,12 +1393,18 @@ static void mptcp_worker(struct work_struct *work)
lock_sock(sk);
mptcp_clean_una(sk);
+ mptcp_check_data_fin_ack(sk);
__mptcp_flush_join_list(msk);
__mptcp_move_skbs(msk);
+ if (msk->pm.status)
+ pm_work(msk);
+
if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
mptcp_check_for_eof(msk);
+ mptcp_check_data_fin(sk);
+
if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
goto unlock;
@@ -1283,7 +1497,12 @@ static int mptcp_init_sock(struct sock *sk)
if (ret)
return ret;
+ ret = __mptcp_socket_create(mptcp_sk(sk));
+ if (ret)
+ return ret;
+
sk_sockets_allocated_inc(sk);
+ sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[2];
return 0;
@@ -1308,8 +1527,7 @@ static void mptcp_cancel_work(struct sock *sk)
sock_put(sk);
}
-static void mptcp_subflow_shutdown(struct sock *ssk, int how,
- bool data_fin_tx_enable, u64 data_fin_tx_seq)
+static void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
{
lock_sock(ssk);
@@ -1322,36 +1540,84 @@ static void mptcp_subflow_shutdown(struct sock *ssk, int how,
tcp_disconnect(ssk, O_NONBLOCK);
break;
default:
- if (data_fin_tx_enable) {
- struct mptcp_subflow_context *subflow;
-
- subflow = mptcp_subflow_ctx(ssk);
- subflow->data_fin_tx_seq = data_fin_tx_seq;
- subflow->data_fin_tx_enable = 1;
+ if (__mptcp_check_fallback(mptcp_sk(sk))) {
+ pr_debug("Fallback");
+ ssk->sk_shutdown |= how;
+ tcp_shutdown(ssk, how);
+ } else {
+ pr_debug("Sending DATA_FIN on subflow %p", ssk);
+ mptcp_set_timeout(sk, ssk);
+ tcp_send_ack(ssk);
}
-
- ssk->sk_shutdown |= how;
- tcp_shutdown(ssk, how);
break;
}
- /* Wake up anyone sleeping in poll. */
- ssk->sk_state_change(ssk);
release_sock(ssk);
}
-/* Called with msk lock held, releases such lock before returning */
+static const unsigned char new_state[16] = {
+ /* current state: new state: action: */
+ [0 /* (Invalid) */] = TCP_CLOSE,
+ [TCP_ESTABLISHED] = TCP_FIN_WAIT1 | TCP_ACTION_FIN,
+ [TCP_SYN_SENT] = TCP_CLOSE,
+ [TCP_SYN_RECV] = TCP_FIN_WAIT1 | TCP_ACTION_FIN,
+ [TCP_FIN_WAIT1] = TCP_FIN_WAIT1,
+ [TCP_FIN_WAIT2] = TCP_FIN_WAIT2,
+ [TCP_TIME_WAIT] = TCP_CLOSE, /* should not happen ! */
+ [TCP_CLOSE] = TCP_CLOSE,
+ [TCP_CLOSE_WAIT] = TCP_LAST_ACK | TCP_ACTION_FIN,
+ [TCP_LAST_ACK] = TCP_LAST_ACK,
+ [TCP_LISTEN] = TCP_CLOSE,
+ [TCP_CLOSING] = TCP_CLOSING,
+ [TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! */
+};
+
+static int mptcp_close_state(struct sock *sk)
+{
+ int next = (int)new_state[sk->sk_state];
+ int ns = next & TCP_STATE_MASK;
+
+ inet_sk_state_store(sk, ns);
+
+ return next & TCP_ACTION_FIN;
+}
+
static void mptcp_close(struct sock *sk, long timeout)
{
struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);
LIST_HEAD(conn_list);
- u64 data_fin_tx_seq;
lock_sock(sk);
+ sk->sk_shutdown = SHUTDOWN_MASK;
+
+ if (sk->sk_state == TCP_LISTEN) {
+ inet_sk_state_store(sk, TCP_CLOSE);
+ goto cleanup;
+ } else if (sk->sk_state == TCP_CLOSE) {
+ goto cleanup;
+ }
+
+ if (__mptcp_check_fallback(msk)) {
+ goto update_state;
+ } else if (mptcp_close_state(sk)) {
+ pr_debug("Sending DATA_FIN sk=%p", sk);
+ WRITE_ONCE(msk->write_seq, msk->write_seq + 1);
+ WRITE_ONCE(msk->snd_data_fin_enable, 1);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
+
+ mptcp_subflow_shutdown(sk, tcp_sk, SHUTDOWN_MASK);
+ }
+ }
+
+ sk_stream_wait_close(sk, timeout);
+update_state:
inet_sk_state_store(sk, TCP_CLOSE);
+cleanup:
/* be sure to always acquire the join list lock, to sync vs
* mptcp_finish_join().
*/
@@ -1360,22 +1626,16 @@ static void mptcp_close(struct sock *sk, long timeout)
spin_unlock_bh(&msk->join_list_lock);
list_splice_init(&msk->conn_list, &conn_list);
- data_fin_tx_seq = msk->write_seq;
-
__mptcp_clear_xmit(sk);
release_sock(sk);
list_for_each_entry_safe(subflow, tmp, &conn_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-
- subflow->data_fin_tx_seq = data_fin_tx_seq;
- subflow->data_fin_tx_enable = 1;
__mptcp_close_ssk(sk, ssk, subflow, timeout);
}
mptcp_cancel_work(sk);
- mptcp_pm_close(msk);
__skb_queue_purge(&sk->sk_receive_queue);
@@ -1447,20 +1707,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
msk->subflow = NULL;
-
- if (unlikely(mptcp_token_new_accept(subflow_req->token, nsk))) {
- nsk->sk_state = TCP_CLOSE;
- bh_unlock_sock(nsk);
-
- /* we can't call into mptcp_close() here - possible BH context
- * free the sock directly.
- * sk_clone_lock() sets nsk refcnt to two, hence call sk_free()
- * too.
- */
- sk_common_release(nsk);
- sk_free(nsk);
- return NULL;
- }
+ WRITE_ONCE(msk->fully_established, false);
msk->write_seq = subflow_req->idsn + 1;
atomic64_set(&msk->snd_una, msk->write_seq);
@@ -1482,6 +1729,22 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
return nsk;
}
+void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
+{
+ const struct tcp_sock *tp = tcp_sk(ssk);
+
+ msk->rcvq_space.copied = 0;
+ msk->rcvq_space.rtt_us = 0;
+
+ msk->rcvq_space.time = tp->tcp_mstamp;
+
+ /* initial rcv_space offering made to peer */
+ msk->rcvq_space.space = min_t(u32, tp->rcv_wnd,
+ TCP_INIT_CWND * tp->advmss);
+ if (msk->rcvq_space.space == 0)
+ msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
+}
+
static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
bool kern)
{
@@ -1501,7 +1764,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
return NULL;
pr_debug("msk=%p, subflow is mptcp=%d", msk, sk_is_mptcp(newsk));
-
if (sk_is_mptcp(newsk)) {
struct mptcp_subflow_context *subflow;
struct sock *new_mptcp_sock;
@@ -1529,8 +1791,8 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
newsk = new_mptcp_sock;
mptcp_copy_inaddrs(newsk, ssk);
list_add(&subflow->node, &msk->conn_list);
- inet_sk_state_store(newsk, TCP_ESTABLISHED);
+ mptcp_rcv_space_init(msk, ssk);
bh_unlock_sock(new_mptcp_sock);
__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
@@ -1547,21 +1809,82 @@ static void mptcp_destroy(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- mptcp_token_destroy(msk->token);
+ mptcp_token_destroy(msk);
if (msk->cached_ext)
__skb_ext_put(msk->cached_ext);
sk_sockets_allocated_dec(sk);
}
+static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
+ sockptr_t optval, unsigned int optlen)
+{
+ struct sock *sk = (struct sock *)msk;
+ struct socket *ssock;
+ int ret;
+
+ switch (optname) {
+ case SO_REUSEPORT:
+ case SO_REUSEADDR:
+ lock_sock(sk);
+ ssock = __mptcp_nmpc_socket(msk);
+ if (!ssock) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
+ ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
+ if (ret == 0) {
+ if (optname == SO_REUSEPORT)
+ sk->sk_reuseport = ssock->sk->sk_reuseport;
+ else if (optname == SO_REUSEADDR)
+ sk->sk_reuse = ssock->sk->sk_reuse;
+ }
+ release_sock(sk);
+ return ret;
+ }
+
+ return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
+}
+
+static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
+ sockptr_t optval, unsigned int optlen)
+{
+ struct sock *sk = (struct sock *)msk;
+ int ret = -EOPNOTSUPP;
+ struct socket *ssock;
+
+ switch (optname) {
+ case IPV6_V6ONLY:
+ lock_sock(sk);
+ ssock = __mptcp_nmpc_socket(msk);
+ if (!ssock) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
+ ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
+ if (ret == 0)
+ sk->sk_ipv6only = ssock->sk->sk_ipv6only;
+
+ release_sock(sk);
+ break;
+ }
+
+ return ret;
+}
+
static int mptcp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
+ struct sock *ssk;
pr_debug("msk=%p", msk);
+ if (level == SOL_SOCKET)
+ return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
+
/* @@ the meaning of setsockopt() when the socket is connected and
* there are multiple subflows is not yet defined. It is up to the
* MPTCP-level socket to configure the subflows until the subflow
@@ -1569,11 +1892,13 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname,
* to the one remaining subflow.
*/
lock_sock(sk);
- ssock = __mptcp_tcp_fallback(msk);
+ ssk = __mptcp_tcp_fallback(msk);
release_sock(sk);
- if (ssock)
- return tcp_setsockopt(ssock->sk, level, optname, optval,
- optlen);
+ if (ssk)
+ return tcp_setsockopt(ssk, level, optname, optval, optlen);
+
+ if (level == SOL_IPV6)
+ return mptcp_setsockopt_v6(msk, optname, optval, optlen);
return -EOPNOTSUPP;
}
@@ -1582,7 +1907,7 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *option)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
+ struct sock *ssk;
pr_debug("msk=%p", msk);
@@ -1593,11 +1918,10 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname,
* to the one remaining subflow.
*/
lock_sock(sk);
- ssock = __mptcp_tcp_fallback(msk);
+ ssk = __mptcp_tcp_fallback(msk);
release_sock(sk);
- if (ssock)
- return tcp_getsockopt(ssock->sk, level, optname, optval,
- option);
+ if (ssk)
+ return tcp_getsockopt(ssk, level, optname, optval, option);
return -EOPNOTSUPP;
}
@@ -1636,6 +1960,20 @@ static void mptcp_release_cb(struct sock *sk)
}
}
+static int mptcp_hash(struct sock *sk)
+{
+ /* should never be called,
+ * we hash the TCP subflows not the master socket
+ */
+ WARN_ON_ONCE(1);
+ return 0;
+}
+
+static void mptcp_unhash(struct sock *sk)
+{
+ /* called from sk_common_release(), but nothing to do here */
+}
+
static int mptcp_get_port(struct sock *sk, unsigned short snum)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1660,32 +1998,26 @@ void mptcp_finish_connect(struct sock *ssk)
sk = subflow->conn;
msk = mptcp_sk(sk);
- if (!subflow->mp_capable) {
- MPTCP_INC_STATS(sock_net(sk),
- MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
- return;
- }
-
pr_debug("msk=%p, token=%u", sk, subflow->token);
mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
ack_seq++;
subflow->map_seq = ack_seq;
subflow->map_subflow_seq = 1;
- subflow->rel_write_seq = 1;
/* the socket is not connected yet, no msk/subflow ops can access/race
* accessing the field below
*/
WRITE_ONCE(msk->remote_key, subflow->remote_key);
WRITE_ONCE(msk->local_key, subflow->local_key);
- WRITE_ONCE(msk->token, subflow->token);
WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
WRITE_ONCE(msk->ack_seq, ack_seq);
WRITE_ONCE(msk->can_ack, 1);
atomic64_set(&msk->snd_una, msk->write_seq);
mptcp_pm_new_connection(msk, 0);
+
+ mptcp_rcv_space_init(msk, ssk);
}
static void mptcp_sock_graft(struct sock *sk, struct socket *parent)
@@ -1708,7 +2040,7 @@ bool mptcp_finish_join(struct sock *sk)
pr_debug("msk=%p, subflow=%p", msk, subflow);
/* mptcp socket already closing? */
- if (inet_sk_state_load(parent) != TCP_ESTABLISHED)
+ if (!mptcp_is_fully_established(parent))
return false;
if (!msk->pm.server_side)
@@ -1761,8 +2093,8 @@ static struct proto mptcp_prot = {
.sendmsg = mptcp_sendmsg,
.recvmsg = mptcp_recvmsg,
.release_cb = mptcp_release_cb,
- .hash = inet_hash,
- .unhash = inet_unhash,
+ .hash = mptcp_hash,
+ .unhash = mptcp_unhash,
.get_port = mptcp_get_port,
.sockets_allocated = &mptcp_sockets_allocated,
.memory_allocated = &tcp_memory_allocated,
@@ -1771,6 +2103,7 @@ static struct proto mptcp_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
.sysctl_mem = sysctl_tcp_mem,
.obj_size = sizeof(struct mptcp_sock),
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
.no_autobind = true,
};
@@ -1781,9 +2114,9 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
int err;
lock_sock(sock->sk);
- ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
- if (IS_ERR(ssock)) {
- err = PTR_ERR(ssock);
+ ssock = __mptcp_nmpc_socket(msk);
+ if (!ssock) {
+ err = -EINVAL;
goto unlock;
}
@@ -1796,10 +2129,18 @@ unlock:
return err;
}
+static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow)
+{
+ subflow->request_mptcp = 0;
+ __mptcp_do_fallback(msk);
+}
+
static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
+ struct mptcp_subflow_context *subflow;
struct socket *ssock;
int err;
@@ -1812,19 +2153,24 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
goto do_connect;
}
- ssock = __mptcp_socket_create(msk, TCP_SYN_SENT);
- if (IS_ERR(ssock)) {
- err = PTR_ERR(ssock);
+ ssock = __mptcp_nmpc_socket(msk);
+ if (!ssock) {
+ err = -EINVAL;
goto unlock;
}
+ mptcp_token_destroy(msk);
+ inet_sk_state_store(sock->sk, TCP_SYN_SENT);
+ subflow = mptcp_subflow_ctx(ssock->sk);
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
* TCP option space.
*/
if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
- mptcp_subflow_ctx(ssock->sk)->request_mptcp = 0;
+ mptcp_subflow_early_fallback(msk, subflow);
#endif
+ if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk))
+ mptcp_subflow_early_fallback(msk, subflow);
do_connect:
err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
@@ -1843,42 +2189,6 @@ unlock:
return err;
}
-static int mptcp_v4_getname(struct socket *sock, struct sockaddr *uaddr,
- int peer)
-{
- if (sock->sk->sk_prot == &tcp_prot) {
- /* we are being invoked from __sys_accept4, after
- * mptcp_accept() has just accepted a non-mp-capable
- * flow: sk is a tcp_sk, not an mptcp one.
- *
- * Hand the socket over to tcp so all further socket ops
- * bypass mptcp.
- */
- sock->ops = &inet_stream_ops;
- }
-
- return inet_getname(sock, uaddr, peer);
-}
-
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-static int mptcp_v6_getname(struct socket *sock, struct sockaddr *uaddr,
- int peer)
-{
- if (sock->sk->sk_prot == &tcpv6_prot) {
- /* we are being invoked from __sys_accept4 after
- * mptcp_accept() has accepted a non-mp-capable
- * subflow: sk is a tcp_sk, not mptcp.
- *
- * Hand the socket over to tcp so all further
- * socket ops bypass mptcp.
- */
- sock->ops = &inet6_stream_ops;
- }
-
- return inet6_getname(sock, uaddr, peer);
-}
-#endif
-
static int mptcp_listen(struct socket *sock, int backlog)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
@@ -1888,12 +2198,14 @@ static int mptcp_listen(struct socket *sock, int backlog)
pr_debug("msk=%p", msk);
lock_sock(sock->sk);
- ssock = __mptcp_socket_create(msk, TCP_LISTEN);
- if (IS_ERR(ssock)) {
- err = PTR_ERR(ssock);
+ ssock = __mptcp_nmpc_socket(msk);
+ if (!ssock) {
+ err = -EINVAL;
goto unlock;
}
+ mptcp_token_destroy(msk);
+ inet_sk_state_store(sock->sk, TCP_LISTEN);
sock_set_flag(sock->sk, SOCK_RCU_FREE);
err = ssock->ops->listen(ssock, backlog);
@@ -1906,15 +2218,6 @@ unlock:
return err;
}
-static bool is_tcp_proto(const struct proto *p)
-{
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- return p == &tcp_prot || p == &tcpv6_prot;
-#else
- return p == &tcp_prot;
-#endif
-}
-
static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
int flags, bool kern)
{
@@ -1932,11 +2235,12 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssock)
goto unlock_fail;
+ clear_bit(MPTCP_DATA_READY, &msk->flags);
sock_hold(ssock->sk);
release_sock(sock->sk);
err = ssock->ops->accept(sock, newsock, flags, kern);
- if (err == 0 && !is_tcp_proto(newsock->sk->sk_prot)) {
+ if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) {
struct mptcp_sock *msk = mptcp_sk(newsock->sk);
struct mptcp_subflow_context *subflow;
@@ -1944,7 +2248,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
* This is needed so NOSPACE flag can be set from tcp stack.
*/
__mptcp_flush_join_list(msk);
- list_for_each_entry(subflow, &msk->conn_list, node) {
+ mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
if (!ssk->sk_socket)
@@ -1952,6 +2256,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
}
}
+ if (inet_csk_listen_poll(ssock->sk))
+ set_bit(MPTCP_DATA_READY, &msk->flags);
sock_put(ssock->sk);
return err;
@@ -1960,39 +2266,36 @@ unlock_fail:
return -EINVAL;
}
+static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
+{
+ return test_bit(MPTCP_DATA_READY, &msk->flags) ? EPOLLIN | EPOLLRDNORM :
+ 0;
+}
+
static __poll_t mptcp_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait)
{
struct sock *sk = sock->sk;
struct mptcp_sock *msk;
- struct socket *ssock;
__poll_t mask = 0;
+ int state;
msk = mptcp_sk(sk);
- lock_sock(sk);
- ssock = __mptcp_tcp_fallback(msk);
- if (!ssock)
- ssock = __mptcp_nmpc_socket(msk);
- if (ssock) {
- mask = ssock->ops->poll(file, ssock, wait);
- release_sock(sk);
- return mask;
- }
-
- release_sock(sk);
sock_poll_wait(file, sock, wait);
- lock_sock(sk);
- if (test_bit(MPTCP_DATA_READY, &msk->flags))
- mask = EPOLLIN | EPOLLRDNORM;
- if (sk_stream_is_writeable(sk) &&
- test_bit(MPTCP_SEND_SPACE, &msk->flags))
- mask |= EPOLLOUT | EPOLLWRNORM;
+ state = inet_sk_state_load(sk);
+ if (state == TCP_LISTEN)
+ return mptcp_check_readable(msk);
+
+ if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
+ mask |= mptcp_check_readable(msk);
+ if (sk_stream_is_writeable(sk) &&
+ test_bit(MPTCP_SEND_SPACE, &msk->flags))
+ mask |= EPOLLOUT | EPOLLWRNORM;
+ }
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
- release_sock(sk);
-
return mask;
}
@@ -2000,23 +2303,13 @@ static int mptcp_shutdown(struct socket *sock, int how)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct mptcp_subflow_context *subflow;
- struct socket *ssock;
int ret = 0;
pr_debug("sk=%p, how=%d", msk, how);
lock_sock(sock->sk);
- ssock = __mptcp_tcp_fallback(msk);
- if (ssock) {
- release_sock(sock->sk);
- return inet_shutdown(ssock, how);
- }
-
- if (how == SHUT_WR || how == SHUT_RDWR)
- inet_sk_state_store(sock->sk, TCP_FIN_WAIT1);
how++;
-
if ((how & ~SHUTDOWN_MASK) || !how) {
ret = -EINVAL;
goto out_unlock;
@@ -2030,13 +2323,36 @@ static int mptcp_shutdown(struct socket *sock, int how)
sock->state = SS_CONNECTED;
}
- __mptcp_flush_join_list(msk);
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
+ /* If we've already sent a FIN, or it's a closed state, skip this. */
+ if (__mptcp_check_fallback(msk)) {
+ if (how == SHUT_WR || how == SHUT_RDWR)
+ inet_sk_state_store(sock->sk, TCP_FIN_WAIT1);
- mptcp_subflow_shutdown(tcp_sk, how, 1, msk->write_seq);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
+
+ mptcp_subflow_shutdown(sock->sk, tcp_sk, how);
+ }
+ } else if ((how & SEND_SHUTDOWN) &&
+ ((1 << sock->sk->sk_state) &
+ (TCPF_ESTABLISHED | TCPF_SYN_SENT |
+ TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) &&
+ mptcp_close_state(sock->sk)) {
+ __mptcp_flush_join_list(msk);
+
+ WRITE_ONCE(msk->write_seq, msk->write_seq + 1);
+ WRITE_ONCE(msk->snd_data_fin_enable, 1);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
+
+ mptcp_subflow_shutdown(sock->sk, tcp_sk, how);
+ }
}
+ /* Wake up anyone sleeping in poll. */
+ sock->sk->sk_state_change(sock->sk);
+
out_unlock:
release_sock(sock->sk);
@@ -2051,7 +2367,7 @@ static const struct proto_ops mptcp_stream_ops = {
.connect = mptcp_stream_connect,
.socketpair = sock_no_socketpair,
.accept = mptcp_stream_accept,
- .getname = mptcp_v4_getname,
+ .getname = inet_getname,
.poll = mptcp_poll,
.ioctl = inet_ioctl,
.gettstamp = sock_gettstamp,
@@ -2063,10 +2379,6 @@ static const struct proto_ops mptcp_stream_ops = {
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
-#endif
};
static struct inet_protosw mptcp_protosw = {
@@ -2077,7 +2389,7 @@ static struct inet_protosw mptcp_protosw = {
.flags = INET_PROTOSW_ICSK,
};
-void mptcp_proto_init(void)
+void __init mptcp_proto_init(void)
{
mptcp_prot.h.hashinfo = tcp_prot.h.hashinfo;
@@ -2086,6 +2398,7 @@ void mptcp_proto_init(void)
mptcp_subflow_init();
mptcp_pm_init();
+ mptcp_token_init();
if (proto_register(&mptcp_prot, 1) != 0)
panic("Failed to register MPTCP proto.\n");
@@ -2104,7 +2417,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
.connect = mptcp_stream_connect,
.socketpair = sock_no_socketpair,
.accept = mptcp_stream_accept,
- .getname = mptcp_v6_getname,
+ .getname = inet6_getname,
.poll = mptcp_poll,
.ioctl = inet6_ioctl,
.gettstamp = sock_gettstamp,
@@ -2118,8 +2431,6 @@ static const struct proto_ops mptcp_v6_stream_ops = {
.sendpage = inet_sendpage,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet6_compat_ioctl,
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
#endif
};
@@ -2139,7 +2450,7 @@ static struct inet_protosw mptcp_v6_protosw = {
.flags = INET_PROTOSW_ICSK,
};
-int mptcp_proto_v6_init(void)
+int __init mptcp_proto_v6_init(void)
{
int err;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index c6eeaf3e8dcb..60b27d44c184 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -89,6 +89,7 @@
#define MPTCP_SEND_SPACE 1
#define MPTCP_WORK_RTX 2
#define MPTCP_WORK_EOF 3
+#define MPTCP_FALLBACK_DONE 4
struct mptcp_options_received {
u64 sndr_key;
@@ -173,8 +174,6 @@ struct mptcp_pm_data {
u8 local_addr_max;
u8 subflows_max;
u8 status;
-
- struct work_struct work;
};
struct mptcp_data_frag {
@@ -194,11 +193,15 @@ struct mptcp_sock {
u64 remote_key;
u64 write_seq;
u64 ack_seq;
+ u64 rcv_data_fin_seq;
atomic64_t snd_una;
unsigned long timer_ival;
u32 token;
unsigned long flags;
bool can_ack;
+ bool fully_established;
+ bool rcv_data_fin;
+ bool snd_data_fin_enable;
spinlock_t join_list_lock;
struct work_struct work;
struct list_head conn_list;
@@ -208,6 +211,12 @@ struct mptcp_sock {
struct socket *subflow; /* outgoing connect/listener/!mp_capable */
struct sock *first;
struct mptcp_pm_data pm;
+ struct {
+ u32 space; /* bytes copied in last measurement window */
+ u32 copied; /* bytes copied in this measurement window */
+ u64 time; /* start time of measurement window */
+ u64 rtt_us; /* last maximum rtt of subflows */
+ } rcvq_space;
};
#define mptcp_for_each_subflow(__msk, __subflow) \
@@ -250,6 +259,7 @@ struct mptcp_subflow_request_sock {
u32 local_nonce;
u32 remote_nonce;
struct mptcp_sock *msk;
+ struct hlist_nulls_node token_node;
};
static inline struct mptcp_subflow_request_sock *
@@ -284,10 +294,8 @@ struct mptcp_subflow_context {
backup : 1,
data_avail : 1,
rx_eof : 1,
- data_fin_tx_enable : 1,
use_64bit_ack : 1, /* Set when we received a 64-bit DSN */
can_ack : 1; /* only after processing the remote a key */
- u64 data_fin_tx_seq;
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
@@ -336,8 +344,10 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
}
int mptcp_is_enabled(struct net *net);
+void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
+ struct mptcp_options_received *mp_opt);
bool mptcp_subflow_data_available(struct sock *sk);
-void mptcp_subflow_init(void);
+void __init mptcp_subflow_init(void);
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, int ifindex,
@@ -355,14 +365,9 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}
-extern const struct inet_connection_sock_af_ops ipv4_specific;
+void __init mptcp_proto_init(void);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-extern const struct inet_connection_sock_af_ops ipv6_specific;
-#endif
-
-void mptcp_proto_init(void);
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-int mptcp_proto_v6_init(void);
+int __init mptcp_proto_v6_init(void);
#endif
struct sock *mptcp_sk_clone(const struct sock *sk,
@@ -372,36 +377,41 @@ void mptcp_get_options(const struct sk_buff *skb,
struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk);
+static inline bool mptcp_is_fully_established(struct sock *sk)
+{
+ return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
+ READ_ONCE(mptcp_sk(sk)->fully_established);
+}
+void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk);
void mptcp_data_acked(struct sock *sk);
void mptcp_subflow_eof(struct sock *sk);
+bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq);
+
+void __init mptcp_token_init(void);
+static inline void mptcp_token_init_request(struct request_sock *req)
+{
+ mptcp_subflow_rsk(req)->token_node.pprev = NULL;
+}
int mptcp_token_new_request(struct request_sock *req);
-void mptcp_token_destroy_request(u32 token);
+void mptcp_token_destroy_request(struct request_sock *req);
int mptcp_token_new_connect(struct sock *sk);
-int mptcp_token_new_accept(u32 token, struct sock *conn);
+void mptcp_token_accept(struct mptcp_subflow_request_sock *r,
+ struct mptcp_sock *msk);
+bool mptcp_token_exists(u32 token);
struct mptcp_sock *mptcp_token_get_sock(u32 token);
-void mptcp_token_destroy(u32 token);
+struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot,
+ long *s_num);
+void mptcp_token_destroy(struct mptcp_sock *msk);
void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
-static inline void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn)
-{
- /* we might consider a faster version that computes the key as a
- * hash of some information available in the MPTCP socket. Use
- * random data at the moment, as it's probably the safest option
- * in case multiple sockets are opened in different namespaces at
- * the same time.
- */
- get_random_bytes(key, sizeof(u64));
- mptcp_crypto_key_sha(*key, token, idsn);
-}
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
-void mptcp_pm_init(void);
+void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
-void mptcp_pm_close(struct mptcp_sock *msk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side);
void mptcp_pm_fully_established(struct mptcp_sock *msk);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
@@ -433,7 +443,7 @@ bool mptcp_pm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_addr_info *saddr);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
-void mptcp_pm_nl_init(void);
+void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
void mptcp_pm_nl_fully_established(struct mptcp_sock *msk);
void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk);
@@ -454,4 +464,66 @@ static inline bool before64(__u64 seq1, __u64 seq2)
void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);
+static inline bool __mptcp_check_fallback(struct mptcp_sock *msk)
+{
+ return test_bit(MPTCP_FALLBACK_DONE, &msk->flags);
+}
+
+static inline bool mptcp_check_fallback(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ return __mptcp_check_fallback(msk);
+}
+
+static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
+{
+ if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) {
+ pr_debug("TCP fallback already done (msk=%p)", msk);
+ return;
+ }
+ set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
+}
+
+static inline void mptcp_do_fallback(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ __mptcp_do_fallback(msk);
+}
+
+#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a)
+
+static inline bool subflow_simultaneous_connect(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct sock *parent = subflow->conn;
+
+ return sk->sk_state == TCP_ESTABLISHED &&
+ !mptcp_sk(parent)->pm.server_side &&
+ !subflow->conn_finished;
+}
+
+#ifdef CONFIG_SYN_COOKIES
+void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req,
+ struct sk_buff *skb);
+bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req,
+ struct sk_buff *skb);
+void __init mptcp_join_cookie_init(void);
+#else
+static inline void
+subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req,
+ struct sk_buff *skb) {}
+static inline bool
+mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req,
+ struct sk_buff *skb)
+{
+ return false;
+}
+
+static inline void mptcp_join_cookie_init(void) {}
+#endif
+
#endif /* __MPTCP_PROTOCOL_H */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 3838a0b3a21f..a4cc4591bd4e 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -29,40 +29,6 @@ static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
}
-static int subflow_rebuild_header(struct sock *sk)
-{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- int local_id, err = 0;
-
- if (subflow->request_mptcp && !subflow->token) {
- pr_debug("subflow=%p", sk);
- err = mptcp_token_new_connect(sk);
- } else if (subflow->request_join && !subflow->local_nonce) {
- struct mptcp_sock *msk = (struct mptcp_sock *)subflow->conn;
-
- pr_debug("subflow=%p", sk);
-
- do {
- get_random_bytes(&subflow->local_nonce, sizeof(u32));
- } while (!subflow->local_nonce);
-
- if (subflow->local_id)
- goto out;
-
- local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
- if (local_id < 0)
- return -EINVAL;
-
- subflow->local_id = local_id;
- }
-
-out:
- if (err)
- return err;
-
- return subflow->icsk_af_ops->rebuild_header(sk);
-}
-
static void subflow_req_destructor(struct request_sock *req)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
@@ -72,8 +38,7 @@ static void subflow_req_destructor(struct request_sock *req)
if (subflow_req->msk)
sock_put((struct sock *)subflow_req->msk);
- if (subflow_req->mp_capable)
- mptcp_token_destroy_request(subflow_req->token);
+ mptcp_token_destroy_request(req);
tcp_request_sock_ops.destructor(req);
}
@@ -88,6 +53,12 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}
+static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
+{
+ return mptcp_is_fully_established((void *)msk) &&
+ READ_ONCE(msk->pm.accept_subflow);
+}
+
/* validate received token and create truncated hmac and nonce for SYN-ACK */
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
const struct sk_buff *skb)
@@ -120,30 +91,43 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
return msk;
}
-static void subflow_init_req(struct request_sock *req,
- const struct sock *sk_listener,
- struct sk_buff *skb)
+static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
{
- struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
- struct mptcp_options_received mp_opt;
-
- pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
-
- mptcp_get_options(skb, &mp_opt);
subflow_req->mp_capable = 0;
subflow_req->mp_join = 0;
subflow_req->msk = NULL;
+ mptcp_token_init_request(req);
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
* TCP option space.
*/
if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
- return;
+ return -EINVAL;
#endif
+ return 0;
+}
+
+static void subflow_init_req(struct request_sock *req,
+ const struct sock *sk_listener,
+ struct sk_buff *skb)
+{
+ struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
+ struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
+ struct mptcp_options_received mp_opt;
+ int ret;
+
+ pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
+
+ ret = __subflow_init_req(req, sk_listener);
+ if (ret)
+ return;
+
+ mptcp_get_options(skb, &mp_opt);
+
if (mp_opt.mp_capable) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
@@ -154,13 +138,33 @@ static void subflow_init_req(struct request_sock *req,
}
if (mp_opt.mp_capable && listener->request_mptcp) {
- int err;
+ int err, retries = 4;
+
+ subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
+again:
+ do {
+ get_random_bytes(&subflow_req->local_key, sizeof(subflow_req->local_key));
+ } while (subflow_req->local_key == 0);
+
+ if (unlikely(req->syncookie)) {
+ mptcp_crypto_key_sha(subflow_req->local_key,
+ &subflow_req->token,
+ &subflow_req->idsn);
+ if (mptcp_token_exists(subflow_req->token)) {
+ if (retries-- > 0)
+ goto again;
+ } else {
+ subflow_req->mp_capable = 1;
+ }
+ return;
+ }
err = mptcp_token_new_request(req);
if (err == 0)
subflow_req->mp_capable = 1;
+ else if (retries-- > 0)
+ goto again;
- subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
} else if (mp_opt.mp_join && listener->request_mptcp) {
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
subflow_req->mp_join = 1;
@@ -169,11 +173,60 @@ static void subflow_init_req(struct request_sock *req,
subflow_req->token = mp_opt.token;
subflow_req->remote_nonce = mp_opt.nonce;
subflow_req->msk = subflow_token_join_request(req, skb);
+
+ if (unlikely(req->syncookie) && subflow_req->msk) {
+ if (mptcp_can_accept_new_subflow(subflow_req->msk))
+ subflow_init_req_cookie_join_save(subflow_req, skb);
+ }
+
pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
subflow_req->remote_nonce, subflow_req->msk);
}
}
+int mptcp_subflow_init_cookie_req(struct request_sock *req,
+ const struct sock *sk_listener,
+ struct sk_buff *skb)
+{
+ struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
+ struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
+ struct mptcp_options_received mp_opt;
+ int err;
+
+ err = __subflow_init_req(req, sk_listener);
+ if (err)
+ return err;
+
+ mptcp_get_options(skb, &mp_opt);
+
+ if (mp_opt.mp_capable && mp_opt.mp_join)
+ return -EINVAL;
+
+ if (mp_opt.mp_capable && listener->request_mptcp) {
+ if (mp_opt.sndr_key == 0)
+ return -EINVAL;
+
+ subflow_req->local_key = mp_opt.rcvr_key;
+ err = mptcp_token_new_request(req);
+ if (err)
+ return err;
+
+ subflow_req->mp_capable = 1;
+ subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
+ } else if (mp_opt.mp_join && listener->request_mptcp) {
+ if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
+ return -EINVAL;
+
+ if (mptcp_can_accept_new_subflow(subflow_req->msk))
+ subflow_req->mp_join = 1;
+
+ subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req);
+
static void subflow_v4_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
@@ -222,7 +275,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_options_received mp_opt;
struct sock *parent = subflow->conn;
- struct tcp_sock *tp = tcp_sk(sk);
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
@@ -235,46 +287,40 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (subflow->conn_finished)
return;
+ subflow->rel_write_seq = 1;
subflow->conn_finished = 1;
+ subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
+ pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
mptcp_get_options(skb, &mp_opt);
- if (subflow->request_mptcp && mp_opt.mp_capable) {
+ if (subflow->request_mptcp) {
+ if (!mp_opt.mp_capable) {
+ MPTCP_INC_STATS(sock_net(sk),
+ MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
+ mptcp_do_fallback(sk);
+ pr_fallback(mptcp_sk(subflow->conn));
+ goto fallback;
+ }
+
subflow->mp_capable = 1;
subflow->can_ack = 1;
subflow->remote_key = mp_opt.sndr_key;
pr_debug("subflow=%p, remote_key=%llu", subflow,
subflow->remote_key);
- } else if (subflow->request_join && mp_opt.mp_join) {
- subflow->mp_join = 1;
+ mptcp_finish_connect(sk);
+ } else if (subflow->request_join) {
+ u8 hmac[SHA256_DIGEST_SIZE];
+
+ if (!mp_opt.mp_join)
+ goto do_reset;
+
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
subflow->thmac, subflow->remote_nonce);
- } else if (subflow->request_mptcp) {
- tp->is_mptcp = 0;
- }
-
- if (!tp->is_mptcp)
- return;
- if (subflow->mp_capable) {
- pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
- subflow->remote_key);
- mptcp_finish_connect(sk);
-
- if (skb) {
- pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq);
- subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
- }
- } else if (subflow->mp_join) {
- u8 hmac[SHA256_DIGEST_SIZE];
-
- pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u",
- subflow, subflow->thmac,
- subflow->remote_nonce);
if (!subflow_thmac_valid(subflow)) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
- subflow->mp_join = 0;
goto do_reset;
}
@@ -282,24 +328,26 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->local_nonce,
subflow->remote_nonce,
hmac);
-
memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);
- if (skb)
- subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
-
if (!mptcp_finish_join(sk))
goto do_reset;
+ subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
- } else {
-do_reset:
- tcp_send_active_reset(sk, GFP_ATOMIC);
- tcp_done(sk);
+ } else if (mptcp_check_fallback(sk)) {
+fallback:
+ mptcp_rcv_space_init(mptcp_sk(parent), sk);
}
+ return;
+
+do_reset:
+ tcp_send_active_reset(sk, GFP_ATOMIC);
+ tcp_done(sk);
}
-static struct request_sock_ops subflow_request_sock_ops;
+struct request_sock_ops mptcp_subflow_request_sock_ops;
+EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops);
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
@@ -312,7 +360,7 @@ static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
goto drop;
- return tcp_conn_request(&subflow_request_sock_ops,
+ return tcp_conn_request(&mptcp_subflow_request_sock_ops,
&subflow_request_sock_ipv4_ops,
sk, skb);
drop:
@@ -337,7 +385,7 @@ static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (!ipv6_unicast_destination(skb))
goto drop;
- return tcp_conn_request(&subflow_request_sock_ops,
+ return tcp_conn_request(&mptcp_subflow_request_sock_ops,
&subflow_request_sock_ipv6_ops, sk, skb);
drop:
@@ -386,7 +434,7 @@ static void mptcp_sock_destruct(struct sock *sk)
sock_orphan(sk);
}
- mptcp_token_destroy(mptcp_sk(sk)->token);
+ mptcp_token_destroy(mptcp_sk(sk));
inet_sock_destruct(sk);
}
@@ -421,6 +469,17 @@ static void subflow_drop_ctx(struct sock *ssk)
kfree_rcu(ctx, rcu);
}
+void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
+ struct mptcp_options_received *mp_opt)
+{
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ subflow->remote_key = mp_opt->sndr_key;
+ subflow->fully_established = 1;
+ subflow->can_ack = 1;
+ WRITE_ONCE(msk->fully_established, true);
+}
+
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct sk_buff *skb,
struct request_sock *req,
@@ -444,7 +503,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
/* hopefully temporary handling for MP_JOIN+syncookie */
subflow_req = mptcp_subflow_rsk(req);
- fallback_is_fatal = subflow_req->mp_join;
+ fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
fallback = !tcp_rsk(req)->is_mptcp;
if (fallback)
goto create_child;
@@ -472,6 +531,7 @@ create_msk:
} else if (subflow_req->mp_join) {
mptcp_get_options(skb, &mp_opt);
if (!mp_opt.mp_join ||
+ !mptcp_can_accept_new_subflow(subflow_req->msk) ||
!subflow_hmac_valid(req, &mp_opt)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
fallback = true;
@@ -500,20 +560,25 @@ create_child:
}
if (ctx->mp_capable) {
+ /* this can't race with mptcp_close(), as the msk is
+ * not yet exposted to user-space
+ */
+ inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);
+
/* new mpc subflow takes ownership of the newly
* created mptcp socket
*/
new_msk->sk_destruct = mptcp_sock_destruct;
mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
+ mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
ctx->conn = new_msk;
new_msk = NULL;
/* with OoO packets we can reach here without ingress
* mpc option
*/
- ctx->remote_key = mp_opt.sndr_key;
- ctx->fully_established = mp_opt.mp_capable;
- ctx->can_ack = mp_opt.mp_capable;
+ if (mp_opt.mp_capable)
+ mptcp_subflow_fully_established(ctx, &mp_opt);
} else if (ctx->mp_join) {
struct mptcp_sock *owner;
@@ -548,9 +613,9 @@ out:
dispose_child:
subflow_drop_ctx(child);
tcp_rsk(req)->drop_req = true;
- tcp_send_active_reset(child, GFP_ATOMIC);
inet_csk_prepare_for_destroy_sock(child);
tcp_done(child);
+ req->rsk_ops->send_reset(sk, skb);
/* The last child reference will be released by the caller */
return child;
@@ -562,7 +627,8 @@ enum mapping_status {
MAPPING_OK,
MAPPING_INVALID,
MAPPING_EMPTY,
- MAPPING_DATA_FIN
+ MAPPING_DATA_FIN,
+ MAPPING_DUMMY
};
static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
@@ -614,7 +680,8 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
return true;
}
-static enum mapping_status get_mapping_status(struct sock *ssk)
+static enum mapping_status get_mapping_status(struct sock *ssk,
+ struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct mptcp_ext *mpext;
@@ -626,6 +693,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
if (!skb)
return MAPPING_EMPTY;
+ if (mptcp_check_fallback(ssk))
+ return MAPPING_DUMMY;
+
mpext = mptcp_get_ext(skb);
if (!mpext || !mpext->use_map) {
if (!subflow->map_valid && !skb->len) {
@@ -661,7 +731,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
if (mpext->data_fin == 1) {
if (data_len == 1) {
- pr_debug("DATA_FIN with no payload");
+ mptcp_update_rcv_data_fin(msk, mpext->data_seq);
+ pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq);
if (subflow->map_valid) {
/* A DATA_FIN might arrive in a DSS
* option before the previous mapping
@@ -673,6 +744,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
} else {
return MAPPING_DATA_FIN;
}
+ } else {
+ mptcp_update_rcv_data_fin(msk, mpext->data_seq + data_len);
+ pr_debug("DATA_FIN with mapping seq=%llu", mpext->data_seq + data_len);
}
/* Adjust for DATA_FIN using 1 byte of sequence space */
@@ -761,12 +835,22 @@ static bool subflow_check_data_avail(struct sock *ssk)
u64 ack_seq;
u64 old_ack;
- status = get_mapping_status(ssk);
+ status = get_mapping_status(ssk, msk);
pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
if (status == MAPPING_INVALID) {
ssk->sk_err = EBADMSG;
goto fatal;
}
+ if (status == MAPPING_DUMMY) {
+ __mptcp_do_fallback(msk);
+ skb = skb_peek(&ssk->sk_receive_queue);
+ subflow->map_valid = 1;
+ subflow->map_seq = READ_ONCE(msk->ack_seq);
+ subflow->map_data_len = skb->len;
+ subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
+ subflow->ssn_offset;
+ return true;
+ }
if (status != MAPPING_OK)
return false;
@@ -889,15 +973,20 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
static void subflow_data_ready(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ u16 state = 1 << inet_sk_state_load(sk);
struct sock *parent = subflow->conn;
+ struct mptcp_sock *msk;
- if (!subflow->mp_capable && !subflow->mp_join) {
- subflow->tcp_data_ready(sk);
-
+ msk = mptcp_sk(parent);
+ if (state & TCPF_LISTEN) {
+ set_bit(MPTCP_DATA_READY, &msk->flags);
parent->sk_data_ready(parent);
return;
}
+ WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
+ !subflow->mp_join && !(state & TCPF_CLOSE));
+
if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
}
@@ -974,19 +1063,34 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex,
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_subflow_context *subflow;
struct sockaddr_storage addr;
+ int local_id = loc->id;
struct socket *sf;
+ struct sock *ssk;
u32 remote_token;
int addrlen;
int err;
- if (sk->sk_state != TCP_ESTABLISHED)
+ if (!mptcp_is_fully_established(sk))
return -ENOTCONN;
err = mptcp_subflow_create_socket(sk, &sf);
if (err)
return err;
- subflow = mptcp_subflow_ctx(sf->sk);
+ ssk = sf->sk;
+ subflow = mptcp_subflow_ctx(ssk);
+ do {
+ get_random_bytes(&subflow->local_nonce, sizeof(u32));
+ } while (!subflow->local_nonce);
+
+ if (!local_id) {
+ err = mptcp_pm_get_local_id(msk, (struct sock_common *)ssk);
+ if (err < 0)
+ goto failed;
+
+ local_id = err;
+ }
+
subflow->remote_key = msk->remote_key;
subflow->local_key = msk->local_key;
subflow->token = msk->token;
@@ -997,15 +1101,16 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex,
if (loc->family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
- sf->sk->sk_bound_dev_if = ifindex;
+ ssk->sk_bound_dev_if = ifindex;
err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
if (err)
goto failed;
mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
- pr_debug("msk=%p remote_token=%u", msk, remote_token);
+ pr_debug("msk=%p remote_token=%u local_id=%d", msk, remote_token,
+ local_id);
subflow->remote_token = remote_token;
- subflow->local_id = loc->id;
+ subflow->local_id = local_id;
subflow->request_join = 1;
subflow->request_bkup = 1;
mptcp_info2sockaddr(remote, &addr);
@@ -1118,14 +1223,26 @@ static void subflow_state_change(struct sock *sk)
__subflow_state_change(sk);
+ if (subflow_simultaneous_connect(sk)) {
+ mptcp_do_fallback(sk);
+ mptcp_rcv_space_init(mptcp_sk(parent), sk);
+ pr_fallback(mptcp_sk(parent));
+ subflow->conn_finished = 1;
+ if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
+ inet_sk_state_store(parent, TCP_ESTABLISHED);
+ parent->sk_state_change(parent);
+ }
+ }
+
/* as recvmsg() does not acquire the subflow socket for ssk selection
* a fin packet carrying a DSS can be unnoticed if we don't trigger
* the data available machinery here.
*/
- if (subflow->mp_capable && mptcp_subflow_data_available(sk))
+ if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
- if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
+ if (__mptcp_check_fallback(mptcp_sk(parent)) &&
+ !(parent->sk_shutdown & RCV_SHUTDOWN) &&
!subflow->rx_eof && subflow_is_done(sk)) {
subflow->rx_eof = 1;
mptcp_subflow_eof(parent);
@@ -1255,10 +1372,10 @@ static int subflow_ops_init(struct request_sock_ops *subflow_ops)
return 0;
}
-void mptcp_subflow_init(void)
+void __init mptcp_subflow_init(void)
{
- subflow_request_sock_ops = tcp_request_sock_ops;
- if (subflow_ops_init(&subflow_request_sock_ops) != 0)
+ mptcp_subflow_request_sock_ops = tcp_request_sock_ops;
+ if (subflow_ops_init(&mptcp_subflow_request_sock_ops) != 0)
panic("MPTCP: failed to init subflow request sock ops\n");
subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
@@ -1268,7 +1385,6 @@ void mptcp_subflow_init(void)
subflow_specific.conn_request = subflow_v4_conn_request;
subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
subflow_specific.sk_rx_dst_set = subflow_finish_connect;
- subflow_specific.rebuild_header = subflow_rebuild_header;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
@@ -1278,7 +1394,6 @@ void mptcp_subflow_init(void)
subflow_v6_specific.conn_request = subflow_v6_conn_request;
subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
- subflow_v6_specific.rebuild_header = subflow_rebuild_header;
subflow_v6m_specific = subflow_v6_specific;
subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
diff --git a/net/mptcp/syncookies.c b/net/mptcp/syncookies.c
new file mode 100644
index 000000000000..abe0fd099746
--- /dev/null
+++ b/net/mptcp/syncookies.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/skbuff.h>
+
+#include "protocol.h"
+
+/* Syncookies do not work for JOIN requests.
+ *
+ * Unlike MP_CAPABLE, where the ACK cookie contains the needed MPTCP
+ * options to reconstruct the initial syn state, MP_JOIN does not contain
+ * the token to obtain the mptcp socket nor the server-generated nonce
+ * that was used in the cookie SYN/ACK response.
+ *
+ * Keep a small best effort state table to store the syn/synack data,
+ * indexed by skb hash.
+ *
+ * A MP_JOIN SYN packet handled by syn cookies is only stored if the 32bit
+ * token matches a known mptcp connection that can still accept more subflows.
+ *
+ * There is no timeout handling -- state is only re-constructed
+ * when the TCP ACK passed the cookie validation check.
+ */
+
+struct join_entry {
+ u32 token;
+ u32 remote_nonce;
+ u32 local_nonce;
+ u8 join_id;
+ u8 local_id;
+ u8 backup;
+ u8 valid;
+};
+
+#define COOKIE_JOIN_SLOTS 1024
+
+static struct join_entry join_entries[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp;
+static spinlock_t join_entry_locks[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp;
+
+static u32 mptcp_join_entry_hash(struct sk_buff *skb, struct net *net)
+{
+ u32 i = skb_get_hash(skb) ^ net_hash_mix(net);
+
+ return i % ARRAY_SIZE(join_entries);
+}
+
+static void mptcp_join_store_state(struct join_entry *entry,
+ const struct mptcp_subflow_request_sock *subflow_req)
+{
+ entry->token = subflow_req->token;
+ entry->remote_nonce = subflow_req->remote_nonce;
+ entry->local_nonce = subflow_req->local_nonce;
+ entry->backup = subflow_req->backup;
+ entry->join_id = subflow_req->remote_id;
+ entry->local_id = subflow_req->local_id;
+ entry->valid = 1;
+}
+
+void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req,
+ struct sk_buff *skb)
+{
+ struct net *net = read_pnet(&subflow_req->sk.req.ireq_net);
+ u32 i = mptcp_join_entry_hash(skb, net);
+
+ /* No use in waiting if other cpu is already using this slot --
+ * would overwrite the data that got stored.
+ */
+ spin_lock_bh(&join_entry_locks[i]);
+ mptcp_join_store_state(&join_entries[i], subflow_req);
+ spin_unlock_bh(&join_entry_locks[i]);
+}
+
+/* Called for a cookie-ack with MP_JOIN option present.
+ * Look up the saved state based on skb hash & check token matches msk
+ * in same netns.
+ *
+ * Caller will check msk can still accept another subflow. The hmac
+ * present in the cookie ACK mptcp option space will be checked later.
+ */
+bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req,
+ struct sk_buff *skb)
+{
+ struct net *net = read_pnet(&subflow_req->sk.req.ireq_net);
+ u32 i = mptcp_join_entry_hash(skb, net);
+ struct mptcp_sock *msk;
+ struct join_entry *e;
+
+ e = &join_entries[i];
+
+ spin_lock_bh(&join_entry_locks[i]);
+
+ if (e->valid == 0) {
+ spin_unlock_bh(&join_entry_locks[i]);
+ return false;
+ }
+
+ e->valid = 0;
+
+ msk = mptcp_token_get_sock(e->token);
+ if (!msk) {
+ spin_unlock_bh(&join_entry_locks[i]);
+ return false;
+ }
+
+ /* If this fails, the token got re-used in the mean time by another
+ * mptcp socket in a different netns, i.e. entry is outdated.
+ */
+ if (!net_eq(sock_net((struct sock *)msk), net))
+ goto err_put;
+
+ subflow_req->remote_nonce = e->remote_nonce;
+ subflow_req->local_nonce = e->local_nonce;
+ subflow_req->backup = e->backup;
+ subflow_req->remote_id = e->join_id;
+ subflow_req->token = e->token;
+ subflow_req->msk = msk;
+ spin_unlock_bh(&join_entry_locks[i]);
+ return true;
+
+err_put:
+ spin_unlock_bh(&join_entry_locks[i]);
+ sock_put((struct sock *)msk);
+ return false;
+}
+
+void __init mptcp_join_cookie_init(void)
+{
+ int i;
+
+ for (i = 0; i < COOKIE_JOIN_SLOTS; i++)
+ spin_lock_init(&join_entry_locks[i]);
+}
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index 33352dd99d4d..8b47c4bb1c6b 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -24,7 +24,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/radix-tree.h>
+#include <linux/memblock.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <net/sock.h>
@@ -33,10 +33,67 @@
#include <net/mptcp.h>
#include "protocol.h"
-static RADIX_TREE(token_tree, GFP_ATOMIC);
-static RADIX_TREE(token_req_tree, GFP_ATOMIC);
-static DEFINE_SPINLOCK(token_tree_lock);
-static int token_used __read_mostly;
+#define TOKEN_MAX_RETRIES 4
+#define TOKEN_MAX_CHAIN_LEN 4
+
+struct token_bucket {
+ spinlock_t lock;
+ int chain_len;
+ struct hlist_nulls_head req_chain;
+ struct hlist_nulls_head msk_chain;
+};
+
+static struct token_bucket *token_hash __read_mostly;
+static unsigned int token_mask __read_mostly;
+
+static struct token_bucket *token_bucket(u32 token)
+{
+ return &token_hash[token & token_mask];
+}
+
+/* called with bucket lock held */
+static struct mptcp_subflow_request_sock *
+__token_lookup_req(struct token_bucket *t, u32 token)
+{
+ struct mptcp_subflow_request_sock *req;
+ struct hlist_nulls_node *pos;
+
+ hlist_nulls_for_each_entry_rcu(req, pos, &t->req_chain, token_node)
+ if (req->token == token)
+ return req;
+ return NULL;
+}
+
+/* called with bucket lock held */
+static struct mptcp_sock *
+__token_lookup_msk(struct token_bucket *t, u32 token)
+{
+ struct hlist_nulls_node *pos;
+ struct sock *sk;
+
+ sk_nulls_for_each_rcu(sk, pos, &t->msk_chain)
+ if (mptcp_sk(sk)->token == token)
+ return mptcp_sk(sk);
+ return NULL;
+}
+
+static bool __token_bucket_busy(struct token_bucket *t, u32 token)
+{
+ return !token || t->chain_len >= TOKEN_MAX_CHAIN_LEN ||
+ __token_lookup_req(t, token) || __token_lookup_msk(t, token);
+}
+
+static void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn)
+{
+ /* we might consider a faster version that computes the key as a
+ * hash of some information available in the MPTCP socket. Use
+ * random data at the moment, as it's probably the safest option
+ * in case multiple sockets are opened in different namespaces at
+ * the same time.
+ */
+ get_random_bytes(key, sizeof(u64));
+ mptcp_crypto_key_sha(*key, token, idsn);
+}
/**
* mptcp_token_new_request - create new key/idsn/token for subflow_request
@@ -52,30 +109,28 @@ static int token_used __read_mostly;
int mptcp_token_new_request(struct request_sock *req)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
- int err;
-
- while (1) {
- u32 token;
-
- mptcp_crypto_key_gen_sha(&subflow_req->local_key,
- &subflow_req->token,
- &subflow_req->idsn);
- pr_debug("req=%p local_key=%llu, token=%u, idsn=%llu\n",
- req, subflow_req->local_key, subflow_req->token,
- subflow_req->idsn);
-
- token = subflow_req->token;
- spin_lock_bh(&token_tree_lock);
- if (!radix_tree_lookup(&token_req_tree, token) &&
- !radix_tree_lookup(&token_tree, token))
- break;
- spin_unlock_bh(&token_tree_lock);
+ struct token_bucket *bucket;
+ u32 token;
+
+ mptcp_crypto_key_sha(subflow_req->local_key,
+ &subflow_req->token,
+ &subflow_req->idsn);
+ pr_debug("req=%p local_key=%llu, token=%u, idsn=%llu\n",
+ req, subflow_req->local_key, subflow_req->token,
+ subflow_req->idsn);
+
+ token = subflow_req->token;
+ bucket = token_bucket(token);
+ spin_lock_bh(&bucket->lock);
+ if (__token_bucket_busy(bucket, token)) {
+ spin_unlock_bh(&bucket->lock);
+ return -EBUSY;
}
- err = radix_tree_insert(&token_req_tree,
- subflow_req->token, &token_used);
- spin_unlock_bh(&token_tree_lock);
- return err;
+ hlist_nulls_add_head_rcu(&subflow_req->token_node, &bucket->req_chain);
+ bucket->chain_len++;
+ spin_unlock_bh(&bucket->lock);
+ return 0;
}
/**
@@ -97,48 +152,82 @@ int mptcp_token_new_request(struct request_sock *req)
int mptcp_token_new_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- struct sock *mptcp_sock = subflow->conn;
- int err;
-
- while (1) {
- u32 token;
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ int retries = TOKEN_MAX_RETRIES;
+ struct token_bucket *bucket;
- mptcp_crypto_key_gen_sha(&subflow->local_key, &subflow->token,
- &subflow->idsn);
+ pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n",
+ sk, subflow->local_key, subflow->token, subflow->idsn);
- pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n",
- sk, subflow->local_key, subflow->token, subflow->idsn);
+again:
+ mptcp_crypto_key_gen_sha(&subflow->local_key, &subflow->token,
+ &subflow->idsn);
- token = subflow->token;
- spin_lock_bh(&token_tree_lock);
- if (!radix_tree_lookup(&token_req_tree, token) &&
- !radix_tree_lookup(&token_tree, token))
- break;
- spin_unlock_bh(&token_tree_lock);
+ bucket = token_bucket(subflow->token);
+ spin_lock_bh(&bucket->lock);
+ if (__token_bucket_busy(bucket, subflow->token)) {
+ spin_unlock_bh(&bucket->lock);
+ if (!--retries)
+ return -EBUSY;
+ goto again;
}
- err = radix_tree_insert(&token_tree, subflow->token, mptcp_sock);
- spin_unlock_bh(&token_tree_lock);
- return err;
+ WRITE_ONCE(msk->token, subflow->token);
+ __sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain);
+ bucket->chain_len++;
+ spin_unlock_bh(&bucket->lock);
+ return 0;
}
/**
- * mptcp_token_new_accept - insert token for later processing
- * @token: the token to insert to the tree
- * @conn: the just cloned socket linked to the new connection
+ * mptcp_token_accept - replace a req sk with full sock in token hash
+ * @req: the request socket to be removed
+ * @msk: the just cloned socket linked to the new connection
*
* Called when a SYN packet creates a new logical connection, i.e.
* is not a join request.
*/
-int mptcp_token_new_accept(u32 token, struct sock *conn)
+void mptcp_token_accept(struct mptcp_subflow_request_sock *req,
+ struct mptcp_sock *msk)
{
- int err;
+ struct mptcp_subflow_request_sock *pos;
+ struct token_bucket *bucket;
- spin_lock_bh(&token_tree_lock);
- err = radix_tree_insert(&token_tree, token, conn);
- spin_unlock_bh(&token_tree_lock);
+ bucket = token_bucket(req->token);
+ spin_lock_bh(&bucket->lock);
- return err;
+ /* pedantic lookup check for the moved token */
+ pos = __token_lookup_req(bucket, req->token);
+ if (!WARN_ON_ONCE(pos != req))
+ hlist_nulls_del_init_rcu(&req->token_node);
+ __sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain);
+ spin_unlock_bh(&bucket->lock);
+}
+
+bool mptcp_token_exists(u32 token)
+{
+ struct hlist_nulls_node *pos;
+ struct token_bucket *bucket;
+ struct mptcp_sock *msk;
+ struct sock *sk;
+
+ rcu_read_lock();
+ bucket = token_bucket(token);
+
+again:
+ sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) {
+ msk = mptcp_sk(sk);
+ if (READ_ONCE(msk->token) == token)
+ goto found;
+ }
+ if (get_nulls_value(pos) != (token & token_mask))
+ goto again;
+
+ rcu_read_unlock();
+ return false;
+found:
+ rcu_read_unlock();
+ return true;
}
/**
@@ -152,45 +241,171 @@ int mptcp_token_new_accept(u32 token, struct sock *conn)
*/
struct mptcp_sock *mptcp_token_get_sock(u32 token)
{
- struct sock *conn;
-
- spin_lock_bh(&token_tree_lock);
- conn = radix_tree_lookup(&token_tree, token);
- if (conn) {
- /* token still reserved? */
- if (conn == (struct sock *)&token_used)
- conn = NULL;
- else
- sock_hold(conn);
+ struct hlist_nulls_node *pos;
+ struct token_bucket *bucket;
+ struct mptcp_sock *msk;
+ struct sock *sk;
+
+ rcu_read_lock();
+ bucket = token_bucket(token);
+
+again:
+ sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) {
+ msk = mptcp_sk(sk);
+ if (READ_ONCE(msk->token) != token)
+ continue;
+ if (!refcount_inc_not_zero(&sk->sk_refcnt))
+ goto not_found;
+ if (READ_ONCE(msk->token) != token) {
+ sock_put(sk);
+ goto again;
+ }
+ goto found;
+ }
+ if (get_nulls_value(pos) != (token & token_mask))
+ goto again;
+
+not_found:
+ msk = NULL;
+
+found:
+ rcu_read_unlock();
+ return msk;
+}
+EXPORT_SYMBOL_GPL(mptcp_token_get_sock);
+
+/**
+ * mptcp_token_iter_next - iterate over the token container from given pos
+ * @net: namespace to be iterated
+ * @s_slot: start slot number
+ * @s_num: start number inside the given lock
+ *
+ * This function returns the first mptcp connection structure found inside the
+ * token container starting from the specified position, or NULL.
+ *
+ * On successful iteration, the iterator is move to the next position and the
+ * the acquires a reference to the returned socket.
+ */
+struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot,
+ long *s_num)
+{
+ struct mptcp_sock *ret = NULL;
+ struct hlist_nulls_node *pos;
+ int slot, num;
+
+ for (slot = *s_slot; slot <= token_mask; *s_num = 0, slot++) {
+ struct token_bucket *bucket = &token_hash[slot];
+ struct sock *sk;
+
+ num = 0;
+
+ if (hlist_nulls_empty(&bucket->msk_chain))
+ continue;
+
+ rcu_read_lock();
+ sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) {
+ ++num;
+ if (!net_eq(sock_net(sk), net))
+ continue;
+
+ if (num <= *s_num)
+ continue;
+
+ if (!refcount_inc_not_zero(&sk->sk_refcnt))
+ continue;
+
+ if (!net_eq(sock_net(sk), net)) {
+ sock_put(sk);
+ continue;
+ }
+
+ ret = mptcp_sk(sk);
+ rcu_read_unlock();
+ goto out;
+ }
+ rcu_read_unlock();
}
- spin_unlock_bh(&token_tree_lock);
- return mptcp_sk(conn);
+out:
+ *s_slot = slot;
+ *s_num = num;
+ return ret;
}
+EXPORT_SYMBOL_GPL(mptcp_token_iter_next);
/**
* mptcp_token_destroy_request - remove mptcp connection/token
- * @token: token of mptcp connection to remove
+ * @req: mptcp request socket dropping the token
*
- * Remove not-yet-fully-established incoming connection identified
- * by @token.
+ * Remove the token associated to @req.
*/
-void mptcp_token_destroy_request(u32 token)
+void mptcp_token_destroy_request(struct request_sock *req)
{
- spin_lock_bh(&token_tree_lock);
- radix_tree_delete(&token_req_tree, token);
- spin_unlock_bh(&token_tree_lock);
+ struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
+ struct mptcp_subflow_request_sock *pos;
+ struct token_bucket *bucket;
+
+ if (hlist_nulls_unhashed(&subflow_req->token_node))
+ return;
+
+ bucket = token_bucket(subflow_req->token);
+ spin_lock_bh(&bucket->lock);
+ pos = __token_lookup_req(bucket, subflow_req->token);
+ if (!WARN_ON_ONCE(pos != subflow_req)) {
+ hlist_nulls_del_init_rcu(&pos->token_node);
+ bucket->chain_len--;
+ }
+ spin_unlock_bh(&bucket->lock);
}
/**
* mptcp_token_destroy - remove mptcp connection/token
- * @token: token of mptcp connection to remove
+ * @msk: mptcp connection dropping the token
*
- * Remove the connection identified by @token.
+ * Remove the token associated to @msk
*/
-void mptcp_token_destroy(u32 token)
+void mptcp_token_destroy(struct mptcp_sock *msk)
{
- spin_lock_bh(&token_tree_lock);
- radix_tree_delete(&token_tree, token);
- spin_unlock_bh(&token_tree_lock);
+ struct token_bucket *bucket;
+ struct mptcp_sock *pos;
+
+ if (sk_unhashed((struct sock *)msk))
+ return;
+
+ bucket = token_bucket(msk->token);
+ spin_lock_bh(&bucket->lock);
+ pos = __token_lookup_msk(bucket, msk->token);
+ if (!WARN_ON_ONCE(pos != msk)) {
+ __sk_nulls_del_node_init_rcu((struct sock *)pos);
+ bucket->chain_len--;
+ }
+ spin_unlock_bh(&bucket->lock);
}
+
+void __init mptcp_token_init(void)
+{
+ int i;
+
+ token_hash = alloc_large_system_hash("MPTCP token",
+ sizeof(struct token_bucket),
+ 0,
+ 20,/* one slot per 1MB of memory */
+ HASH_ZERO,
+ NULL,
+ &token_mask,
+ 0,
+ 64 * 1024);
+ for (i = 0; i < token_mask + 1; ++i) {
+ INIT_HLIST_NULLS_HEAD(&token_hash[i].req_chain, i);
+ INIT_HLIST_NULLS_HEAD(&token_hash[i].msk_chain, i);
+ spin_lock_init(&token_hash[i].lock);
+ }
+}
+
+#if IS_MODULE(CONFIG_MPTCP_KUNIT_TESTS)
+EXPORT_SYMBOL_GPL(mptcp_token_new_request);
+EXPORT_SYMBOL_GPL(mptcp_token_new_connect);
+EXPORT_SYMBOL_GPL(mptcp_token_accept);
+EXPORT_SYMBOL_GPL(mptcp_token_destroy_request);
+EXPORT_SYMBOL_GPL(mptcp_token_destroy);
+#endif
diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c
new file mode 100644
index 000000000000..e1bd6f0a0676
--- /dev/null
+++ b/net/mptcp/token_test.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <kunit/test.h>
+
+#include "protocol.h"
+
+static struct mptcp_subflow_request_sock *build_req_sock(struct kunit *test)
+{
+ struct mptcp_subflow_request_sock *req;
+
+ req = kunit_kzalloc(test, sizeof(struct mptcp_subflow_request_sock),
+ GFP_USER);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, req);
+ mptcp_token_init_request((struct request_sock *)req);
+ return req;
+}
+
+static void mptcp_token_test_req_basic(struct kunit *test)
+{
+ struct mptcp_subflow_request_sock *req = build_req_sock(test);
+ struct mptcp_sock *null_msk = NULL;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ mptcp_token_new_request((struct request_sock *)req));
+ KUNIT_EXPECT_NE(test, 0, (int)req->token);
+ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(req->token));
+
+ /* cleanup */
+ mptcp_token_destroy_request((struct request_sock *)req);
+}
+
+static struct inet_connection_sock *build_icsk(struct kunit *test)
+{
+ struct inet_connection_sock *icsk;
+
+ icsk = kunit_kzalloc(test, sizeof(struct inet_connection_sock),
+ GFP_USER);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, icsk);
+ return icsk;
+}
+
+static struct mptcp_subflow_context *build_ctx(struct kunit *test)
+{
+ struct mptcp_subflow_context *ctx;
+
+ ctx = kunit_kzalloc(test, sizeof(struct mptcp_subflow_context),
+ GFP_USER);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, ctx);
+ return ctx;
+}
+
+static struct mptcp_sock *build_msk(struct kunit *test)
+{
+ struct mptcp_sock *msk;
+
+ msk = kunit_kzalloc(test, sizeof(struct mptcp_sock), GFP_USER);
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk);
+ refcount_set(&((struct sock *)msk)->sk_refcnt, 1);
+ return msk;
+}
+
+static void mptcp_token_test_msk_basic(struct kunit *test)
+{
+ struct inet_connection_sock *icsk = build_icsk(test);
+ struct mptcp_subflow_context *ctx = build_ctx(test);
+ struct mptcp_sock *msk = build_msk(test);
+ struct mptcp_sock *null_msk = NULL;
+ struct sock *sk;
+
+ rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
+ ctx->conn = (struct sock *)msk;
+ sk = (struct sock *)msk;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ mptcp_token_new_connect((struct sock *)icsk));
+ KUNIT_EXPECT_NE(test, 0, (int)ctx->token);
+ KUNIT_EXPECT_EQ(test, ctx->token, msk->token);
+ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(ctx->token));
+ KUNIT_EXPECT_EQ(test, 2, (int)refcount_read(&sk->sk_refcnt));
+
+ mptcp_token_destroy(msk);
+ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(ctx->token));
+}
+
+static void mptcp_token_test_accept(struct kunit *test)
+{
+ struct mptcp_subflow_request_sock *req = build_req_sock(test);
+ struct mptcp_sock *msk = build_msk(test);
+
+ KUNIT_ASSERT_EQ(test, 0,
+ mptcp_token_new_request((struct request_sock *)req));
+ msk->token = req->token;
+ mptcp_token_accept(req, msk);
+ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token));
+
+ /* this is now a no-op */
+ mptcp_token_destroy_request((struct request_sock *)req);
+ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token));
+
+ /* cleanup */
+ mptcp_token_destroy(msk);
+}
+
+static void mptcp_token_test_destroyed(struct kunit *test)
+{
+ struct mptcp_subflow_request_sock *req = build_req_sock(test);
+ struct mptcp_sock *msk = build_msk(test);
+ struct mptcp_sock *null_msk = NULL;
+ struct sock *sk;
+
+ sk = (struct sock *)msk;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ mptcp_token_new_request((struct request_sock *)req));
+ msk->token = req->token;
+ mptcp_token_accept(req, msk);
+
+ /* simulate race on removal */
+ refcount_set(&sk->sk_refcnt, 0);
+ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(msk->token));
+
+ /* cleanup */
+ mptcp_token_destroy(msk);
+}
+
+static struct kunit_case mptcp_token_test_cases[] = {
+ KUNIT_CASE(mptcp_token_test_req_basic),
+ KUNIT_CASE(mptcp_token_test_msk_basic),
+ KUNIT_CASE(mptcp_token_test_accept),
+ KUNIT_CASE(mptcp_token_test_destroyed),
+ {}
+};
+
+static struct kunit_suite mptcp_token_suite = {
+ .name = "mptcp-token",
+ .test_cases = mptcp_token_test_cases,
+};
+
+kunit_test_suite(mptcp_token_suite);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index a94bb59793f0..5b1f4ec66dd9 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -471,7 +471,7 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr)
memcpy(&ncf->addrs[index], cmd->mac, ETH_ALEN);
} else {
clear_bit(cmd->index - 1, bitmap);
- memset(&ncf->addrs[index], 0, ETH_ALEN);
+ eth_zero_addr(&ncf->addrs[index]);
}
spin_unlock_irqrestore(&nc->lock, flags);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 0ffe2b8723c4..25313c29d799 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -447,7 +447,7 @@ config NF_TABLES
replace the existing {ip,ip6,arp,eb}_tables infrastructure. It
provides a pseudo-state machine with an extensible instruction-set
(also known as expressions) that the userspace 'nft' utility
- (http://www.netfilter.org/projects/nftables) uses to build the
+ (https://www.netfilter.org/projects/nftables) uses to build the
rule-set. It also comes with the generic set infrastructure that
allows you to construct mappings between matchings and actions
for performance lookups.
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 56621d6bfd29..920b7c4331f0 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1644,7 +1644,7 @@ dump_last:
goto next_set;
if (set->variant->uref)
set->variant->uref(set, cb, true);
- /* fall through */
+ fallthrough;
default:
ret = set->variant->list(set, skb, cb);
if (!cb->args[IPSET_CB_ARG0])
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 02f2f636798d..a90b8eac16ac 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -807,6 +807,31 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head)
kmem_cache_free(ip_vs_conn_cachep, cp);
}
+/* Try to delete connection while not holding reference */
+static void ip_vs_conn_del(struct ip_vs_conn *cp)
+{
+ if (del_timer(&cp->timer)) {
+ /* Drop cp->control chain too */
+ if (cp->control)
+ cp->timeout = 0;
+ ip_vs_conn_expire(&cp->timer);
+ }
+}
+
+/* Try to delete connection while holding reference */
+static void ip_vs_conn_del_put(struct ip_vs_conn *cp)
+{
+ if (del_timer(&cp->timer)) {
+ /* Drop cp->control chain too */
+ if (cp->control)
+ cp->timeout = 0;
+ __ip_vs_conn_put(cp);
+ ip_vs_conn_expire(&cp->timer);
+ } else {
+ __ip_vs_conn_put(cp);
+ }
+}
+
static void ip_vs_conn_expire(struct timer_list *t)
{
struct ip_vs_conn *cp = from_timer(cp, t, timer);
@@ -827,14 +852,17 @@ static void ip_vs_conn_expire(struct timer_list *t)
/* does anybody control me? */
if (ct) {
+ bool has_ref = !cp->timeout && __ip_vs_conn_get(ct);
+
ip_vs_control_del(cp);
/* Drop CTL or non-assured TPL if not used anymore */
- if (!cp->timeout && !atomic_read(&ct->n_control) &&
+ if (has_ref && !atomic_read(&ct->n_control) &&
(!(ct->flags & IP_VS_CONN_F_TEMPLATE) ||
!(ct->state & IP_VS_CTPL_S_ASSURED))) {
IP_VS_DBG(4, "drop controlling connection\n");
- ct->timeout = 0;
- ip_vs_conn_expire_now(ct);
+ ip_vs_conn_del_put(ct);
+ } else if (has_ref) {
+ __ip_vs_conn_put(ct);
}
}
@@ -1317,8 +1345,7 @@ try_drop:
drop:
IP_VS_DBG(4, "drop connection\n");
- cp->timeout = 0;
- ip_vs_conn_expire_now(cp);
+ ip_vs_conn_del(cp);
}
cond_resched_rcu();
}
@@ -1341,19 +1368,15 @@ flush_again:
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
if (cp->ipvs != ipvs)
continue;
- /* As timers are expired in LIFO order, restart
- * the timer of controlling connection first, so
- * that it is expired after us.
- */
+ if (atomic_read(&cp->n_control))
+ continue;
cp_c = cp->control;
- /* cp->control is valid only with reference to cp */
- if (cp_c && __ip_vs_conn_get(cp)) {
+ IP_VS_DBG(4, "del connection\n");
+ ip_vs_conn_del(cp);
+ if (cp_c && !atomic_read(&cp_c->n_control)) {
IP_VS_DBG(4, "del controlling connection\n");
- ip_vs_conn_expire_now(cp_c);
- __ip_vs_conn_put(cp);
+ ip_vs_conn_del(cp_c);
}
- IP_VS_DBG(4, "del connection\n");
- ip_vs_conn_expire_now(cp);
}
cond_resched_rcu();
}
@@ -1366,6 +1389,45 @@ flush_again:
goto flush_again;
}
}
+
+#ifdef CONFIG_SYSCTL
+void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
+{
+ int idx;
+ struct ip_vs_conn *cp, *cp_c;
+ struct ip_vs_dest *dest;
+
+ rcu_read_lock();
+ for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
+ if (cp->ipvs != ipvs)
+ continue;
+
+ dest = cp->dest;
+ if (!dest || (dest->flags & IP_VS_DEST_F_AVAILABLE))
+ continue;
+
+ if (atomic_read(&cp->n_control))
+ continue;
+
+ cp_c = cp->control;
+ IP_VS_DBG(4, "del connection\n");
+ ip_vs_conn_del(cp);
+ if (cp_c && !atomic_read(&cp_c->n_control)) {
+ IP_VS_DBG(4, "del controlling connection\n");
+ ip_vs_conn_del(cp_c);
+ }
+ }
+ cond_resched_rcu();
+
+ /* netns clean up started, abort delayed work */
+ if (!ipvs->enable)
+ break;
+ }
+ rcu_read_unlock();
+}
+#endif
+
/*
* per netns init and exit
*/
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index aa6a603a2425..e3668a6e54e4 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -694,16 +694,10 @@ static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs)
return ipvs->sysctl_nat_icmp_send;
}
-static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
-{
- return ipvs->sysctl_expire_nodest_conn;
-}
-
#else
static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return 0; }
static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return 0; }
-static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; }
#endif
@@ -2066,14 +2060,14 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
- bool uses_ct = false, resched = false;
+ bool old_ct = false, resched = false;
if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
unlikely(!atomic_read(&cp->dest->weight))) {
resched = true;
- uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
+ old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
} else if (is_new_conn_expected(cp, conn_reuse_mode)) {
- uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
+ old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
if (!atomic_read(&cp->n_control)) {
resched = true;
} else {
@@ -2081,50 +2075,51 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
* that uses conntrack while it is still
* referenced by controlled connection(s).
*/
- resched = !uses_ct;
+ resched = !old_ct;
}
}
if (resched) {
+ if (!old_ct)
+ cp->flags &= ~IP_VS_CONN_F_NFCT;
if (!atomic_read(&cp->n_control))
ip_vs_conn_expire_now(cp);
__ip_vs_conn_put(cp);
- if (uses_ct)
+ if (old_ct)
return NF_DROP;
cp = NULL;
}
}
- if (unlikely(!cp)) {
- int v;
-
- if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph))
- return v;
- }
-
- IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet");
-
/* Check the server status */
- if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+ if (cp && cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
+ if (sysctl_expire_nodest_conn(ipvs)) {
+ bool old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
- __u32 flags = cp->flags;
-
- /* when timer already started, silently drop the packet.*/
- if (timer_pending(&cp->timer))
- __ip_vs_conn_put(cp);
- else
- ip_vs_conn_put(cp);
+ if (!old_ct)
+ cp->flags &= ~IP_VS_CONN_F_NFCT;
- if (sysctl_expire_nodest_conn(ipvs) &&
- !(flags & IP_VS_CONN_F_ONE_PACKET)) {
- /* try to expire the connection immediately */
ip_vs_conn_expire_now(cp);
+ __ip_vs_conn_put(cp);
+ if (old_ct)
+ return NF_DROP;
+ cp = NULL;
+ } else {
+ __ip_vs_conn_put(cp);
+ return NF_DROP;
}
+ }
- return NF_DROP;
+ if (unlikely(!cp)) {
+ int v;
+
+ if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph))
+ return v;
}
+ IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet");
+
ip_vs_in_stats(cp, skb);
ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
if (cp->packet_xmit)
@@ -2256,7 +2251,7 @@ ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb,
#endif
-static const struct nf_hook_ops ip_vs_ops[] = {
+static const struct nf_hook_ops ip_vs_ops4[] = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply4,
@@ -2302,7 +2297,10 @@ static const struct nf_hook_ops ip_vs_ops[] = {
.hooknum = NF_INET_FORWARD,
.priority = 100,
},
+};
+
#ifdef CONFIG_IP_VS_IPV6
+static const struct nf_hook_ops ip_vs_ops6[] = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply6,
@@ -2348,8 +2346,64 @@ static const struct nf_hook_ops ip_vs_ops[] = {
.hooknum = NF_INET_FORWARD,
.priority = 100,
},
-#endif
};
+#endif
+
+int ip_vs_register_hooks(struct netns_ipvs *ipvs, unsigned int af)
+{
+ const struct nf_hook_ops *ops;
+ unsigned int count;
+ unsigned int afmask;
+ int ret = 0;
+
+ if (af == AF_INET6) {
+#ifdef CONFIG_IP_VS_IPV6
+ ops = ip_vs_ops6;
+ count = ARRAY_SIZE(ip_vs_ops6);
+ afmask = 2;
+#else
+ return -EINVAL;
+#endif
+ } else {
+ ops = ip_vs_ops4;
+ count = ARRAY_SIZE(ip_vs_ops4);
+ afmask = 1;
+ }
+
+ if (!(ipvs->hooks_afmask & afmask)) {
+ ret = nf_register_net_hooks(ipvs->net, ops, count);
+ if (ret >= 0)
+ ipvs->hooks_afmask |= afmask;
+ }
+ return ret;
+}
+
+void ip_vs_unregister_hooks(struct netns_ipvs *ipvs, unsigned int af)
+{
+ const struct nf_hook_ops *ops;
+ unsigned int count;
+ unsigned int afmask;
+
+ if (af == AF_INET6) {
+#ifdef CONFIG_IP_VS_IPV6
+ ops = ip_vs_ops6;
+ count = ARRAY_SIZE(ip_vs_ops6);
+ afmask = 2;
+#else
+ return;
+#endif
+ } else {
+ ops = ip_vs_ops4;
+ count = ARRAY_SIZE(ip_vs_ops4);
+ afmask = 1;
+ }
+
+ if (ipvs->hooks_afmask & afmask) {
+ nf_unregister_net_hooks(ipvs->net, ops, count);
+ ipvs->hooks_afmask &= ~afmask;
+ }
+}
+
/*
* Initialize IP Virtual Server netns mem.
*/
@@ -2425,19 +2479,6 @@ static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list)
}
}
-static int __net_init __ip_vs_dev_init(struct net *net)
-{
- int ret;
-
- ret = nf_register_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
- if (ret < 0)
- goto hook_fail;
- return 0;
-
-hook_fail:
- return ret;
-}
-
static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
{
struct netns_ipvs *ipvs;
@@ -2446,7 +2487,8 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
EnterFunction(2);
list_for_each_entry(net, net_list, exit_list) {
ipvs = net_ipvs(net);
- nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+ ip_vs_unregister_hooks(ipvs, AF_INET);
+ ip_vs_unregister_hooks(ipvs, AF_INET6);
ipvs->enable = 0; /* Disable packet reception */
smp_wmb();
ip_vs_sync_net_cleanup(ipvs);
@@ -2462,7 +2504,6 @@ static struct pernet_operations ipvs_core_ops = {
};
static struct pernet_operations ipvs_core_dev_ops = {
- .init = __ip_vs_dev_init,
.exit_batch = __ip_vs_dev_cleanup_batch,
};
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 412656c34f20..678c5b14841c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -210,6 +210,17 @@ static void update_defense_level(struct netns_ipvs *ipvs)
local_bh_enable();
}
+/* Handler for delayed work for expiring no
+ * destination connections
+ */
+static void expire_nodest_conn_handler(struct work_struct *work)
+{
+ struct netns_ipvs *ipvs;
+
+ ipvs = container_of(work, struct netns_ipvs,
+ expire_nodest_conn_work.work);
+ ip_vs_expire_nodest_conn_flush(ipvs);
+}
/*
* Timer for checking the defense
@@ -224,7 +235,8 @@ static void defense_work_handler(struct work_struct *work)
update_defense_level(ipvs);
if (atomic_read(&ipvs->dropentry))
ip_vs_random_dropentry(ipvs);
- schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
+ queue_delayed_work(system_long_wq, &ipvs->defense_work,
+ DEFENSE_TIMER_PERIOD);
}
#endif
@@ -1163,6 +1175,12 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
list_add(&dest->t_list, &ipvs->dest_trash);
dest->idle_start = 0;
spin_unlock_bh(&ipvs->dest_trash_lock);
+
+ /* Queue up delayed work to expire all no destination connections.
+ * No-op when CONFIG_SYSCTL is disabled.
+ */
+ if (!cleanup)
+ ip_vs_enqueue_expire_nodest_conns(ipvs);
}
@@ -1272,6 +1290,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
struct ip_vs_scheduler *sched = NULL;
struct ip_vs_pe *pe = NULL;
struct ip_vs_service *svc = NULL;
+ int ret_hooks = -1;
/* increase the module use count */
if (!ip_vs_use_count_inc())
@@ -1313,6 +1332,14 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
}
#endif
+ if ((u->af == AF_INET && !ipvs->num_services) ||
+ (u->af == AF_INET6 && !ipvs->num_services6)) {
+ ret = ip_vs_register_hooks(ipvs, u->af);
+ if (ret < 0)
+ goto out_err;
+ ret_hooks = ret;
+ }
+
svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
if (svc == NULL) {
IP_VS_DBG(1, "%s(): no memory\n", __func__);
@@ -1374,6 +1401,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
ipvs->num_services++;
+ else if (svc->af == AF_INET6)
+ ipvs->num_services6++;
/* Hash the service into the service table */
ip_vs_svc_hash(svc);
@@ -1385,6 +1414,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
out_err:
+ if (ret_hooks >= 0)
+ ip_vs_unregister_hooks(ipvs, u->af);
if (svc != NULL) {
ip_vs_unbind_scheduler(svc, sched);
ip_vs_service_free(svc);
@@ -1500,9 +1531,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
struct ip_vs_pe *old_pe;
struct netns_ipvs *ipvs = svc->ipvs;
- /* Count only IPv4 services for old get/setsockopt interface */
- if (svc->af == AF_INET)
+ if (svc->af == AF_INET) {
ipvs->num_services--;
+ if (!ipvs->num_services)
+ ip_vs_unregister_hooks(ipvs, svc->af);
+ } else if (svc->af == AF_INET6) {
+ ipvs->num_services6--;
+ if (!ipvs->num_services6)
+ ip_vs_unregister_hooks(ipvs, svc->af);
+ }
ip_vs_stop_estimator(svc->ipvs, &svc->stats);
@@ -2414,7 +2451,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
}
static int
-do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len)
{
struct net *net = sock_net(sk);
int ret;
@@ -2438,7 +2475,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
return -EINVAL;
}
- if (copy_from_user(arg, user, len) != 0)
+ if (copy_from_sockptr(arg, ptr, len) != 0)
return -EFAULT;
/* Handle daemons since they have another lock */
@@ -4063,7 +4100,12 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
ipvs->sysctl_tbl = tbl;
/* Schedule defense work */
INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
- schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
+ queue_delayed_work(system_long_wq, &ipvs->defense_work,
+ DEFENSE_TIMER_PERIOD);
+
+ /* Init delayed work for expiring no dest conn */
+ INIT_DELAYED_WORK(&ipvs->expire_nodest_conn_work,
+ expire_nodest_conn_handler);
return 0;
}
@@ -4072,6 +4114,7 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
{
struct net *net = ipvs->net;
+ cancel_delayed_work_sync(&ipvs->expire_nodest_conn_work);
cancel_delayed_work_sync(&ipvs->defense_work);
cancel_work_sync(&ipvs->defense_work.work);
unregister_net_sysctl_table(ipvs->sysctl_hdr);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c3cea50d1bcb..e38b60fc183e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1006,7 +1006,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
*
* @skb: skb that causes the clash
* @h: tuplehash of the clashing entry already in table
- * @hash_reply: hash slot for reply direction
+ * @reply_hash: hash slot for reply direction
*
* A conntrack entry can be inserted to the connection tracking table
* if there is no existing entry with an identical tuple.
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 573cb4481481..e697a824b001 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -257,15 +257,15 @@ static unsigned int get_uint(struct bitstr *bs, int b)
case 4:
v |= *bs->cur++;
v <<= 8;
- /* fall through */
+ fallthrough;
case 3:
v |= *bs->cur++;
v <<= 8;
- /* fall through */
+ fallthrough;
case 2:
v |= *bs->cur++;
v <<= 8;
- /* fall through */
+ fallthrough;
case 1:
v |= *bs->cur++;
break;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index a0560d175a7f..95f79980348c 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -610,7 +610,7 @@ void nf_ct_netns_put(struct net *net, uint8_t nfproto)
switch (nfproto) {
case NFPROTO_BRIDGE:
nf_ct_netns_do_put(net, NFPROTO_BRIDGE);
- /* fall through */
+ fallthrough;
case NFPROTO_INET:
nf_ct_netns_do_put(net, NFPROTO_IPV4);
nf_ct_netns_do_put(net, NFPROTO_IPV6);
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 1926fd56df56..6892e497781c 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -900,7 +900,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
return -NF_REPEAT;
return NF_DROP;
}
- /* Fall through */
+ fallthrough;
case TCP_CONNTRACK_IGNORE:
/* Ignored packets:
*
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 6a26299cb064..a604f43e3e6b 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -60,7 +60,7 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
ntohs(tuple->src.u.tcp.port),
ntohs(tuple->dst.u.tcp.port));
break;
- case IPPROTO_UDPLITE: /* fallthrough */
+ case IPPROTO_UDPLITE:
case IPPROTO_UDP:
seq_printf(s, "sport=%hu dport=%hu ",
ntohs(tuple->src.u.udp.port),
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 5fff1e040168..2a6993fa40d7 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -964,7 +964,7 @@ static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
extack);
- return flow_indr_dev_setup_offload(dev, TC_SETUP_FT, flowtable, bo,
+ return flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_FT, flowtable, bo,
nf_flow_table_indr_cleanup);
}
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index bfc555fcbc72..ea923f8cf9c4 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -408,7 +408,7 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
static const unsigned int max_attempts = 128;
switch (tuple->dst.protonum) {
- case IPPROTO_ICMP: /* fallthrough */
+ case IPPROTO_ICMP:
case IPPROTO_ICMPV6:
/* id is same for either direction... */
keyptr = &tuple->src.u.icmp.id;
@@ -442,11 +442,11 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
}
goto find_free_id;
#endif
- case IPPROTO_UDP: /* fallthrough */
- case IPPROTO_UDPLITE: /* fallthrough */
- case IPPROTO_TCP: /* fallthrough */
- case IPPROTO_SCTP: /* fallthrough */
- case IPPROTO_DCCP: /* fallthrough */
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_TCP:
+ case IPPROTO_SCTP:
+ case IPPROTO_DCCP:
if (maniptype == NF_NAT_MANIP_SRC)
keyptr = &tuple->src.u.all;
else
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index 46cb3786e0ec..34afcd03b6f6 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -89,78 +89,32 @@ out:
return ops;
}
-/* Call get/setsockopt() */
-static int nf_sockopt(struct sock *sk, u_int8_t pf, int val,
- char __user *opt, int *len, int get)
+int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, sockptr_t opt,
+ unsigned int len)
{
struct nf_sockopt_ops *ops;
int ret;
- ops = nf_sockopt_find(sk, pf, val, get);
+ ops = nf_sockopt_find(sk, pf, val, 0);
if (IS_ERR(ops))
return PTR_ERR(ops);
-
- if (get)
- ret = ops->get(sk, val, opt, len);
- else
- ret = ops->set(sk, val, opt, *len);
-
+ ret = ops->set(sk, val, opt, len);
module_put(ops->owner);
return ret;
}
-
-int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt,
- unsigned int len)
-{
- return nf_sockopt(sk, pf, val, opt, &len, 0);
-}
EXPORT_SYMBOL(nf_setsockopt);
int nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt,
int *len)
{
- return nf_sockopt(sk, pf, val, opt, len, 1);
-}
-EXPORT_SYMBOL(nf_getsockopt);
-
-#ifdef CONFIG_COMPAT
-static int compat_nf_sockopt(struct sock *sk, u_int8_t pf, int val,
- char __user *opt, int *len, int get)
-{
struct nf_sockopt_ops *ops;
int ret;
- ops = nf_sockopt_find(sk, pf, val, get);
+ ops = nf_sockopt_find(sk, pf, val, 1);
if (IS_ERR(ops))
return PTR_ERR(ops);
-
- if (get) {
- if (ops->compat_get)
- ret = ops->compat_get(sk, val, opt, len);
- else
- ret = ops->get(sk, val, opt, len);
- } else {
- if (ops->compat_set)
- ret = ops->compat_set(sk, val, opt, *len);
- else
- ret = ops->set(sk, val, opt, *len);
- }
-
+ ret = ops->get(sk, val, opt, len);
module_put(ops->owner);
return ret;
}
-
-int compat_nf_setsockopt(struct sock *sk, u_int8_t pf,
- int val, char __user *opt, unsigned int len)
-{
- return compat_nf_sockopt(sk, pf, val, opt, &len, 0);
-}
-EXPORT_SYMBOL(compat_nf_setsockopt);
-
-int compat_nf_getsockopt(struct sock *sk, u_int8_t pf,
- int val, char __user *opt, int *len)
-{
- return compat_nf_sockopt(sk, pf, val, opt, len, 1);
-}
-EXPORT_SYMBOL(compat_nf_getsockopt);
-#endif
+EXPORT_SYMBOL(nf_getsockopt);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index ebcdc8e54476..9cca35d22927 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -704,8 +704,7 @@ ipv4_synproxy_hook(void *priv, struct sk_buff *skb,
nf_ct_seqadj_init(ct, ctinfo, 0);
synproxy->tsoff = 0;
this_cpu_inc(snet->stats->conn_reopened);
-
- /* fall through */
+ fallthrough;
case TCP_CONNTRACK_SYN_SENT:
if (!synproxy_parse_options(skb, thoff, th, &opts))
return NF_DROP;
@@ -1128,8 +1127,7 @@ ipv6_synproxy_hook(void *priv, struct sk_buff *skb,
nf_ct_seqadj_init(ct, ctinfo, 0);
synproxy->tsoff = 0;
this_cpu_inc(snet->stats->conn_reopened);
-
- /* fall through */
+ fallthrough;
case TCP_CONNTRACK_SYN_SENT:
if (!synproxy_parse_options(skb, thoff, th, &opts))
return NF_DROP;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 79e4db3cadec..7e286e54be47 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -268,9 +268,15 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
if (trans == NULL)
return ERR_PTR(-ENOMEM);
- if (msg_type == NFT_MSG_NEWCHAIN)
+ if (msg_type == NFT_MSG_NEWCHAIN) {
nft_activate_next(ctx->net, ctx->chain);
+ if (ctx->nla[NFTA_CHAIN_ID]) {
+ nft_trans_chain_id(trans) =
+ ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID]));
+ }
+ }
+
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return trans;
}
@@ -1037,6 +1043,9 @@ static int nft_flush_table(struct nft_ctx *ctx)
if (!nft_is_active_next(ctx->net, chain))
continue;
+ if (nft_chain_is_bound(chain))
+ continue;
+
ctx->chain = chain;
err = nft_delrule_by_chain(ctx);
@@ -1079,6 +1088,9 @@ static int nft_flush_table(struct nft_ctx *ctx)
if (!nft_is_active_next(ctx->net, chain))
continue;
+ if (nft_chain_is_bound(chain))
+ continue;
+
ctx->chain = chain;
err = nft_delchain(ctx);
@@ -1261,6 +1273,7 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
.len = NFT_MODULE_AUTOLOAD_LIMIT },
[NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED },
[NFTA_CHAIN_FLAGS] = { .type = NLA_U32 },
+ [NFTA_CHAIN_ID] = { .type = NLA_U32 },
};
static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
@@ -1393,13 +1406,12 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
lockdep_commit_lock_is_held(net));
if (nft_dump_stats(skb, stats))
goto nla_put_failure;
-
- if ((chain->flags & NFT_CHAIN_HW_OFFLOAD) &&
- nla_put_be32(skb, NFTA_CHAIN_FLAGS,
- htonl(NFT_CHAIN_HW_OFFLOAD)))
- goto nla_put_failure;
}
+ if (chain->flags &&
+ nla_put_be32(skb, NFTA_CHAIN_FLAGS, htonl(chain->flags)))
+ goto nla_put_failure;
+
if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use)))
goto nla_put_failure;
@@ -1601,7 +1613,7 @@ static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
kvfree(chain->rules_next);
}
-static void nf_tables_chain_destroy(struct nft_ctx *ctx)
+void nf_tables_chain_destroy(struct nft_ctx *ctx)
{
struct nft_chain *chain = ctx->chain;
struct nft_hook *hook, *next;
@@ -1883,7 +1895,7 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
nft_basechain_hook_init(&basechain->ops, family, hook, chain);
}
- chain->flags |= NFT_BASE_CHAIN | flags;
+ chain->flags |= NFT_CHAIN_BASE | flags;
basechain->policy = NF_ACCEPT;
if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
nft_chain_offload_priority(basechain) < 0)
@@ -1894,6 +1906,22 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
return 0;
}
+static int nft_chain_add(struct nft_table *table, struct nft_chain *chain)
+{
+ int err;
+
+ err = rhltable_insert_key(&table->chains_ht, chain->name,
+ &chain->rhlhead, nft_chain_ht_params);
+ if (err)
+ return err;
+
+ list_add_tail_rcu(&chain->list, &table->chains);
+
+ return 0;
+}
+
+static u64 chain_id;
+
static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
u8 policy, u32 flags)
{
@@ -1902,6 +1930,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct nft_base_chain *basechain;
struct nft_stats __percpu *stats;
struct net *net = ctx->net;
+ char name[NFT_NAME_MAXLEN];
struct nft_trans *trans;
struct nft_chain *chain;
struct nft_rule **rules;
@@ -1913,6 +1942,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (nla[NFTA_CHAIN_HOOK]) {
struct nft_chain_hook hook;
+ if (flags & NFT_CHAIN_BINDING)
+ return -EOPNOTSUPP;
+
err = nft_chain_parse_hook(net, nla, &hook, family, true);
if (err < 0)
return err;
@@ -1942,16 +1974,33 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
return err;
}
} else {
+ if (flags & NFT_CHAIN_BASE)
+ return -EINVAL;
+ if (flags & NFT_CHAIN_HW_OFFLOAD)
+ return -EOPNOTSUPP;
+
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (chain == NULL)
return -ENOMEM;
+
+ chain->flags = flags;
}
ctx->chain = chain;
INIT_LIST_HEAD(&chain->rules);
chain->handle = nf_tables_alloc_handle(table);
chain->table = table;
- chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
+
+ if (nla[NFTA_CHAIN_NAME]) {
+ chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
+ } else {
+ if (!(flags & NFT_CHAIN_BINDING))
+ return -EINVAL;
+
+ snprintf(name, sizeof(name), "__chain%llu", ++chain_id);
+ chain->name = kstrdup(name, GFP_KERNEL);
+ }
+
if (!chain->name) {
err = -ENOMEM;
goto err1;
@@ -1971,16 +2020,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (err < 0)
goto err1;
- err = rhltable_insert_key(&table->chains_ht, chain->name,
- &chain->rhlhead, nft_chain_ht_params);
- if (err)
- goto err2;
-
trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- rhltable_remove(&table->chains_ht, &chain->rhlhead,
- nft_chain_ht_params);
goto err2;
}
@@ -1988,8 +2030,13 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (nft_is_base_chain(chain))
nft_trans_chain_policy(trans) = policy;
+ err = nft_chain_add(table, chain);
+ if (err < 0) {
+ nft_trans_destroy(trans);
+ goto err2;
+ }
+
table->use++;
- list_add_tail_rcu(&chain->list, &table->chains);
return 0;
err2:
@@ -2037,7 +2084,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
if (nla[NFTA_CHAIN_HOOK]) {
if (!nft_is_base_chain(chain))
- return -EBUSY;
+ return -EEXIST;
err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
false);
@@ -2047,21 +2094,21 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
basechain = nft_base_chain(chain);
if (basechain->type != hook.type) {
nft_chain_release_hook(&hook);
- return -EBUSY;
+ return -EEXIST;
}
if (ctx->family == NFPROTO_NETDEV) {
if (!nft_hook_list_equal(&basechain->hook_list,
&hook.list)) {
nft_chain_release_hook(&hook);
- return -EBUSY;
+ return -EEXIST;
}
} else {
ops = &basechain->ops;
if (ops->hooknum != hook.num ||
ops->priority != hook.priority) {
nft_chain_release_hook(&hook);
- return -EBUSY;
+ return -EEXIST;
}
}
nft_chain_release_hook(&hook);
@@ -2133,6 +2180,22 @@ err:
return err;
}
+static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
+ const struct nlattr *nla)
+{
+ u32 id = ntohl(nla_get_be32(nla));
+ struct nft_trans *trans;
+
+ list_for_each_entry(trans, &net->nft.commit_list, list) {
+ struct nft_chain *chain = trans->ctx.chain;
+
+ if (trans->msg_type == NFT_MSG_NEWCHAIN &&
+ id == nft_trans_chain_id(trans))
+ return chain;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
static int nf_tables_newchain(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -2141,9 +2204,9 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
+ struct nft_chain *chain = NULL;
const struct nlattr *attr;
struct nft_table *table;
- struct nft_chain *chain;
u8 policy = NF_ACCEPT;
struct nft_ctx ctx;
u64 handle = 0;
@@ -2168,7 +2231,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
return PTR_ERR(chain);
}
attr = nla[NFTA_CHAIN_HANDLE];
- } else {
+ } else if (nla[NFTA_CHAIN_NAME]) {
chain = nft_chain_lookup(net, table, attr, genmask);
if (IS_ERR(chain)) {
if (PTR_ERR(chain) != -ENOENT) {
@@ -2177,6 +2240,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
}
chain = NULL;
}
+ } else if (!nla[NFTA_CHAIN_ID]) {
+ return -EINVAL;
}
if (nla[NFTA_CHAIN_POLICY]) {
@@ -2207,6 +2272,9 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
else if (chain)
flags = chain->flags;
+ if (flags & ~NFT_CHAIN_FLAGS)
+ return -EOPNOTSUPP;
+
nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
if (chain != NULL) {
@@ -2217,7 +2285,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- flags |= chain->flags & NFT_BASE_CHAIN;
+ flags |= chain->flags & NFT_CHAIN_BASE;
return nf_tables_updchain(&ctx, genmask, policy, flags);
}
@@ -2294,7 +2362,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
/**
* nft_register_expr - register nf_tables expr type
- * @ops: expr type
+ * @type: expr type
*
* Registers the expr type for use with nf_tables. Returns zero on
* success or a negative errno code otherwise.
@@ -2313,7 +2381,7 @@ EXPORT_SYMBOL_GPL(nft_register_expr);
/**
* nft_unregister_expr - unregister nf_tables expr type
- * @ops: expr type
+ * @type: expr type
*
* Unregisters the expr typefor use with nf_tables.
*/
@@ -2428,6 +2496,7 @@ nla_put_failure:
struct nft_expr_info {
const struct nft_expr_ops *ops;
+ const struct nlattr *attr;
struct nlattr *tb[NFT_EXPR_MAXATTR + 1];
};
@@ -2475,7 +2544,9 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
} else
ops = type->ops;
+ info->attr = nla;
info->ops = ops;
+
return 0;
err1:
@@ -2611,6 +2682,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
.len = NFT_USERDATA_MAXLEN },
[NFTA_RULE_ID] = { .type = NLA_U32 },
[NFTA_RULE_POSITION_ID] = { .type = NLA_U32 },
+ [NFTA_RULE_CHAIN_ID] = { .type = NLA_U32 },
};
static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
@@ -2925,8 +2997,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
kfree(rule);
}
-static void nf_tables_rule_release(const struct nft_ctx *ctx,
- struct nft_rule *rule)
+void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule)
{
nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);
nf_tables_rule_destroy(ctx, rule);
@@ -3017,10 +3088,24 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
return PTR_ERR(table);
}
- chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
- if (IS_ERR(chain)) {
- NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
- return PTR_ERR(chain);
+ if (nla[NFTA_RULE_CHAIN]) {
+ chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN],
+ genmask);
+ if (IS_ERR(chain)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
+ return PTR_ERR(chain);
+ }
+ if (nft_chain_is_bound(chain))
+ return -EOPNOTSUPP;
+
+ } else if (nla[NFTA_RULE_CHAIN_ID]) {
+ chain = nft_chain_lookup_byid(net, nla[NFTA_RULE_CHAIN_ID]);
+ if (IS_ERR(chain)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]);
+ return PTR_ERR(chain);
+ }
+ } else {
+ return -EINVAL;
}
if (nla[NFTA_RULE_HANDLE]) {
@@ -3119,8 +3204,10 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
expr = nft_expr_first(rule);
for (i = 0; i < n; i++) {
err = nf_tables_newexpr(&ctx, &info[i], expr);
- if (err < 0)
+ if (err < 0) {
+ NL_SET_BAD_ATTR(extack, info[i].attr);
goto err2;
+ }
if (info[i].ops->validate)
nft_validate_state_update(net, NFT_VALIDATE_NEED);
@@ -3232,6 +3319,8 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
}
+ if (nft_chain_is_bound(chain))
+ return -EOPNOTSUPP;
}
nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
@@ -4278,7 +4367,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
set->use--;
- /* fall through */
+ fallthrough;
default:
nf_tables_unbind_set(ctx, set, binding,
phase == NFT_TRANS_COMMIT);
@@ -5160,10 +5249,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) ||
nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
- nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) {
- err = -EBUSY;
+ nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF))
goto err_element_clash;
- }
if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
memcmp(nft_set_ext_data(ext),
@@ -5171,7 +5258,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
(nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) &&
nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF) &&
*nft_set_ext_obj(ext) != *nft_set_ext_obj(ext2)))
- err = -EBUSY;
+ goto err_element_clash;
else if (!(nlmsg_flags & NLM_F_EXCL))
err = 0;
} else if (err == -ENOTEMPTY) {
@@ -5268,11 +5355,24 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
*/
void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
{
+ struct nft_chain *chain;
+ struct nft_rule *rule;
+
if (type == NFT_DATA_VERDICT) {
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- data->verdict.chain->use++;
+ chain = data->verdict.chain;
+ chain->use++;
+
+ if (!nft_chain_is_bound(chain))
+ break;
+
+ chain->table->use++;
+ list_for_each_entry(rule, &chain->rules, list)
+ chain->use++;
+
+ nft_chain_add(chain->table, chain);
break;
}
}
@@ -5485,7 +5585,7 @@ struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
/**
* nft_register_obj- register nf_tables stateful object type
- * @obj: object type
+ * @obj_type: object type
*
* Registers the object type for use with nf_tables. Returns zero on
* success or a negative errno code otherwise.
@@ -5504,7 +5604,7 @@ EXPORT_SYMBOL_GPL(nft_register_obj);
/**
* nft_unregister_obj - unregister nf_tables object type
- * @obj: object type
+ * @obj_type: object type
*
* Unregisters the object type for use with nf_tables.
*/
@@ -6146,7 +6246,7 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
flowtable->use--;
- /* fall through */
+ fallthrough;
default:
return;
}
@@ -6308,7 +6408,7 @@ static int nft_register_flowtable_net_hooks(struct net *net,
list_for_each_entry(hook2, &ft->hook_list, list) {
if (hook->ops.dev == hook2->ops.dev &&
hook->ops.pf == hook2->ops.pf) {
- err = -EBUSY;
+ err = -EEXIST;
goto err_unregister_net_hooks;
}
}
@@ -7152,7 +7252,7 @@ static int nf_tables_validate(struct net *net)
break;
case NFT_VALIDATE_NEED:
nft_validate_state_update(net, NFT_VALIDATE_DO);
- /* fall through */
+ fallthrough;
case NFT_VALIDATE_DO:
list_for_each_entry(table, &net->nft.tables, list) {
if (nft_table_validate(net, table) < 0)
@@ -7418,7 +7518,7 @@ static void nft_obj_del(struct nft_object *obj)
list_del_rcu(&obj->list);
}
-static void nft_chain_del(struct nft_chain *chain)
+void nft_chain_del(struct nft_chain *chain)
{
struct nft_table *table = chain->table;
@@ -7769,6 +7869,10 @@ static int __nf_tables_abort(struct net *net, bool autoload)
kfree(nft_trans_chain_name(trans));
nft_trans_destroy(trans);
} else {
+ if (nft_chain_is_bound(trans->ctx.chain)) {
+ nft_trans_destroy(trans);
+ break;
+ }
trans->ctx.table->use--;
nft_chain_del(trans->ctx.chain);
nf_tables_unregister_hook(trans->ctx.net,
@@ -8198,6 +8302,7 @@ static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
[NFTA_VERDICT_CODE] = { .type = NLA_U32 },
[NFTA_VERDICT_CHAIN] = { .type = NLA_STRING,
.len = NFT_CHAIN_MAXNAMELEN - 1 },
+ [NFTA_VERDICT_CHAIN_ID] = { .type = NLA_U32 },
};
static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
@@ -8227,17 +8332,26 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
default:
return -EINVAL;
}
- /* fall through */
+ fallthrough;
case NFT_CONTINUE:
case NFT_BREAK:
case NFT_RETURN:
break;
case NFT_JUMP:
case NFT_GOTO:
- if (!tb[NFTA_VERDICT_CHAIN])
+ if (tb[NFTA_VERDICT_CHAIN]) {
+ chain = nft_chain_lookup(ctx->net, ctx->table,
+ tb[NFTA_VERDICT_CHAIN],
+ genmask);
+ } else if (tb[NFTA_VERDICT_CHAIN_ID]) {
+ chain = nft_chain_lookup_byid(ctx->net,
+ tb[NFTA_VERDICT_CHAIN_ID]);
+ if (IS_ERR(chain))
+ return PTR_ERR(chain);
+ } else {
return -EINVAL;
- chain = nft_chain_lookup(ctx->net, ctx->table,
- tb[NFTA_VERDICT_CHAIN], genmask);
+ }
+
if (IS_ERR(chain))
return PTR_ERR(chain);
if (nft_is_base_chain(chain))
@@ -8255,10 +8369,23 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
static void nft_verdict_uninit(const struct nft_data *data)
{
+ struct nft_chain *chain;
+ struct nft_rule *rule;
+
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- data->verdict.chain->use--;
+ chain = data->verdict.chain;
+ chain->use--;
+
+ if (!nft_chain_is_bound(chain))
+ break;
+
+ chain->table->use--;
+ list_for_each_entry(rule, &chain->rules, list)
+ chain->use--;
+
+ nft_chain_del(chain);
break;
}
}
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 96c74c4c7176..587897a2498b 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -213,7 +213,7 @@ next_rule:
jumpstack[stackptr].chain = chain;
jumpstack[stackptr].rules = rules + 1;
stackptr++;
- /* fall through */
+ fallthrough;
case NFT_GOTO:
nft_trace_packet(&info, chain, rule,
NFT_TRACETYPE_RULE);
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index c7cf1cde46de..9ef37c1b7b3b 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -312,7 +312,7 @@ static int nft_indr_block_offload_cmd(struct nft_base_chain *basechain,
nft_flow_block_offload_init(&bo, dev_net(dev), cmd, basechain, &extack);
- err = flow_indr_dev_setup_offload(dev, TC_SETUP_BLOCK, basechain, &bo,
+ err = flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_BLOCK, basechain, &bo,
nft_indr_block_cleanup);
if (err < 0)
return err;
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 5827117f2635..5bfec829c12f 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
- * (C) 2011 Intra2net AG <http://www.intra2net.com>
+ * (C) 2011 Intra2net AG <https://www.intra2net.com>
*/
#include <linux/init.h>
#include <linux/module.h>
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index da915c224a82..89a381f7f945 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -451,7 +451,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
case IPPROTO_TCP:
timeouts = nf_tcp_pernet(net)->timeouts;
break;
- case IPPROTO_UDP: /* fallthrough */
+ case IPPROTO_UDP:
case IPPROTO_UDPLITE:
timeouts = nf_udp_pernet(net)->timeouts;
break;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 8a28c127effc..16f4d84599ac 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -43,7 +43,7 @@ void nft_cmp_eval(const struct nft_expr *expr,
case NFT_CMP_LT:
if (d == 0)
goto mismatch;
- /* fall through */
+ fallthrough;
case NFT_CMP_LTE:
if (d > 0)
goto mismatch;
@@ -51,7 +51,7 @@ void nft_cmp_eval(const struct nft_expr *expr,
case NFT_CMP_GT:
if (d == 0)
goto mismatch;
- /* fall through */
+ fallthrough;
case NFT_CMP_GTE:
if (d < 0)
goto mismatch;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 77258af1fce0..322bd674963e 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -129,7 +129,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
return;
}
#endif
- case NFT_CT_BYTES: /* fallthrough */
+ case NFT_CT_BYTES:
case NFT_CT_PKTS: {
const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
u64 count = 0;
@@ -1013,8 +1013,8 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
help6 = nf_conntrack_helper_try_module_get(name, family,
priv->l4proto);
break;
- case NFPROTO_NETDEV: /* fallthrough */
- case NFPROTO_BRIDGE: /* same */
+ case NFPROTO_NETDEV:
+ case NFPROTO_BRIDGE:
case NFPROTO_INET:
help4 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV4,
priv->l4proto);
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index cfac0964f48d..4dfdaeaf09a5 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -32,7 +32,7 @@ int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
unsigned int hooks;
switch (priv->result) {
- case NFT_FIB_RESULT_OIF: /* fallthrough */
+ case NFT_FIB_RESULT_OIF:
case NFT_FIB_RESULT_OIFNAME:
hooks = (1 << NF_INET_PRE_ROUTING);
break;
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index c7f0ef73d939..c63eb3b17178 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -54,6 +54,23 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
if (err < 0)
goto err1;
+ if (priv->dreg == NFT_REG_VERDICT) {
+ struct nft_chain *chain = priv->data.verdict.chain;
+
+ switch (priv->data.verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ if (nft_chain_is_bound(chain)) {
+ err = -EBUSY;
+ goto err1;
+ }
+ chain->bound = true;
+ break;
+ default:
+ break;
+ }
+ }
+
return 0;
err1:
@@ -81,6 +98,39 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx,
return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg));
}
+static void nft_immediate_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ const struct nft_data *data = &priv->data;
+ struct nft_rule *rule, *n;
+ struct nft_ctx chain_ctx;
+ struct nft_chain *chain;
+
+ if (priv->dreg != NFT_REG_VERDICT)
+ return;
+
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+
+ if (!nft_chain_is_bound(chain))
+ break;
+
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ list_for_each_entry_safe(rule, n, &chain->rules, list)
+ nf_tables_rule_release(&chain_ctx, rule);
+
+ nf_tables_chain_destroy(&chain_ctx);
+ break;
+ default:
+ break;
+ }
+}
+
static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
@@ -170,6 +220,7 @@ static const struct nft_expr_ops nft_imm_ops = {
.init = nft_immediate_init,
.activate = nft_immediate_activate,
.deactivate = nft_immediate_deactivate,
+ .destroy = nft_immediate_destroy,
.dump = nft_immediate_dump,
.validate = nft_immediate_validate,
.offload = nft_immediate_offload,
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index a7de3a58f553..ed7cb9f747f6 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -467,7 +467,7 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
case IPPROTO_UDP:
if (!nft_payload_udp_checksum(skb, pkt->xt.thoff))
return -1;
- /* Fall through. */
+ fallthrough;
case IPPROTO_UDPLITE:
*l4csum_offset = offsetof(struct udphdr, check);
break;
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index 86eafbb0fdd0..61fb7e8afbf0 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -30,7 +30,8 @@ int nft_reject_validate(const struct nft_ctx *ctx,
return nft_chain_validate_hooks(ctx->chain,
(1 << NF_INET_LOCAL_IN) |
(1 << NF_INET_FORWARD) |
- (1 << NF_INET_LOCAL_OUT));
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_PRE_ROUTING));
}
EXPORT_SYMBOL_GPL(nft_reject_validate);
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 8c04388296b0..9944523f5c2c 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -312,7 +312,7 @@
* Jay Ligatti, Josh Kuhn, and Chris Gage.
* Proceedings of the IEEE International Conference on Computer
* Communication Networks (ICCCN), August 2010.
- * http://www.cse.usf.edu/~ligatti/papers/grouper-conf.pdf
+ * https://www.cse.usf.edu/~ligatti/papers/grouper-conf.pdf
*
* [Rottenstreich 2010]
* Worst-Case TCAM Rule Expansion
@@ -325,7 +325,7 @@
* Kirill Kogan, Sergey Nikolenko, Ori Rottenstreich, William Culhane,
* and Patrick Eugster.
* Proceedings of the 2014 ACM conference on SIGCOMM, August 2014.
- * http://www.sigcomm.org/sites/default/files/ccr/papers/2014/August/2619239-2626294.pdf
+ * https://www.sigcomm.org/sites/default/files/ccr/papers/2014/August/2619239-2626294.pdf
*/
#include <linux/kernel.h>
@@ -401,7 +401,7 @@ int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
* nft_pipapo_lookup() - Lookup function
* @net: Network namespace
* @set: nftables API set representation
- * @elem: nftables API element representation containing key data
+ * @key: nftables API element representation containing key data
* @ext: nftables API extension pointer, filled with matching reference
*
* For more details, see DOC: Theory of Operation.
@@ -1075,7 +1075,7 @@ out:
* @m: Matching data, including mapping table
* @map: Table of rule maps: array of first rule and amount of rules
* in next field a given rule maps to, for each field
- * @ext: For last field, nft_set_ext pointer matching rules map to
+ * @e: For last field, nft_set_ext pointer matching rules map to
*/
static void pipapo_map(struct nft_pipapo_match *m,
union nft_pipapo_map_bucket map[NFT_PIPAPO_MAX_FIELDS],
@@ -1099,7 +1099,7 @@ static void pipapo_map(struct nft_pipapo_match *m,
/**
* pipapo_realloc_scratch() - Reallocate scratch maps for partial match results
* @clone: Copy of matching data with pending insertions and deletions
- * @bsize_max Maximum bucket size, scratch maps cover two buckets
+ * @bsize_max: Maximum bucket size, scratch maps cover two buckets
*
* Return: 0 on success, -ENOMEM on failure.
*/
@@ -1249,8 +1249,6 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (err)
return err;
- this_cpu_write(nft_pipapo_scratch_index, false);
-
m->bsize_max = bsize_max;
} else {
put_cpu_ptr(m->scratch);
@@ -1449,7 +1447,7 @@ static void pipapo_unmap(union nft_pipapo_map_bucket *mt, int rules,
/**
* pipapo_drop() - Delete entry from lookup and mapping tables, given rule map
* @m: Matching data
- * @rulemap Table of rule maps, arrays of first rule and amount of rules
+ * @rulemap: Table of rule maps, arrays of first rule and amount of rules
* in next field a given entry maps to, for each field
*
* For each rule in lookup table buckets mapping to this set of rules, drop
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index 51b454d8fa9c..cedf47ab3c6f 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -25,7 +25,7 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
skb->ip_summed = CHECKSUM_UNNECESSARY;
break;
}
- /* fall through */
+ fallthrough;
case CHECKSUM_NONE:
if (protocol != IPPROTO_TCP && protocol != IPPROTO_UDP)
skb->csum = 0;
@@ -51,7 +51,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
case CHECKSUM_COMPLETE:
if (len == skb->len - dataoff)
return nf_ip_checksum(skb, hook, dataoff, protocol);
- /* fall through */
+ fallthrough;
case CHECKSUM_NONE:
skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
skb->len - dataoff, 0);
@@ -79,7 +79,7 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
skb->ip_summed = CHECKSUM_UNNECESSARY;
break;
}
- /* fall through */
+ fallthrough;
case CHECKSUM_NONE:
skb->csum = ~csum_unfold(
csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
@@ -106,7 +106,7 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
case CHECKSUM_COMPLETE:
if (len == skb->len - dataoff)
return nf_ip6_checksum(skb, hook, dataoff, protocol);
- /* fall through */
+ fallthrough;
case CHECKSUM_NONE:
hsum = skb_checksum(skb, 0, dataoff, 0);
skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 99a468be4a59..73d343abd115 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1028,34 +1028,34 @@ int xt_check_target(struct xt_tgchk_param *par,
EXPORT_SYMBOL_GPL(xt_check_target);
/**
- * xt_copy_counters_from_user - copy counters and metadata from userspace
+ * xt_copy_counters - copy counters and metadata from a sockptr_t
*
- * @user: src pointer to userspace memory
+ * @arg: src sockptr
* @len: alleged size of userspace memory
* @info: where to store the xt_counters_info metadata
- * @compat: true if we setsockopt call is done by 32bit task on 64bit kernel
*
* Copies counter meta data from @user and stores it in @info.
*
* vmallocs memory to hold the counters, then copies the counter data
* from @user to the new memory and returns a pointer to it.
*
- * If @compat is true, @info gets converted automatically to the 64bit
- * representation.
+ * If called from a compat syscall, @info gets converted automatically to the
+ * 64bit representation.
*
* The metadata associated with the counters is stored in @info.
*
* Return: returns pointer that caller has to test via IS_ERR().
* If IS_ERR is false, caller has to vfree the pointer.
*/
-void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
- struct xt_counters_info *info, bool compat)
+void *xt_copy_counters(sockptr_t arg, unsigned int len,
+ struct xt_counters_info *info)
{
+ size_t offset;
void *mem;
u64 size;
#ifdef CONFIG_COMPAT
- if (compat) {
+ if (in_compat_syscall()) {
/* structures only differ in size due to alignment */
struct compat_xt_counters_info compat_tmp;
@@ -1063,12 +1063,12 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
return ERR_PTR(-EINVAL);
len -= sizeof(compat_tmp);
- if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0)
+ if (copy_from_sockptr(&compat_tmp, arg, sizeof(compat_tmp)) != 0)
return ERR_PTR(-EFAULT);
memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1);
info->num_counters = compat_tmp.num_counters;
- user += sizeof(compat_tmp);
+ offset = sizeof(compat_tmp);
} else
#endif
{
@@ -1076,10 +1076,10 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
return ERR_PTR(-EINVAL);
len -= sizeof(*info);
- if (copy_from_user(info, user, sizeof(*info)) != 0)
+ if (copy_from_sockptr(info, arg, sizeof(*info)) != 0)
return ERR_PTR(-EFAULT);
- user += sizeof(*info);
+ offset = sizeof(*info);
}
info->name[sizeof(info->name) - 1] = '\0';
@@ -1093,13 +1093,13 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
if (!mem)
return ERR_PTR(-ENOMEM);
- if (copy_from_user(mem, user, len) == 0)
+ if (copy_from_sockptr_offset(mem, arg, offset, len) == 0)
return mem;
vfree(mem);
return ERR_PTR(-EFAULT);
}
-EXPORT_SYMBOL_GPL(xt_copy_counters_from_user);
+EXPORT_SYMBOL_GPL(xt_copy_counters);
#ifdef CONFIG_COMPAT
int xt_compat_target_offset(const struct xt_target *target)
@@ -1571,7 +1571,7 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
trav->curr = trav->curr->next;
if (trav->curr != trav->head)
break;
- /* fall through */
+ fallthrough;
default:
return NULL;
}
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index a5c8b653476a..76acecf3e757 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -6,7 +6,7 @@
* with the SECMARK target and state match.
*
* Based somewhat on CONNMARK:
- * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
+ * Copyright (C) 2002,2004 MARA Systems AB <https://www.marasystems.com>
* by Henrik Nordstrom <hno@marasystems.com>
*
* (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com>
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index eec2f3a88d73..e5ebc0810675 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -2,7 +2,7 @@
/*
* xt_connmark - Netfilter module to operate on connection marks
*
- * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
+ * Copyright (C) 2002,2004 MARA Systems AB <https://www.marasystems.com>
* by Henrik Nordstrom <hno@marasystems.com>
* Copyright © CC Computer Consultants GmbH, 2007 - 2008
* Jan Engelhardt <jengelh@medozas.de>
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index 5aab6df74e0f..a97c2259bbc8 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
- * (C) 2011 Intra2net AG <http://www.intra2net.com>
+ * (C) 2011 Intra2net AG <https://www.intra2net.com>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 67cb98489415..6aa12d0f54e2 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -5,7 +5,7 @@
* based on ipt_time by Fabrice MARIE <fabrice@netfilter.org>
* This is a module which is used for time matching
* It is using some modified code from dietlibc (localtime() function)
- * that you can find at http://www.fefe.de/dietlibc/
+ * that you can find at https://www.fefe.de/dietlibc/
* This file is distributed under the terms of the GNU General Public
* License (GPL). Copies of the GPL can be obtained from gnu.org/gpl.
*/
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index a1f2320ecc16..d07de2c0fbc7 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -92,7 +92,7 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry)
/**
* netlbl_domhsh_hash - Hashing function for the domain hash table
- * @domain: the domain name to hash
+ * @key: the domain name to hash
*
* Description:
* This is the hashing function for the domain hash table, it returns the
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4f2c3b14ddbf..b5f30d7d30d0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -60,6 +60,7 @@
#include <linux/genetlink.h>
#include <linux/net_namespace.h>
#include <linux/nospec.h>
+#include <linux/btf_ids.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -1620,7 +1621,7 @@ static void netlink_update_socket_mc(struct netlink_sock *nlk,
}
static int netlink_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
@@ -1631,7 +1632,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
return -ENOPROTOOPT;
if (optlen >= sizeof(int) &&
- get_user(val, (unsigned int __user *)optval))
+ copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
switch (optname) {
@@ -2803,21 +2804,29 @@ static const struct rhashtable_params netlink_rhashtable_params = {
};
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
-static const struct bpf_iter_reg netlink_reg_info = {
- .target = "netlink",
+BTF_ID_LIST(btf_netlink_sock_id)
+BTF_ID(struct, netlink_sock)
+
+static const struct bpf_iter_seq_info netlink_seq_info = {
.seq_ops = &netlink_seq_ops,
.init_seq_private = bpf_iter_init_seq_net,
.fini_seq_private = bpf_iter_fini_seq_net,
.seq_priv_size = sizeof(struct nl_seq_iter),
+};
+
+static struct bpf_iter_reg netlink_reg_info = {
+ .target = "netlink",
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__netlink, sk),
PTR_TO_BTF_ID_OR_NULL },
},
+ .seq_info = &netlink_seq_info,
};
static int __init bpf_iter_register(void)
{
+ netlink_reg_info.ctx_arg_info[0].btf_id = *btf_netlink_sock_id;
return bpf_iter_reg_target(&netlink_reg_info);
}
#endif
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index f90ef6934b8f..6d16e1ab1a8a 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -294,7 +294,7 @@ void nr_destroy_socket(struct sock *sk)
*/
static int nr_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct nr_sock *nr = nr_sk(sk);
@@ -306,7 +306,7 @@ static int nr_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(unsigned int))
return -EINVAL;
- if (get_user(opt, (unsigned int __user *)optval))
+ if (copy_from_sockptr(&opt, optval, sizeof(unsigned int)))
return -EFAULT;
switch (optname) {
diff --git a/net/nfc/core.c b/net/nfc/core.c
index c5f9c3ee82f8..eb377f87bcae 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -704,7 +704,6 @@ EXPORT_SYMBOL(nfc_tm_deactivated);
* nfc_alloc_send_skb - allocate a skb for data exchange responses
*
* @size: size to allocate
- * @gfp: gfp flags
*/
struct sk_buff *nfc_alloc_send_skb(struct nfc_dev *dev, struct sock *sk,
unsigned int flags, unsigned int size,
@@ -749,7 +748,7 @@ EXPORT_SYMBOL(nfc_alloc_recv_skb);
*
* @dev: The nfc device that found the targets
* @targets: array of nfc targets found
- * @ntargets: targets array size
+ * @n_targets: targets array size
*
* The device driver must call this function when one or many nfc targets
* are found. After calling this function, the device driver must stop
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 28604414dec1..d257ed3b732a 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -218,7 +218,7 @@ error:
}
static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
@@ -241,7 +241,7 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
@@ -263,7 +263,7 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (get_user(opt, (u32 __user *) optval)) {
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
err = -EFAULT;
break;
}
@@ -921,8 +921,6 @@ static const struct proto_ops llcp_rawsock_ops = {
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.sendmsg = sock_no_sendmsg,
.recvmsg = llcp_sock_recvmsg,
.mmap = sock_no_mmap,
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 78ea8c94dcba..741da8f81c2b 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1182,7 +1182,7 @@ EXPORT_SYMBOL(nci_free_device);
/**
* nci_register_device - register a nci device in the nfc subsystem
*
- * @dev: The nci device to register
+ * @ndev: The nci device to register
*/
int nci_register_device(struct nci_dev *ndev)
{
@@ -1249,7 +1249,7 @@ EXPORT_SYMBOL(nci_register_device);
/**
* nci_unregister_device - unregister a nci device in the nfc subsystem
*
- * @dev: The nci device to unregister
+ * @ndev: The nci device to unregister
*/
void nci_unregister_device(struct nci_dev *ndev)
{
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index ba5ffd3badd3..b2061b6746ea 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -276,8 +276,6 @@ static const struct proto_ops rawsock_ops = {
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.sendmsg = rawsock_sendmsg,
.recvmsg = rawsock_recvmsg,
.mmap = sock_no_mmap,
@@ -296,8 +294,6 @@ static const struct proto_ops rawsock_raw_ops = {
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.sendmsg = sock_no_sendmsg,
.recvmsg = rawsock_recvmsg,
.mmap = sock_no_mmap,
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 4340f25fe390..98d393e70de3 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -276,10 +276,6 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
ovs_ct_update_key(skb, NULL, key, false, false);
}
-#define IN6_ADDR_INITIALIZER(ADDR) \
- { (ADDR).s6_addr32[0], (ADDR).s6_addr32[1], \
- (ADDR).s6_addr32[2], (ADDR).s6_addr32[3] }
-
int ovs_ct_put_key(const struct sw_flow_key *swkey,
const struct sw_flow_key *output, struct sk_buff *skb)
{
@@ -301,24 +297,30 @@ int ovs_ct_put_key(const struct sw_flow_key *swkey,
if (swkey->ct_orig_proto) {
if (swkey->eth.type == htons(ETH_P_IP)) {
- struct ovs_key_ct_tuple_ipv4 orig = {
- output->ipv4.ct_orig.src,
- output->ipv4.ct_orig.dst,
- output->ct.orig_tp.src,
- output->ct.orig_tp.dst,
- output->ct_orig_proto,
- };
+ struct ovs_key_ct_tuple_ipv4 orig;
+
+ memset(&orig, 0, sizeof(orig));
+ orig.ipv4_src = output->ipv4.ct_orig.src;
+ orig.ipv4_dst = output->ipv4.ct_orig.dst;
+ orig.src_port = output->ct.orig_tp.src;
+ orig.dst_port = output->ct.orig_tp.dst;
+ orig.ipv4_proto = output->ct_orig_proto;
+
if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,
sizeof(orig), &orig))
return -EMSGSIZE;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- struct ovs_key_ct_tuple_ipv6 orig = {
- IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.src),
- IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.dst),
- output->ct.orig_tp.src,
- output->ct.orig_tp.dst,
- output->ct_orig_proto,
- };
+ struct ovs_key_ct_tuple_ipv6 orig;
+
+ memset(&orig, 0, sizeof(orig));
+ memcpy(orig.ipv6_src, output->ipv6.ct_orig.src.s6_addr32,
+ sizeof(orig.ipv6_src));
+ memcpy(orig.ipv6_dst, output->ipv6.ct_orig.dst.s6_addr32,
+ sizeof(orig.ipv6_dst));
+ orig.src_port = output->ct.orig_tp.src;
+ orig.dst_port = output->ct.orig_tp.dst;
+ orig.ipv6_proto = output->ct_orig_proto;
+
if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,
sizeof(orig), &orig))
return -EMSGSIZE;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 94b024534987..42f8cc70bb2c 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -130,6 +130,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
const struct dp_upcall_info *,
uint32_t cutlen);
+static void ovs_dp_masks_rebalance(struct work_struct *work);
+
/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
@@ -223,13 +225,14 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
struct dp_stats_percpu *stats;
u64 *stats_counter;
u32 n_mask_hit;
+ u32 n_cache_hit;
int error;
stats = this_cpu_ptr(dp->stats_percpu);
/* Look up flow. */
flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
- &n_mask_hit);
+ &n_mask_hit, &n_cache_hit);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
@@ -260,6 +263,7 @@ out:
u64_stats_update_begin(&stats->syncp);
(*stats_counter)++;
stats->n_mask_hit += n_mask_hit;
+ stats->n_cache_hit += n_cache_hit;
u64_stats_update_end(&stats->syncp);
}
@@ -697,6 +701,7 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
stats->n_missed += local_stats.n_missed;
stats->n_lost += local_stats.n_lost;
mega_stats->n_mask_hit += local_stats.n_mask_hit;
+ mega_stats->n_cache_hit += local_stats.n_cache_hit;
}
}
@@ -1493,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void)
msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
+ msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
return msgsize;
}
@@ -1530,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
goto nla_put_failure;
+ if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
+ ovs_flow_tbl_masks_cache_size(&dp->table)))
+ goto nla_put_failure;
+
genlmsg_end(skb, ovs_header);
return 0;
@@ -1594,6 +1604,16 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
#endif
}
+ if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
+ int err;
+ u32 cache_size;
+
+ cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
+ err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
+ if (err)
+ return err;
+ }
+
dp->user_features = user_features;
if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
@@ -1882,6 +1902,8 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
+ [OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
+ PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
};
static const struct genl_ops dp_datapath_genl_ops[] = {
@@ -2338,6 +2360,23 @@ out:
return skb->len;
}
+static void ovs_dp_masks_rebalance(struct work_struct *work)
+{
+ struct ovs_net *ovs_net = container_of(work, struct ovs_net,
+ masks_rebalance.work);
+ struct datapath *dp;
+
+ ovs_lock();
+
+ list_for_each_entry(dp, &ovs_net->dps, list_node)
+ ovs_flow_masks_rebalance(&dp->table);
+
+ ovs_unlock();
+
+ schedule_delayed_work(&ovs_net->masks_rebalance,
+ msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
+}
+
static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
@@ -2432,6 +2471,9 @@ static int __net_init ovs_init_net(struct net *net)
INIT_LIST_HEAD(&ovs_net->dps);
INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
+ INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);
+ schedule_delayed_work(&ovs_net->masks_rebalance,
+ msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
return ovs_ct_init(net);
}
@@ -2486,6 +2528,7 @@ static void __net_exit ovs_exit_net(struct net *dnet)
ovs_unlock();
+ cancel_delayed_work_sync(&ovs_net->masks_rebalance);
cancel_work_sync(&ovs_net->dp_notify_work);
}
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 2016dd107939..38f7d3e66ca6 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -20,8 +20,9 @@
#include "meter.h"
#include "vport-internal_dev.h"
-#define DP_MAX_PORTS USHRT_MAX
-#define DP_VPORT_HASH_BUCKETS 1024
+#define DP_MAX_PORTS USHRT_MAX
+#define DP_VPORT_HASH_BUCKETS 1024
+#define DP_MASKS_REBALANCE_INTERVAL 4000
/**
* struct dp_stats_percpu - per-cpu packet processing statistics for a given
@@ -37,12 +38,15 @@
* @n_mask_hit: Number of masks looked up for flow match.
* @n_mask_hit / (@n_hit + @n_missed) will be the average masks looked
* up per packet.
+ * @n_cache_hit: The number of received packets that had their mask found using
+ * the mask cache.
*/
struct dp_stats_percpu {
u64 n_hit;
u64 n_missed;
u64 n_lost;
u64 n_mask_hit;
+ u64 n_cache_hit;
struct u64_stats_sync syncp;
};
@@ -131,6 +135,7 @@ struct dp_upcall_info {
struct ovs_net {
struct list_head dps;
struct work_struct dp_notify_work;
+ struct delayed_work masks_rebalance;
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_ct_limit_info *ct_limit_info;
#endif
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 9d375e74b607..03942c30d83e 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -890,6 +890,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
if (static_branch_unlikely(&tc_recirc_sharing_support)) {
tc_ext = skb_ext_find(skb, TC_SKB_EXT);
key->recirc_id = tc_ext ? tc_ext->chain : 0;
+ OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
} else {
key->recirc_id = 0;
}
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 79252d4887ff..9d3e50c4d29f 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1763,11 +1763,11 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val)
* does not include any don't care bit.
* @net: Used to determine per-namespace field support.
* @match: receives the extracted flow match information.
- * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * @nla_key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
* sequence. The fields should of the packet that triggered the creation
* of this flow.
- * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
- * attribute specifies the mask field of the wildcarded flow.
+ * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_*
+ * Netlink attribute specifies the mask field of the wildcarded flow.
* @log: Boolean to allow kernel error logging. Normally true, but when
* probing for feature compatibility this should be passed in as false to
* suppress unnecessary error logging.
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 2398d7238300..6527d84c3ea6 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -29,6 +29,7 @@
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/rculist.h>
+#include <linux/sort.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
@@ -37,8 +38,8 @@
#define MASK_ARRAY_SIZE_MIN 16
#define REHASH_INTERVAL (10 * 60 * HZ)
+#define MC_DEFAULT_HASH_ENTRIES 256
#define MC_HASH_SHIFT 8
-#define MC_HASH_ENTRIES (1u << MC_HASH_SHIFT)
#define MC_HASH_SEGS ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
static struct kmem_cache *flow_cache;
@@ -169,16 +170,70 @@ static struct table_instance *table_instance_alloc(int new_size)
return ti;
}
+static void __mask_array_destroy(struct mask_array *ma)
+{
+ free_percpu(ma->masks_usage_cntr);
+ kfree(ma);
+}
+
+static void mask_array_rcu_cb(struct rcu_head *rcu)
+{
+ struct mask_array *ma = container_of(rcu, struct mask_array, rcu);
+
+ __mask_array_destroy(ma);
+}
+
+static void tbl_mask_array_reset_counters(struct mask_array *ma)
+{
+ int i, cpu;
+
+ /* As the per CPU counters are not atomic we can not go ahead and
+ * reset them from another CPU. To be able to still have an approximate
+ * zero based counter we store the value at reset, and subtract it
+ * later when processing.
+ */
+ for (i = 0; i < ma->max; i++) {
+ ma->masks_usage_zero_cntr[i] = 0;
+
+ for_each_possible_cpu(cpu) {
+ u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr,
+ cpu);
+ unsigned int start;
+ u64 counter;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&ma->syncp);
+ counter = usage_counters[i];
+ } while (u64_stats_fetch_retry_irq(&ma->syncp, start));
+
+ ma->masks_usage_zero_cntr[i] += counter;
+ }
+ }
+}
+
static struct mask_array *tbl_mask_array_alloc(int size)
{
struct mask_array *new;
size = max(MASK_ARRAY_SIZE_MIN, size);
new = kzalloc(sizeof(struct mask_array) +
- sizeof(struct sw_flow_mask *) * size, GFP_KERNEL);
+ sizeof(struct sw_flow_mask *) * size +
+ sizeof(u64) * size, GFP_KERNEL);
if (!new)
return NULL;
+ new->masks_usage_zero_cntr = (u64 *)((u8 *)new +
+ sizeof(struct mask_array) +
+ sizeof(struct sw_flow_mask *) *
+ size);
+
+ new->masks_usage_cntr = __alloc_percpu(sizeof(u64) * size,
+ __alignof__(u64));
+ if (!new->masks_usage_cntr) {
+ kfree(new);
+ return NULL;
+ }
+
new->count = 0;
new->max = size;
@@ -202,10 +257,10 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
if (ovsl_dereference(old->masks[i]))
new->masks[new->count++] = old->masks[i];
}
+ call_rcu(&old->rcu, mask_array_rcu_cb);
}
rcu_assign_pointer(tbl->mask_array, new);
- kfree_rcu(old, rcu);
return 0;
}
@@ -223,6 +278,11 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl,
return err;
ma = ovsl_dereference(tbl->mask_array);
+ } else {
+ /* On every add or delete we need to reset the counters so
+ * every new mask gets a fair chance of being prioritized.
+ */
+ tbl_mask_array_reset_counters(ma);
}
BUG_ON(ovsl_dereference(ma->masks[ma_count]));
@@ -260,6 +320,9 @@ found:
if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
ma_count <= (ma->max / 3))
tbl_mask_array_realloc(tbl, ma->max / 2);
+ else
+ tbl_mask_array_reset_counters(ma);
+
}
/* Remove 'mask' from the mask list, if it is not needed any more. */
@@ -278,15 +341,79 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
}
}
+static void __mask_cache_destroy(struct mask_cache *mc)
+{
+ free_percpu(mc->mask_cache);
+ kfree(mc);
+}
+
+static void mask_cache_rcu_cb(struct rcu_head *rcu)
+{
+ struct mask_cache *mc = container_of(rcu, struct mask_cache, rcu);
+
+ __mask_cache_destroy(mc);
+}
+
+static struct mask_cache *tbl_mask_cache_alloc(u32 size)
+{
+ struct mask_cache_entry __percpu *cache = NULL;
+ struct mask_cache *new;
+
+ /* Only allow size to be 0, or a power of 2, and does not exceed
+ * percpu allocation size.
+ */
+ if ((!is_power_of_2(size) && size != 0) ||
+ (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)
+ return NULL;
+
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ new->cache_size = size;
+ if (new->cache_size > 0) {
+ cache = __alloc_percpu(array_size(sizeof(struct mask_cache_entry),
+ new->cache_size),
+ __alignof__(struct mask_cache_entry));
+ if (!cache) {
+ kfree(new);
+ return NULL;
+ }
+ }
+
+ new->mask_cache = cache;
+ return new;
+}
+int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
+{
+ struct mask_cache *mc = rcu_dereference(table->mask_cache);
+ struct mask_cache *new;
+
+ if (size == mc->cache_size)
+ return 0;
+
+ if ((!is_power_of_2(size) && size != 0) ||
+ (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)
+ return -EINVAL;
+
+ new = tbl_mask_cache_alloc(size);
+ if (!new)
+ return -ENOMEM;
+
+ rcu_assign_pointer(table->mask_cache, new);
+ call_rcu(&mc->rcu, mask_cache_rcu_cb);
+
+ return 0;
+}
+
int ovs_flow_tbl_init(struct flow_table *table)
{
struct table_instance *ti, *ufid_ti;
+ struct mask_cache *mc;
struct mask_array *ma;
- table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
- MC_HASH_ENTRIES,
- __alignof__(struct mask_cache_entry));
- if (!table->mask_cache)
+ mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
+ if (!mc)
return -ENOMEM;
ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
@@ -304,6 +431,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
rcu_assign_pointer(table->ti, ti);
rcu_assign_pointer(table->ufid_ti, ufid_ti);
rcu_assign_pointer(table->mask_array, ma);
+ rcu_assign_pointer(table->mask_cache, mc);
table->last_rehash = jiffies;
table->count = 0;
table->ufid_count = 0;
@@ -312,9 +440,9 @@ int ovs_flow_tbl_init(struct flow_table *table)
free_ti:
__table_instance_destroy(ti);
free_mask_array:
- kfree(ma);
+ __mask_array_destroy(ma);
free_mask_cache:
- free_percpu(table->mask_cache);
+ __mask_cache_destroy(mc);
return -ENOMEM;
}
@@ -390,9 +518,11 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
{
struct table_instance *ti = rcu_dereference_raw(table->ti);
struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
+ struct mask_cache *mc = rcu_dereference(table->mask_cache);
+ struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
- free_percpu(table->mask_cache);
- kfree_rcu(rcu_dereference_raw(table->mask_array), rcu);
+ call_rcu(&mc->rcu, mask_cache_rcu_cb);
+ call_rcu(&ma->rcu, mask_array_rcu_cb);
table_instance_destroy(table, ti, ufid_ti, false);
}
@@ -604,8 +734,10 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
struct mask_array *ma,
const struct sw_flow_key *key,
u32 *n_mask_hit,
+ u32 *n_cache_hit,
u32 *index)
{
+ u64 *usage_counters = this_cpu_ptr(ma->masks_usage_cntr);
struct sw_flow *flow;
struct sw_flow_mask *mask;
int i;
@@ -614,8 +746,13 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
mask = rcu_dereference_ovsl(ma->masks[*index]);
if (mask) {
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
- if (flow)
+ if (flow) {
+ u64_stats_update_begin(&ma->syncp);
+ usage_counters[*index]++;
+ u64_stats_update_end(&ma->syncp);
+ (*n_cache_hit)++;
return flow;
+ }
}
}
@@ -631,6 +768,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
if (flow) { /* Found */
*index = i;
+ u64_stats_update_begin(&ma->syncp);
+ usage_counters[*index]++;
+ u64_stats_update_end(&ma->syncp);
return flow;
}
}
@@ -648,8 +788,10 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
const struct sw_flow_key *key,
u32 skb_hash,
- u32 *n_mask_hit)
+ u32 *n_mask_hit,
+ u32 *n_cache_hit)
{
+ struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
struct mask_array *ma = rcu_dereference(tbl->mask_array);
struct table_instance *ti = rcu_dereference(tbl->ti);
struct mask_cache_entry *entries, *ce;
@@ -658,10 +800,13 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
int seg;
*n_mask_hit = 0;
- if (unlikely(!skb_hash)) {
+ *n_cache_hit = 0;
+ if (unlikely(!skb_hash || mc->cache_size == 0)) {
u32 mask_index = 0;
+ u32 cache = 0;
- return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
+ return flow_lookup(tbl, ti, ma, key, n_mask_hit, &cache,
+ &mask_index);
}
/* Pre and post recirulation flows usually have the same skb_hash
@@ -672,17 +817,17 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
ce = NULL;
hash = skb_hash;
- entries = this_cpu_ptr(tbl->mask_cache);
+ entries = this_cpu_ptr(mc->mask_cache);
/* Find the cache entry 'ce' to operate on. */
for (seg = 0; seg < MC_HASH_SEGS; seg++) {
- int index = hash & (MC_HASH_ENTRIES - 1);
+ int index = hash & (mc->cache_size - 1);
struct mask_cache_entry *e;
e = &entries[index];
if (e->skb_hash == skb_hash) {
flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
- &e->mask_index);
+ n_cache_hit, &e->mask_index);
if (!flow)
e->skb_hash = 0;
return flow;
@@ -695,10 +840,12 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
}
/* Cache miss, do full lookup. */
- flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
+ flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, n_cache_hit,
+ &ce->mask_index);
if (flow)
ce->skb_hash = skb_hash;
+ *n_cache_hit = 0;
return flow;
}
@@ -708,9 +855,10 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
u32 __always_unused n_mask_hit;
+ u32 __always_unused n_cache_hit;
u32 index = 0;
- return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index);
+ return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index);
}
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
@@ -787,6 +935,13 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table)
return READ_ONCE(ma->count);
}
+u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
+{
+ struct mask_cache *mc = rcu_dereference(table->mask_cache);
+
+ return READ_ONCE(mc->cache_size);
+}
+
static struct table_instance *table_instance_expand(struct table_instance *ti,
bool ufid)
{
@@ -934,6 +1089,98 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
return 0;
}
+static int compare_mask_and_count(const void *a, const void *b)
+{
+ const struct mask_count *mc_a = a;
+ const struct mask_count *mc_b = b;
+
+ return (s64)mc_b->counter - (s64)mc_a->counter;
+}
+
+/* Must be called with OVS mutex held. */
+void ovs_flow_masks_rebalance(struct flow_table *table)
+{
+ struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
+ struct mask_count *masks_and_count;
+ struct mask_array *new;
+ int masks_entries = 0;
+ int i;
+
+ /* Build array of all current entries with use counters. */
+ masks_and_count = kmalloc_array(ma->max, sizeof(*masks_and_count),
+ GFP_KERNEL);
+ if (!masks_and_count)
+ return;
+
+ for (i = 0; i < ma->max; i++) {
+ struct sw_flow_mask *mask;
+ unsigned int start;
+ int cpu;
+
+ mask = rcu_dereference_ovsl(ma->masks[i]);
+ if (unlikely(!mask))
+ break;
+
+ masks_and_count[i].index = i;
+ masks_and_count[i].counter = 0;
+
+ for_each_possible_cpu(cpu) {
+ u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr,
+ cpu);
+ u64 counter;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&ma->syncp);
+ counter = usage_counters[i];
+ } while (u64_stats_fetch_retry_irq(&ma->syncp, start));
+
+ masks_and_count[i].counter += counter;
+ }
+
+ /* Subtract the zero count value. */
+ masks_and_count[i].counter -= ma->masks_usage_zero_cntr[i];
+
+ /* Rather than calling tbl_mask_array_reset_counters()
+ * below when no change is needed, do it inline here.
+ */
+ ma->masks_usage_zero_cntr[i] += masks_and_count[i].counter;
+ }
+
+ if (i == 0)
+ goto free_mask_entries;
+
+ /* Sort the entries */
+ masks_entries = i;
+ sort(masks_and_count, masks_entries, sizeof(*masks_and_count),
+ compare_mask_and_count, NULL);
+
+ /* If the order is the same, nothing to do... */
+ for (i = 0; i < masks_entries; i++) {
+ if (i != masks_and_count[i].index)
+ break;
+ }
+ if (i == masks_entries)
+ goto free_mask_entries;
+
+ /* Rebuilt the new list in order of usage. */
+ new = tbl_mask_array_alloc(ma->max);
+ if (!new)
+ goto free_mask_entries;
+
+ for (i = 0; i < masks_entries; i++) {
+ int index = masks_and_count[i].index;
+
+ if (ovsl_dereference(ma->masks[index]))
+ new->masks[new->count++] = ma->masks[index];
+ }
+
+ rcu_assign_pointer(table->mask_array, new);
+ call_rcu(&ma->rcu, mask_array_rcu_cb);
+
+free_mask_entries:
+ kfree(masks_and_count);
+}
+
/* Initializes the flow module.
* Returns zero if successful or a negative error code. */
int ovs_flow_init(void)
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 8a5cea6ae111..74ce48fecba9 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -27,9 +27,23 @@ struct mask_cache_entry {
u32 mask_index;
};
+struct mask_cache {
+ struct rcu_head rcu;
+ u32 cache_size; /* Must be ^2 value. */
+ struct mask_cache_entry __percpu *mask_cache;
+};
+
+struct mask_count {
+ int index;
+ u64 counter;
+};
+
struct mask_array {
struct rcu_head rcu;
int count, max;
+ u64 __percpu *masks_usage_cntr;
+ u64 *masks_usage_zero_cntr;
+ struct u64_stats_sync syncp;
struct sw_flow_mask __rcu *masks[];
};
@@ -45,7 +59,7 @@ struct table_instance {
struct flow_table {
struct table_instance __rcu *ti;
struct table_instance __rcu *ufid_ti;
- struct mask_cache_entry __percpu *mask_cache;
+ struct mask_cache __rcu *mask_cache;
struct mask_array __rcu *mask_array;
unsigned long last_rehash;
unsigned int count;
@@ -69,12 +83,15 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
const struct sw_flow_mask *mask);
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
int ovs_flow_tbl_num_masks(const struct flow_table *table);
+u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table);
+int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size);
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
u32 *bucket, u32 *idx);
struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
const struct sw_flow_key *,
u32 skb_hash,
- u32 *n_mask_hit);
+ u32 *n_mask_hit,
+ u32 *n_cache_hit);
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
const struct sw_flow_key *);
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
@@ -86,4 +103,7 @@ bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *);
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
bool full, const struct sw_flow_mask *mask);
+
+void ovs_flow_masks_rebalance(struct flow_table *table);
+
#endif /* flow_table.h */
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 47febb4504f0..0d44c5c013fa 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -87,6 +87,7 @@ EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister);
/**
* ovs_vport_locate - find a port that has already been created
*
+ * @net: network namespace
* @name: name of port to find
*
* Must be called with ovs or RCU read lock.
@@ -418,7 +419,7 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
*
* @vport: vport that received the packet
* @skb: skb that was received
- * @tun_key: tunnel (if any) that carried packet
+ * @tun_info: tunnel (if any) that carried packet
*
* Must be called with rcu_read_lock. The packet cannot be shared and
* skb->data should point to the Ethernet header.
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 29bd405adbbd..0b8160d1a6e0 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -593,6 +593,7 @@ static void init_prb_bdqc(struct packet_sock *po,
req_u->req3.tp_block_size);
p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
+ rwlock_init(&p1->blk_fill_in_prog_lock);
p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
prb_init_ft_ops(p1, req_u);
@@ -659,10 +660,9 @@ static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
*
*/
if (BLOCK_NUM_PKTS(pbd)) {
- while (atomic_read(&pkc->blk_fill_in_prog)) {
- /* Waiting for skb_copy_bits to finish... */
- cpu_relax();
- }
+ /* Waiting for skb_copy_bits to finish... */
+ write_lock(&pkc->blk_fill_in_prog_lock);
+ write_unlock(&pkc->blk_fill_in_prog_lock);
}
if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
@@ -921,10 +921,9 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
* the timer-handler already handled this case.
*/
if (!(status & TP_STATUS_BLK_TMO)) {
- while (atomic_read(&pkc->blk_fill_in_prog)) {
- /* Waiting for skb_copy_bits to finish... */
- cpu_relax();
- }
+ /* Waiting for skb_copy_bits to finish... */
+ write_lock(&pkc->blk_fill_in_prog_lock);
+ write_unlock(&pkc->blk_fill_in_prog_lock);
}
prb_close_block(pkc, pbd, po, status);
return;
@@ -944,7 +943,8 @@ static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
{
struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
- atomic_dec(&pkc->blk_fill_in_prog);
+
+ read_unlock(&pkc->blk_fill_in_prog_lock);
}
static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
@@ -998,7 +998,7 @@ static void prb_fill_curr_block(char *curr,
pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
BLOCK_NUM_PKTS(pbd) += 1;
- atomic_inc(&pkc->blk_fill_in_prog);
+ read_lock(&pkc->blk_fill_in_prog_lock);
prb_run_all_ft_ops(pkc, ppd);
}
@@ -1536,7 +1536,7 @@ static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new)
}
}
-static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
+static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data,
unsigned int len)
{
struct bpf_prog *new;
@@ -1545,10 +1545,10 @@ static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
return -EPERM;
- if (len != sizeof(fprog))
- return -EINVAL;
- if (copy_from_user(&fprog, data, len))
- return -EFAULT;
+
+ ret = copy_bpf_fprog_from_user(&fprog, data, len);
+ if (ret)
+ return ret;
ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
if (ret)
@@ -1558,7 +1558,7 @@ static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
return 0;
}
-static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
+static int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data,
unsigned int len)
{
struct bpf_prog *new;
@@ -1568,7 +1568,7 @@ static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
return -EPERM;
if (len != sizeof(fd))
return -EINVAL;
- if (copy_from_user(&fd, data, len))
+ if (copy_from_sockptr(&fd, data, len))
return -EFAULT;
new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
@@ -1579,7 +1579,7 @@ static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
return 0;
}
-static int fanout_set_data(struct packet_sock *po, char __user *data,
+static int fanout_set_data(struct packet_sock *po, sockptr_t data,
unsigned int len)
{
switch (po->fanout->type) {
@@ -3652,7 +3652,8 @@ static void packet_flush_mclist(struct sock *sk)
}
static int
-packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
+packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
+ unsigned int optlen)
{
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
@@ -3672,7 +3673,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
return -EINVAL;
if (len > sizeof(mreq))
len = sizeof(mreq);
- if (copy_from_user(&mreq, optval, len))
+ if (copy_from_sockptr(&mreq, optval, len))
return -EFAULT;
if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
return -EINVAL;
@@ -3703,7 +3704,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen < len) {
ret = -EINVAL;
} else {
- if (copy_from_user(&req_u.req, optval, len))
+ if (copy_from_sockptr(&req_u.req, optval, len))
ret = -EFAULT;
else
ret = packet_set_ring(sk, &req_u, 0,
@@ -3718,7 +3719,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
pkt_sk(sk)->copy_thresh = val;
@@ -3730,7 +3731,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
switch (val) {
case TPACKET_V1:
@@ -3756,7 +3757,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
if (val > INT_MAX)
return -EINVAL;
@@ -3776,7 +3777,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3795,7 +3796,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen < sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3809,7 +3810,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen < sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3825,7 +3826,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
return -EINVAL;
if (optlen < sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3844,7 +3845,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
po->tp_tstamp = val;
@@ -3856,7 +3857,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
return fanout_add(sk, val & 0xffff, val >> 16);
@@ -3874,7 +3875,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
if (val < 0 || val > 1)
return -EINVAL;
@@ -3888,7 +3889,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
lock_sock(sk);
@@ -3907,7 +3908,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(val)))
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
@@ -4040,28 +4041,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
return 0;
}
-
-#ifdef CONFIG_COMPAT
-static int compat_packet_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
-{
- struct packet_sock *po = pkt_sk(sock->sk);
-
- if (level != SOL_PACKET)
- return -ENOPROTOOPT;
-
- if (optname == PACKET_FANOUT_DATA &&
- po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) {
- optval = (char __user *)get_compat_bpf_fprog(optval);
- if (!optval)
- return -EFAULT;
- optlen = sizeof(struct sock_fprog);
- }
-
- return packet_setsockopt(sock, level, optname, optval, optlen);
-}
-#endif
-
static int packet_notifier(struct notifier_block *this,
unsigned long msg, void *ptr)
{
@@ -4293,7 +4272,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
struct packet_ring_buffer *rb;
struct sk_buff_head *rb_queue;
__be16 num;
- int err = -EINVAL;
+ int err;
/* Added to avoid minimal code churn */
struct tpacket_req *req = &req_u->req;
@@ -4525,8 +4504,6 @@ static const struct proto_ops packet_ops_spkt = {
.gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.sendmsg = packet_sendmsg_spkt,
.recvmsg = packet_recvmsg,
.mmap = sock_no_mmap,
@@ -4549,9 +4526,6 @@ static const struct proto_ops packet_ops = {
.shutdown = sock_no_shutdown,
.setsockopt = packet_setsockopt,
.getsockopt = packet_getsockopt,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_packet_setsockopt,
-#endif
.sendmsg = packet_sendmsg,
.recvmsg = packet_recvmsg,
.mmap = packet_mmap,
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 907f4cd2a718..fd41ecb7f605 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -39,7 +39,7 @@ struct tpacket_kbdq_core {
char *nxt_offset;
struct sk_buff *skb;
- atomic_t blk_fill_in_prog;
+ rwlock_t blk_fill_in_prog_lock;
/* Default is set to 8ms */
#define DEFAULT_PRB_RETIRE_TOV (8)
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 4577e43cb777..e47d09aca4af 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -975,7 +975,7 @@ static int pep_init(struct sock *sk)
}
static int pep_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct pep_sock *pn = pep_sk(sk);
int val = 0, err = 0;
@@ -983,7 +983,7 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
if (level != SOL_PNPIPE)
return -ENOPROTOOPT;
if (optlen >= sizeof(int)) {
- if (get_user(val, (int __user *) optval))
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
return -EFAULT;
}
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 76d499f6af9a..2599235d592e 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -439,12 +439,6 @@ const struct proto_ops phonet_dgram_ops = {
.ioctl = pn_socket_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = sock_no_setsockopt,
- .compat_getsockopt = sock_no_getsockopt,
-#endif
.sendmsg = pn_socket_sendmsg,
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
@@ -466,10 +460,6 @@ const struct proto_ops phonet_stream_ops = {
.shutdown = sock_no_shutdown,
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
-#endif
.sendmsg = pn_socket_sendmsg,
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 300a104b9a0f..b4c0db0b7d31 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1209,8 +1209,6 @@ static const struct proto_ops qrtr_proto_ops = {
.gettstamp = sock_gettstamp,
.poll = datagram_poll,
.shutdown = sock_no_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.release = qrtr_release,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 1a5bf3fa4578..b239120dd9ca 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -290,8 +290,7 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return 0;
}
-static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
- int len)
+static int rds_cancel_sent_to(struct rds_sock *rs, sockptr_t optval, int len)
{
struct sockaddr_in6 sin6;
struct sockaddr_in sin;
@@ -308,14 +307,15 @@ static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
goto out;
} else if (len < sizeof(struct sockaddr_in6)) {
/* Assume IPv4 */
- if (copy_from_user(&sin, optval, sizeof(struct sockaddr_in))) {
+ if (copy_from_sockptr(&sin, optval,
+ sizeof(struct sockaddr_in))) {
ret = -EFAULT;
goto out;
}
ipv6_addr_set_v4mapped(sin.sin_addr.s_addr, &sin6.sin6_addr);
sin6.sin6_port = sin.sin_port;
} else {
- if (copy_from_user(&sin6, optval,
+ if (copy_from_sockptr(&sin6, optval,
sizeof(struct sockaddr_in6))) {
ret = -EFAULT;
goto out;
@@ -327,21 +327,20 @@ out:
return ret;
}
-static int rds_set_bool_option(unsigned char *optvar, char __user *optval,
+static int rds_set_bool_option(unsigned char *optvar, sockptr_t optval,
int optlen)
{
int value;
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(value, (int __user *) optval))
+ if (copy_from_sockptr(&value, optval, sizeof(int)))
return -EFAULT;
*optvar = !!value;
return 0;
}
-static int rds_cong_monitor(struct rds_sock *rs, char __user *optval,
- int optlen)
+static int rds_cong_monitor(struct rds_sock *rs, sockptr_t optval, int optlen)
{
int ret;
@@ -358,8 +357,7 @@ static int rds_cong_monitor(struct rds_sock *rs, char __user *optval,
return ret;
}
-static int rds_set_transport(struct rds_sock *rs, char __user *optval,
- int optlen)
+static int rds_set_transport(struct rds_sock *rs, sockptr_t optval, int optlen)
{
int t_type;
@@ -369,7 +367,7 @@ static int rds_set_transport(struct rds_sock *rs, char __user *optval,
if (optlen != sizeof(int))
return -EINVAL;
- if (copy_from_user(&t_type, (int __user *)optval, sizeof(t_type)))
+ if (copy_from_sockptr(&t_type, optval, sizeof(t_type)))
return -EFAULT;
if (t_type < 0 || t_type >= RDS_TRANS_COUNT)
@@ -380,7 +378,7 @@ static int rds_set_transport(struct rds_sock *rs, char __user *optval,
return rs->rs_transport ? 0 : -ENOPROTOOPT;
}
-static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
+static int rds_enable_recvtstamp(struct sock *sk, sockptr_t optval,
int optlen, int optname)
{
int val, valbool;
@@ -388,7 +386,7 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
if (optlen != sizeof(int))
return -EFAULT;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
return -EFAULT;
valbool = val ? 1 : 0;
@@ -404,7 +402,7 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
return 0;
}
-static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval,
+static int rds_recv_track_latency(struct rds_sock *rs, sockptr_t optval,
int optlen)
{
struct rds_rx_trace_so trace;
@@ -413,7 +411,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval,
if (optlen != sizeof(struct rds_rx_trace_so))
return -EFAULT;
- if (copy_from_user(&trace, optval, sizeof(trace)))
+ if (copy_from_sockptr(&trace, optval, sizeof(trace)))
return -EFAULT;
if (trace.rx_traces > RDS_MSG_RX_DGRAM_TRACE_MAX)
@@ -432,7 +430,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval,
}
static int rds_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
int ret;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index a7ae11846cd7..ccdd304eae0a 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -353,21 +353,20 @@ out:
return ret;
}
-int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen)
+int rds_get_mr(struct rds_sock *rs, sockptr_t optval, int optlen)
{
struct rds_get_mr_args args;
if (optlen != sizeof(struct rds_get_mr_args))
return -EINVAL;
- if (copy_from_user(&args, (struct rds_get_mr_args __user *)optval,
- sizeof(struct rds_get_mr_args)))
+ if (copy_from_sockptr(&args, optval, sizeof(struct rds_get_mr_args)))
return -EFAULT;
return __rds_rdma_map(rs, &args, NULL, NULL, NULL);
}
-int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen)
+int rds_get_mr_for_dest(struct rds_sock *rs, sockptr_t optval, int optlen)
{
struct rds_get_mr_for_dest_args args;
struct rds_get_mr_args new_args;
@@ -375,7 +374,7 @@ int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen)
if (optlen != sizeof(struct rds_get_mr_for_dest_args))
return -EINVAL;
- if (copy_from_user(&args, (struct rds_get_mr_for_dest_args __user *)optval,
+ if (copy_from_sockptr(&args, optval,
sizeof(struct rds_get_mr_for_dest_args)))
return -EFAULT;
@@ -394,7 +393,7 @@ int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen)
/*
* Free the MR indicated by the given R_Key
*/
-int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen)
+int rds_free_mr(struct rds_sock *rs, sockptr_t optval, int optlen)
{
struct rds_free_mr_args args;
struct rds_mr *mr;
@@ -403,8 +402,7 @@ int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen)
if (optlen != sizeof(struct rds_free_mr_args))
return -EINVAL;
- if (copy_from_user(&args, (struct rds_free_mr_args __user *)optval,
- sizeof(struct rds_free_mr_args)))
+ if (copy_from_sockptr(&args, optval, sizeof(struct rds_free_mr_args)))
return -EFAULT;
/* Special case - a null cookie means flush all unused MRs */
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
index bfafd4a6d827..ca4c3a667091 100644
--- a/net/rds/rdma_transport.h
+++ b/net/rds/rdma_transport.h
@@ -13,7 +13,7 @@
/* Below reject reason is for legacy interoperability issue with non-linux
* RDS endpoints where older version incompatibility is conveyed via value 1.
- * For future version(s), proper encoded reject reason should be be used.
+ * For future version(s), proper encoded reject reason should be used.
*/
#define RDS_RDMA_REJ_INCOMPAT 1
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 106e862996b9..d35d1fc39807 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -924,9 +924,9 @@ int rds_send_pong(struct rds_conn_path *cp, __be16 dport);
/* rdma.c */
void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
-int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
-int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
-int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
+int rds_get_mr(struct rds_sock *rs, sockptr_t optval, int optlen);
+int rds_get_mr_for_dest(struct rds_sock *rs, sockptr_t optval, int optlen);
+int rds_free_mr(struct rds_sock *rs, sockptr_t optval, int optlen);
void rds_rdma_drop_keys(struct rds_sock *rs);
int rds_rdma_extra_size(struct rds_rdma_args *args,
struct rds_iov_vector *iov);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index c8404971d5ab..aba4afe4dfed 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -450,12 +450,13 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr)
{
struct rds_notifier *notifier;
- struct rds_rdma_notify cmsg = { 0 }; /* fill holes with zero */
+ struct rds_rdma_notify cmsg;
unsigned int count = 0, max_messages = ~0U;
unsigned long flags;
LIST_HEAD(copy);
int err = 0;
+ memset(&cmsg, 0, sizeof(cmsg)); /* fill holes with zero */
/* put_cmsg copies to user space and thus may sleep. We can't do this
* with rs_lock held, so first grab as many notifications as we can stuff
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index ce85656ac9c1..cf7d974e0f61 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -365,7 +365,7 @@ void rose_destroy_socket(struct sock *sk)
*/
static int rose_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct rose_sock *rose = rose_sk(sk);
@@ -377,7 +377,7 @@ static int rose_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(opt, (int __user *)optval))
+ if (copy_from_sockptr(&opt, optval, sizeof(int)))
return -EFAULT;
switch (optname) {
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 394189b81849..e6725a6de015 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -267,7 +267,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
* @gfp: The allocation constraints
* @notify_rx: Where to send notifications instead of socket queue
* @upgrade: Request service upgrade for call
- * @intr: The call is interruptible
+ * @interruptibility: The call is interruptible, or can be canceled.
* @debug_id: The debug ID for tracing to be assigned to the call
*
* Allow a kernel service to begin a call on the nominated socket. This just
@@ -588,7 +588,7 @@ EXPORT_SYMBOL(rxrpc_sock_set_min_security_level);
* set RxRPC socket options
*/
static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
unsigned int min_sec_level;
@@ -639,8 +639,8 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
ret = -EISCONN;
if (rx->sk.sk_state != RXRPC_UNBOUND)
goto error;
- ret = get_user(min_sec_level,
- (unsigned int __user *) optval);
+ ret = copy_from_sockptr(&min_sec_level, optval,
+ sizeof(unsigned int));
if (ret < 0)
goto error;
ret = -EINVAL;
@@ -658,7 +658,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
if (rx->sk.sk_state != RXRPC_SERVER_BOUND2)
goto error;
ret = -EFAULT;
- if (copy_from_user(service_upgrade, optval,
+ if (copy_from_sockptr(service_upgrade, optval,
sizeof(service_upgrade)) != 0)
goto error;
ret = -EINVAL;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 9a2139ebd67d..6d29a3603a3e 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -909,8 +909,8 @@ extern const struct rxrpc_security rxrpc_no_security;
extern struct key_type key_type_rxrpc;
extern struct key_type key_type_rxrpc_s;
-int rxrpc_request_key(struct rxrpc_sock *, char __user *, int);
-int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int);
+int rxrpc_request_key(struct rxrpc_sock *, sockptr_t , int);
+int rxrpc_server_keyring(struct rxrpc_sock *, sockptr_t, int);
int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t,
u32);
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 0c98313dd7a8..94c3df392651 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -896,7 +896,7 @@ static void rxrpc_describe(const struct key *key, struct seq_file *m)
/*
* grab the security key for a socket
*/
-int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen)
+int rxrpc_request_key(struct rxrpc_sock *rx, sockptr_t optval, int optlen)
{
struct key *key;
char *description;
@@ -906,7 +906,7 @@ int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen)
if (optlen <= 0 || optlen > PAGE_SIZE - 1)
return -EINVAL;
- description = memdup_user_nul(optval, optlen);
+ description = memdup_sockptr_nul(optval, optlen);
if (IS_ERR(description))
return PTR_ERR(description);
@@ -926,8 +926,7 @@ int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen)
/*
* grab the security keyring for a server socket
*/
-int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval,
- int optlen)
+int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen)
{
struct key *key;
char *description;
@@ -937,7 +936,7 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval,
if (optlen <= 0 || optlen > PAGE_SIZE - 1)
return -EINVAL;
- description = memdup_user_nul(optval, optlen);
+ description = memdup_sockptr_nul(optval, optlen);
if (IS_ERR(description))
return PTR_ERR(description);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 84badf00647e..a3b37d88800e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -468,6 +468,9 @@ choice
config DEFAULT_FQ_CODEL
bool "Fair Queue Controlled Delay" if NET_SCH_FQ_CODEL
+ config DEFAULT_FQ_PIE
+ bool "Flow Queue Proportional Integral controller Enhanced" if NET_SCH_FQ_PIE
+
config DEFAULT_SFQ
bool "Stochastic Fair Queue" if NET_SCH_SFQ
@@ -480,6 +483,7 @@ config DEFAULT_NET_SCH
default "pfifo_fast" if DEFAULT_PFIFO_FAST
default "fq" if DEFAULT_FQ
default "fq_codel" if DEFAULT_FQ_CODEL
+ default "fq_pie" if DEFAULT_FQ_PIE
default "sfq" if DEFAULT_SFQ
default "pfifo_fast"
endif
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 8ac7eb0a8309..063d8aaf2900 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1059,14 +1059,13 @@ err:
return err;
}
-void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets,
- bool drop, bool hw)
+void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets,
+ u64 drops, bool hw)
{
if (a->cpu_bstats) {
_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
- if (drop)
- this_cpu_ptr(a->cpu_qstats)->drops += packets;
+ this_cpu_ptr(a->cpu_qstats)->drops += drops;
if (hw)
_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
@@ -1075,8 +1074,7 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets,
}
_bstats_update(&a->tcfa_bstats, bytes, packets);
- if (drop)
- a->tcfa_qstats.drops += packets;
+ a->tcfa_qstats.drops += drops;
if (hw)
_bstats_update(&a->tcfa_bstats_hw, bytes, packets);
}
@@ -1475,7 +1473,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_ROOT_MAX + 1];
- u32 portid = skb ? NETLINK_CB(skb).portid : 0;
+ u32 portid = NETLINK_CB(skb).portid;
int ret = 0, ovr = 0;
if ((n->nlmsg_type != RTM_GETACTION) &&
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index c60674cf25c4..f5826e457679 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -598,7 +598,8 @@ again:
if (!tcf_csum_ipv6(skb, update_flags))
goto drop;
break;
- case cpu_to_be16(ETH_P_8021AD): /* fall through */
+ case cpu_to_be16(ETH_P_8021AD):
+ fallthrough;
case cpu_to_be16(ETH_P_8021Q):
if (skb_vlan_tag_present(skb) && !orig_vlan_tag_present) {
protocol = skb->protocol;
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 5928efb6449c..e6ad42b11835 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -706,8 +706,10 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
if (err && err != -EINPROGRESS)
goto out_free;
- if (!err)
+ if (!err) {
*defrag = true;
+ cb.mru = IPCB(skb)->frag_max_size;
+ }
} else { /* NFPROTO_IPV6 */
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
@@ -717,8 +719,10 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
if (err && err != -EINPROGRESS)
goto out_free;
- if (!err)
+ if (!err) {
*defrag = true;
+ cb.mru = IP6CB(skb)->frag_max_size;
+ }
#else
err = -EOPNOTSUPP;
goto out_free;
@@ -792,7 +796,7 @@ static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
}
}
/* Non-ICMP, fall thru to initialize if needed. */
- /* fall through */
+ fallthrough;
case IP_CT_NEW:
/* Seen it before? This can happen for loopback, retrans,
* or local packets.
@@ -1464,12 +1468,12 @@ static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index)
return tcf_idr_search(tn, a, index);
}
-static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
- u64 lastuse, bool hw)
+static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+ u64 drops, u64 lastuse, bool hw)
{
struct tcf_ct *c = to_ct(a);
- tcf_action_update_stats(a, bytes, packets, false, hw);
+ tcf_action_update_stats(a, bytes, packets, drops, hw);
c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse);
}
@@ -1543,10 +1547,10 @@ static int __init ct_init_module(void)
return 0;
-err_tbl_init:
- destroy_workqueue(act_ct_wq);
err_register:
tcf_ct_flow_tables_uninit();
+err_tbl_init:
+ destroy_workqueue(act_ct_wq);
return err;
}
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 416065772719..410e3bbfb9ca 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -171,14 +171,15 @@ static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a,
return action;
}
-static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
- u64 lastuse, bool hw)
+static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+ u64 drops, u64 lastuse, bool hw)
{
struct tcf_gact *gact = to_gact(a);
int action = READ_ONCE(gact->tcf_action);
struct tcf_t *tm = &gact->tcf_tm;
- tcf_action_update_stats(a, bytes, packets, action == TC_ACT_SHOT, hw);
+ tcf_action_update_stats(a, bytes, packets,
+ action == TC_ACT_SHOT ? packets : drops, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 323ae7f6315d..1fb8d428d2c1 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -578,13 +578,13 @@ static int tcf_gate_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u32 packets,
- u64 lastuse, bool hw)
+static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+ u64 drops, u64 lastuse, bool hw)
{
struct tcf_gate *gact = to_gate(a);
struct tcf_t *tm = &gact->tcf_tm;
- tcf_action_update_stats(a, bytes, packets, false, hw);
+ tcf_action_update_stats(a, bytes, packets, drops, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 83dd82fc9f40..b2705318993b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -312,13 +312,13 @@ out:
return retval;
}
-static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
- u64 lastuse, bool hw)
+static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+ u64 drops, u64 lastuse, bool hw)
{
struct tcf_mirred *m = to_mirred(a);
struct tcf_t *tm = &m->tcf_tm;
- tcf_action_update_stats(a, bytes, packets, false, hw);
+ tcf_action_update_stats(a, bytes, packets, drops, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index d41d6200d9de..c158bfed86d5 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -409,13 +409,13 @@ done:
return p->tcf_action;
}
-static void tcf_pedit_stats_update(struct tc_action *a, u64 bytes, u32 packets,
- u64 lastuse, bool hw)
+static void tcf_pedit_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+ u64 drops, u64 lastuse, bool hw)
{
struct tcf_pedit *d = to_pedit(a);
struct tcf_t *tm = &d->tcf_tm;
- tcf_action_update_stats(a, bytes, packets, false, hw);
+ tcf_action_update_stats(a, bytes, packets, drops, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
@@ -436,8 +436,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
return -ENOBUFS;
spin_lock_bh(&p->tcf_lock);
- memcpy(opt->keys, p->tcfp_keys,
- p->tcfp_nkeys * sizeof(struct tc_pedit_key));
+ memcpy(opt->keys, p->tcfp_keys, flex_array_size(opt, keys, p->tcfp_nkeys));
opt->index = p->tcf_index;
opt->nkeys = p->tcfp_nkeys;
opt->flags = p->tcfp_flags;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 8b7a0ac96c51..0b431d493768 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -288,13 +288,13 @@ static void tcf_police_cleanup(struct tc_action *a)
}
static void tcf_police_stats_update(struct tc_action *a,
- u64 bytes, u32 packets,
+ u64 bytes, u64 packets, u64 drops,
u64 lastuse, bool hw)
{
struct tcf_police *police = to_police(a);
struct tcf_t *tm = &police->tcf_tm;
- tcf_action_update_stats(a, bytes, packets, false, hw);
+ tcf_action_update_stats(a, bytes, packets, drops, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index b2b3faa57294..d0652386c6e2 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -74,12 +74,13 @@ err:
}
static void tcf_skbedit_stats_update(struct tc_action *a, u64 bytes,
- u32 packets, u64 lastuse, bool hw)
+ u64 packets, u64 drops,
+ u64 lastuse, bool hw)
{
struct tcf_skbedit *d = to_skbedit(a);
struct tcf_t *tm = &d->tcf_tm;
- tcf_action_update_stats(a, bytes, packets, false, hw);
+ tcf_action_update_stats(a, bytes, packets, drops, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index c91d3958fcbb..a5ff9f68ab02 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -302,13 +302,13 @@ static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u32 packets,
- u64 lastuse, bool hw)
+static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+ u64 drops, u64 lastuse, bool hw)
{
struct tcf_vlan *v = to_vlan(a);
struct tcf_t *tm = &v->tcf_tm;
- tcf_action_update_stats(a, bytes, packets, false, hw);
+ tcf_action_update_stats(a, bytes, packets, drops, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 4619cb3cb0a8..41a55c6cbeb8 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -621,7 +621,7 @@ static int tcf_block_setup(struct tcf_block *block,
struct flow_block_offload *bo);
static void tcf_block_offload_init(struct flow_block_offload *bo,
- struct net_device *dev,
+ struct net_device *dev, struct Qdisc *sch,
enum flow_block_command command,
enum flow_block_binder_type binder_type,
struct flow_block *flow_block,
@@ -633,6 +633,7 @@ static void tcf_block_offload_init(struct flow_block_offload *bo,
bo->block = flow_block;
bo->block_shared = shared;
bo->extack = extack;
+ bo->sch = sch;
INIT_LIST_HEAD(&bo->cb_list);
}
@@ -643,10 +644,11 @@ static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
{
struct tcf_block *block = block_cb->indr.data;
struct net_device *dev = block_cb->indr.dev;
+ struct Qdisc *sch = block_cb->indr.sch;
struct netlink_ext_ack extack = {};
struct flow_block_offload bo;
- tcf_block_offload_init(&bo, dev, FLOW_BLOCK_UNBIND,
+ tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND,
block_cb->indr.binder_type,
&block->flow_block, tcf_block_shared(block),
&extack);
@@ -665,14 +667,14 @@ static bool tcf_block_offload_in_use(struct tcf_block *block)
}
static int tcf_block_offload_cmd(struct tcf_block *block,
- struct net_device *dev,
+ struct net_device *dev, struct Qdisc *sch,
struct tcf_block_ext_info *ei,
enum flow_block_command command,
struct netlink_ext_ack *extack)
{
struct flow_block_offload bo = {};
- tcf_block_offload_init(&bo, dev, command, ei->binder_type,
+ tcf_block_offload_init(&bo, dev, sch, command, ei->binder_type,
&block->flow_block, tcf_block_shared(block),
extack);
@@ -689,7 +691,7 @@ static int tcf_block_offload_cmd(struct tcf_block *block,
return tcf_block_setup(block, &bo);
}
- flow_indr_dev_setup_offload(dev, TC_SETUP_BLOCK, block, &bo,
+ flow_indr_dev_setup_offload(dev, sch, TC_SETUP_BLOCK, block, &bo,
tc_block_indr_cleanup);
tcf_block_setup(block, &bo);
@@ -716,7 +718,7 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
goto err_unlock;
}
- err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack);
+ err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_BIND, extack);
if (err == -EOPNOTSUPP)
goto no_offload_dev_inc;
if (err)
@@ -743,7 +745,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
int err;
down_write(&block->cb_lock);
- err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
+ err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_UNBIND, NULL);
if (err == -EOPNOTSUPP)
goto no_offload_dev_dec;
up_write(&block->cb_lock);
@@ -1627,6 +1629,7 @@ int tcf_classify_ingress(struct sk_buff *skb,
if (WARN_ON_ONCE(!ext))
return TC_ACT_SHOT;
ext->chain = last_executed_chain;
+ ext->mru = qdisc_skb_cb(skb)->mru;
}
return ret;
@@ -3659,9 +3662,11 @@ int tc_setup_flow_action(struct flow_action *flow_action,
tcf_sample_get_group(entry, act);
} else if (is_tcf_police(act)) {
entry->id = FLOW_ACTION_POLICE;
- entry->police.burst = tcf_police_tcfp_burst(act);
+ entry->police.burst = tcf_police_burst(act);
entry->police.rate_bytes_ps =
tcf_police_rate_bytes_ps(act);
+ entry->police.mtu = tcf_police_tcfp_mtu(act);
+ entry->police.index = act->tcfa_index;
} else if (is_tcf_ct(act)) {
entry->id = FLOW_ACTION_CT;
entry->ct.action = tcf_ct_action(act);
@@ -3745,6 +3750,119 @@ unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
}
EXPORT_SYMBOL(tcf_exts_num_actions);
+#ifdef CONFIG_NET_CLS_ACT
+static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr,
+ u32 *p_block_index,
+ struct netlink_ext_ack *extack)
+{
+ *p_block_index = nla_get_u32(block_index_attr);
+ if (!*p_block_index) {
+ NL_SET_ERR_MSG(extack, "Block number may not be zero");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
+ enum flow_block_binder_type binder_type,
+ struct nlattr *block_index_attr,
+ struct netlink_ext_ack *extack)
+{
+ u32 block_index;
+ int err;
+
+ if (!block_index_attr)
+ return 0;
+
+ err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
+ if (err)
+ return err;
+
+ if (!block_index)
+ return 0;
+
+ qe->info.binder_type = binder_type;
+ qe->info.chain_head_change = tcf_chain_head_change_dflt;
+ qe->info.chain_head_change_priv = &qe->filter_chain;
+ qe->info.block_index = block_index;
+
+ return tcf_block_get_ext(&qe->block, sch, &qe->info, extack);
+}
+EXPORT_SYMBOL(tcf_qevent_init);
+
+void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch)
+{
+ if (qe->info.block_index)
+ tcf_block_put_ext(qe->block, sch, &qe->info);
+}
+EXPORT_SYMBOL(tcf_qevent_destroy);
+
+int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
+ struct netlink_ext_ack *extack)
+{
+ u32 block_index;
+ int err;
+
+ if (!block_index_attr)
+ return 0;
+
+ err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
+ if (err)
+ return err;
+
+ /* Bounce newly-configured block or change in block. */
+ if (block_index != qe->info.block_index) {
+ NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(tcf_qevent_validate_change);
+
+struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
+ struct sk_buff **to_free, int *ret)
+{
+ struct tcf_result cl_res;
+ struct tcf_proto *fl;
+
+ if (!qe->info.block_index)
+ return skb;
+
+ fl = rcu_dereference_bh(qe->filter_chain);
+
+ switch (tcf_classify(skb, fl, &cl_res, false)) {
+ case TC_ACT_SHOT:
+ qdisc_qstats_drop(sch);
+ __qdisc_drop(skb, to_free);
+ *ret = __NET_XMIT_BYPASS;
+ return NULL;
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
+ __qdisc_drop(skb, to_free);
+ *ret = __NET_XMIT_STOLEN;
+ return NULL;
+ case TC_ACT_REDIRECT:
+ skb_do_redirect(skb);
+ *ret = __NET_XMIT_STOLEN;
+ return NULL;
+ }
+
+ return skb;
+}
+EXPORT_SYMBOL(tcf_qevent_handle);
+
+int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe)
+{
+ if (!qe->info.block_index)
+ return 0;
+ return nla_put_u32(skb, attr_name, qe->info.block_index);
+}
+EXPORT_SYMBOL(tcf_qevent_dump);
+#endif
+
static __net_init int tcf_net_init(struct net *net)
{
struct tcf_net *tn = net_generic(net, tcf_net_id);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index e30bd969fc48..a4f7ef1de7e7 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -64,6 +64,7 @@ struct fl_flow_key {
};
} tp_range;
struct flow_dissector_key_ct ct;
+ struct flow_dissector_key_hash hash;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
struct fl_flow_mask_range {
@@ -318,6 +319,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
skb_flow_dissect_ct(skb, &mask->dissector, &skb_key,
fl_ct_info_to_flower_map,
ARRAY_SIZE(fl_ct_info_to_flower_map));
+ skb_flow_dissect_hash(skb, &mask->dissector, &skb_key);
skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
f = fl_mask_lookup(mask, &skb_key);
@@ -491,6 +493,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes,
cls_flower.stats.pkts,
+ cls_flower.stats.drops,
cls_flower.stats.lastused,
cls_flower.stats.used_hw_stats,
cls_flower.stats.used_hw_stats_valid);
@@ -694,6 +697,9 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_CT_LABELS_MASK] = { .type = NLA_BINARY,
.len = 128 / BITS_PER_BYTE },
[TCA_FLOWER_FLAGS] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_HASH] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_HASH_MASK] = { .type = NLA_U32 },
+
};
static const struct nla_policy
@@ -1625,6 +1631,10 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip);
+ fl_set_key_val(tb, &key->hash.hash, TCA_FLOWER_KEY_HASH,
+ &mask->hash.hash, TCA_FLOWER_KEY_HASH_MASK,
+ sizeof(key->hash.hash));
+
if (tb[TCA_FLOWER_KEY_ENC_OPTS]) {
ret = fl_set_enc_opt(tb, key, mask, extack);
if (ret)
@@ -1739,6 +1749,8 @@ static void fl_init_dissector(struct flow_dissector *dissector,
FLOW_DISSECTOR_KEY_ENC_OPTS, enc_opts);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_CT, ct);
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_HASH, hash);
skb_flow_dissector_init(dissector, keys, cnt);
}
@@ -2959,6 +2971,11 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
goto nla_put_failure;
+ if (fl_dump_key_val(skb, &key->hash.hash, TCA_FLOWER_KEY_HASH,
+ &mask->hash.hash, TCA_FLOWER_KEY_HASH_MASK,
+ sizeof(key->hash.hash)))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 8d39dbcf1746..cafb84480bab 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -338,7 +338,8 @@ static void mall_stats_hw_filter(struct tcf_proto *tp,
tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true);
tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes,
- cls_mall.stats.pkts, cls_mall.stats.lastused,
+ cls_mall.stats.pkts, cls_mall.stats.drops,
+ cls_mall.stats.lastused,
cls_mall.stats.used_hw_stats,
cls_mall.stats.used_hw_stats_valid);
}
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 61e95029c18f..78bec347b8b6 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -533,7 +533,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
"p %p,r %p,*arg %p\n",
- tp, handle, tca, arg, opt, p, r, arg ? *arg : NULL);
+ tp, handle, tca, arg, opt, p, r, *arg);
if (!opt)
return 0;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index e15ff335953d..7b69ab1993ba 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -796,9 +796,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
struct tc_u32_sel *s = &n->sel;
struct tc_u_knode *new;
- new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
- GFP_KERNEL);
-
+ new = kzalloc(struct_size(new, sel.keys, s->nkeys), GFP_KERNEL);
if (!new)
return NULL;
@@ -854,9 +852,6 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
u32 htid, flags = 0;
size_t sel_size;
int err;
-#ifdef CONFIG_CLS_U32_PERF
- size_t size;
-#endif
if (!opt) {
if (handle) {
@@ -1024,15 +1019,15 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
goto erridr;
}
- n = kzalloc(offsetof(typeof(*n), sel) + sel_size, GFP_KERNEL);
+ n = kzalloc(struct_size(n, sel.keys, s->nkeys), GFP_KERNEL);
if (n == NULL) {
err = -ENOBUFS;
goto erridr;
}
#ifdef CONFIG_CLS_U32_PERF
- size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
- n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
+ n->pf = __alloc_percpu(struct_size(n->pf, kcnts, s->nkeys),
+ __alignof__(struct tc_u32_pcnt));
if (!n->pf) {
err = -ENOBUFS;
goto errfree;
@@ -1296,8 +1291,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
int cpu;
#endif
- if (nla_put(skb, TCA_U32_SEL,
- sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
+ if (nla_put(skb, TCA_U32_SEL, struct_size(&n->sel, keys, n->sel.nkeys),
&n->sel))
goto nla_put_failure;
@@ -1347,9 +1341,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
goto nla_put_failure;
}
#ifdef CONFIG_CLS_U32_PERF
- gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
- n->sel.nkeys * sizeof(u64),
- GFP_KERNEL);
+ gpf = kzalloc(struct_size(gpf, kcnts, n->sel.nkeys), GFP_KERNEL);
if (!gpf)
goto nla_put_failure;
@@ -1363,9 +1355,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
gpf->kcnts[i] += pf->kcnts[i];
}
- if (nla_put_64bit(skb, TCA_U32_PCNT,
- sizeof(struct tc_u32_pcnt) +
- n->sel.nkeys * sizeof(u64),
+ if (nla_put_64bit(skb, TCA_U32_PCNT, struct_size(gpf, kcnts, n->sel.nkeys),
gpf, TCA_U32_PAD)) {
kfree(gpf);
goto nla_put_failure;
diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c
index b9a94fdf9397..5ea84decec19 100644
--- a/net/sched/em_canid.c
+++ b/net/sched/em_canid.c
@@ -40,6 +40,7 @@ struct canid_match {
/**
* em_canid_get_id() - Extracts Can ID out of the sk_buff structure.
+ * @skb: buffer to extract Can ID from
*/
static canid_t em_canid_get_id(struct sk_buff *skb)
{
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index dd3b8c11a2e0..f885bea5b452 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -389,7 +389,6 @@ EXPORT_SYMBOL(tcf_em_tree_validate);
/**
* tcf_em_tree_destroy - destroy an ematch tree
*
- * @tp: classifier kind handle
* @tree: ematch tree to be deleted
*
* This functions destroys an ematch tree previously created by
@@ -425,7 +424,7 @@ EXPORT_SYMBOL(tcf_em_tree_destroy);
* tcf_em_tree_dump - dump ematch tree into a rtnl message
*
* @skb: skb holding the rtnl message
- * @t: ematch tree to be dumped
+ * @tree: ematch tree to be dumped
* @tlv: TLV type to be used to encapsulate the tree
*
* This function dumps a ematch tree into a rtnl message. It is valid to
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 9a3449b56bd6..2a76a2f5ed88 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -267,7 +267,8 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
root->handle == handle)
return root;
- hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
+ hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
+ lockdep_rtnl_is_held()) {
if (q->handle == handle)
return q;
}
@@ -1093,8 +1094,7 @@ skip:
int err;
/* Only support running class lockless if parent is lockless */
- if (new && (new->flags & TCQ_F_NOLOCK) &&
- parent && !(parent->flags & TCQ_F_NOLOCK))
+ if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
qdisc_clear_nolock(new);
if (!cops || !cops->graft)
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index ebaeec1e5c82..561d20c9adca 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -312,8 +312,8 @@ static const u8 precedence[] = {
};
static const u8 diffserv8[] = {
- 2, 5, 1, 2, 4, 2, 2, 2,
- 0, 2, 1, 2, 1, 2, 1, 2,
+ 2, 0, 1, 2, 4, 2, 2, 2,
+ 1, 2, 1, 2, 1, 2, 1, 2,
5, 2, 4, 2, 4, 2, 4, 2,
3, 2, 3, 2, 3, 2, 3, 2,
6, 2, 3, 2, 3, 2, 3, 2,
@@ -323,7 +323,7 @@ static const u8 diffserv8[] = {
};
static const u8 diffserv4[] = {
- 0, 2, 0, 0, 2, 0, 0, 0,
+ 0, 1, 0, 0, 2, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0,
2, 0, 2, 0, 2, 0, 2, 0,
2, 0, 2, 0, 2, 0, 2, 0,
@@ -334,7 +334,7 @@ static const u8 diffserv4[] = {
};
static const u8 diffserv3[] = {
- 0, 0, 0, 0, 2, 0, 0, 0,
+ 0, 1, 0, 0, 2, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 39b427dc7512..b2130df933a7 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -250,7 +250,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- /* fall through */
+ fallthrough;
case TC_ACT_SHOT:
return NULL;
case TC_ACT_RECLASSIFY:
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 07a2b0b35495..dde564670ad8 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -324,7 +324,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- /* fall through */
+ fallthrough;
case TC_ACT_SHOT:
return NULL;
}
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index a87e9159338c..c1e84d1eeaba 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -397,7 +397,7 @@ static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- /* fall through */
+ fallthrough;
case TC_ACT_SHOT:
return NULL;
}
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 459a784056c0..3106653c17f3 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -99,7 +99,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- /* fall through */
+ fallthrough;
case TC_ACT_SHOT:
return 0;
}
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index fb760cee824e..f98c74018805 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -102,7 +102,7 @@ static unsigned int fq_pie_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- /* fall through */
+ fallthrough;
case TC_ACT_SHOT:
return 0;
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 433f2190960f..0f5f121404f3 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1136,7 +1136,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- /* fall through */
+ fallthrough;
case TC_ACT_SHOT:
return NULL;
}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 8184c87da8be..ba37defaca7a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -239,7 +239,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- /* fall through */
+ fallthrough;
case TC_ACT_SHOT:
return NULL;
}
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 1330ad224931..5c27b4270b90 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -43,7 +43,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- /* fall through */
+ fallthrough;
case TC_ACT_SHOT:
return NULL;
}
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 647941702f9f..3eabb871a1d5 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -46,7 +46,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- /* fall through */
+ fallthrough;
case TC_ACT_SHOT:
return NULL;
}
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 0b05ac7c848e..6335230a971e 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -699,7 +699,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- /* fall through */
+ fallthrough;
case TC_ACT_SHOT:
return NULL;
}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 555a1b9e467f..deac82f3ad7b 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -46,6 +46,8 @@ struct red_sched_data {
struct red_vars vars;
struct red_stats stats;
struct Qdisc *qdisc;
+ struct tcf_qevent qe_early_drop;
+ struct tcf_qevent qe_mark;
};
#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)
@@ -92,6 +94,9 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (INET_ECN_set_ce(skb)) {
q->stats.prob_mark++;
+ skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
+ if (!skb)
+ return NET_XMIT_CN | ret;
} else if (!red_use_nodrop(q)) {
q->stats.prob_drop++;
goto congestion_drop;
@@ -109,6 +114,9 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (INET_ECN_set_ce(skb)) {
q->stats.forced_mark++;
+ skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
+ if (!skb)
+ return NET_XMIT_CN | ret;
} else if (!red_use_nodrop(q)) {
q->stats.forced_drop++;
goto congestion_drop;
@@ -129,6 +137,10 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return ret;
congestion_drop:
+ skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
+ if (!skb)
+ return NET_XMIT_CN | ret;
+
qdisc_drop(skb, sch, to_free);
return NET_XMIT_CN;
}
@@ -202,6 +214,8 @@ static void red_destroy(struct Qdisc *sch)
{
struct red_sched_data *q = qdisc_priv(sch);
+ tcf_qevent_destroy(&q->qe_mark, sch);
+ tcf_qevent_destroy(&q->qe_early_drop, sch);
del_timer_sync(&q->adapt_timer);
red_offload(sch, false);
qdisc_put(q->qdisc);
@@ -213,14 +227,15 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
[TCA_RED_STAB] = { .len = RED_STAB_SIZE },
[TCA_RED_MAX_P] = { .type = NLA_U32 },
[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
+ [TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
+ [TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};
-static int red_change(struct Qdisc *sch, struct nlattr *opt,
- struct netlink_ext_ack *extack)
+static int __red_change(struct Qdisc *sch, struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
struct Qdisc *old_child = NULL, *child = NULL;
struct red_sched_data *q = qdisc_priv(sch);
- struct nlattr *tb[TCA_RED_MAX + 1];
struct nla_bitfield32 flags_bf;
struct tc_red_qopt *ctl;
unsigned char userbits;
@@ -228,14 +243,6 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
int err;
u32 max_P;
- if (opt == NULL)
- return -EINVAL;
-
- err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
- NULL);
- if (err < 0)
- return err;
-
if (tb[TCA_RED_PARMS] == NULL ||
tb[TCA_RED_STAB] == NULL)
return -EINVAL;
@@ -323,11 +330,74 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
struct red_sched_data *q = qdisc_priv(sch);
+ struct nlattr *tb[TCA_RED_MAX + 1];
+ int err;
q->qdisc = &noop_qdisc;
q->sch = sch;
timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
- return red_change(sch, opt, extack);
+
+ if (!opt)
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
+ extack);
+ if (err < 0)
+ return err;
+
+ err = __red_change(sch, tb, extack);
+ if (err)
+ return err;
+
+ err = tcf_qevent_init(&q->qe_early_drop, sch,
+ FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
+ tb[TCA_RED_EARLY_DROP_BLOCK], extack);
+ if (err)
+ goto err_early_drop_init;
+
+ err = tcf_qevent_init(&q->qe_mark, sch,
+ FLOW_BLOCK_BINDER_TYPE_RED_MARK,
+ tb[TCA_RED_MARK_BLOCK], extack);
+ if (err)
+ goto err_mark_init;
+
+ return 0;
+
+err_mark_init:
+ tcf_qevent_destroy(&q->qe_early_drop, sch);
+err_early_drop_init:
+ del_timer_sync(&q->adapt_timer);
+ red_offload(sch, false);
+ qdisc_put(q->qdisc);
+ return err;
+}
+
+static int red_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+ struct nlattr *tb[TCA_RED_MAX + 1];
+ int err;
+
+ if (!opt)
+ return -EINVAL;
+
+ err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
+ extack);
+ if (err < 0)
+ return err;
+
+ err = tcf_qevent_validate_change(&q->qe_early_drop,
+ tb[TCA_RED_EARLY_DROP_BLOCK], extack);
+ if (err)
+ return err;
+
+ err = tcf_qevent_validate_change(&q->qe_mark,
+ tb[TCA_RED_MARK_BLOCK], extack);
+ if (err)
+ return err;
+
+ return __red_change(sch, tb, extack);
}
static int red_dump_offload_stats(struct Qdisc *sch)
@@ -371,7 +441,9 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
nla_put_bitfield32(skb, TCA_RED_FLAGS,
- q->flags, TC_RED_SUPPORTED_FLAGS))
+ q->flags, TC_RED_SUPPORTED_FLAGS) ||
+ tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
+ tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
goto nla_put_failure;
return nla_nest_end(skb, opts);
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 4074c50ac3d7..da047a37a3bf 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -265,7 +265,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- /* fall through */
+ fallthrough;
case TC_ACT_SHOT:
return false;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 5a6def5e4e6d..cae5dbbadc1c 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -186,7 +186,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- /* fall through */
+ fallthrough;
case TC_ACT_SHOT:
return 0;
}
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index b1eb12d33b9a..e981992634dd 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1108,11 +1108,10 @@ static void setup_txtime(struct taprio_sched *q,
static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries)
{
- size_t size = sizeof(struct tc_taprio_sched_entry) * num_entries +
- sizeof(struct __tc_taprio_qopt_offload);
struct __tc_taprio_qopt_offload *__offload;
- __offload = kzalloc(size, GFP_KERNEL);
+ __offload = kzalloc(struct_size(__offload, offload.entries, num_entries),
+ GFP_KERNEL);
if (!__offload)
return NULL;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ccfa0ab3e7f4..aea2a982984d 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -1033,8 +1033,6 @@ static const struct proto_ops inet6_seqpacket_ops = {
.mmap = sock_no_mmap,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet6_compat_ioctl,
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
#endif
};
@@ -1089,10 +1087,6 @@ static struct sctp_af sctp_af_inet6 = {
.net_header_len = sizeof(struct ipv6hdr),
.sockaddr_len = sizeof(struct sockaddr_in6),
.ip_options_len = sctp_v6_ip_options_len,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_ipv6_setsockopt,
- .compat_getsockopt = compat_ipv6_getsockopt,
-#endif
};
static struct sctp_pf sctp_pf_inet6 = {
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index cde29f3c7fb3..d19db22262fd 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1036,10 +1036,6 @@ static const struct proto_ops inet_seqpacket_ops = {
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
-#endif
};
/* Registration with AF_INET family. */
@@ -1093,10 +1089,6 @@ static struct sctp_af sctp_af_inet = {
.net_header_len = sizeof(struct iphdr),
.sockaddr_len = sizeof(struct sockaddr_in),
.ip_options_len = sctp_v4_ip_options_len,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_ip_setsockopt,
- .compat_getsockopt = compat_ip_getsockopt,
-#endif
};
struct sctp_pf *sctp_get_pf_specific(sa_family_t family)
@@ -1375,15 +1367,15 @@ static struct pernet_operations sctp_ctrlsock_ops = {
/* Initialize the universe into something sensible. */
static __init int sctp_init(void)
{
- int i;
- int status = -EINVAL;
- unsigned long goal;
- unsigned long limit;
unsigned long nr_pages = totalram_pages();
+ unsigned long limit;
+ unsigned long goal;
+ int max_entry_order;
+ int num_entries;
int max_share;
+ int status;
int order;
- int num_entries;
- int max_entry_order;
+ int i;
sock_skb_cb_check_size(sizeof(struct sctp_ulpevent));
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d57e1a002ffc..ec1fba1fbe71 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -979,9 +979,8 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw)
*
* Returns 0 if ok, <0 errno code on error.
*/
-static int sctp_setsockopt_bindx_kernel(struct sock *sk,
- struct sockaddr *addrs, int addrs_size,
- int op)
+static int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr *addrs,
+ int addrs_size, int op)
{
int err;
int addrcnt = 0;
@@ -991,7 +990,7 @@ static int sctp_setsockopt_bindx_kernel(struct sock *sk,
struct sctp_af *af;
pr_debug("%s: sk:%p addrs:%p addrs_size:%d opt:%d\n",
- __func__, sk, addrs, addrs_size, op);
+ __func__, sk, addr_buf, addrs_size, op);
if (unlikely(addrs_size <= 0))
return -EINVAL;
@@ -1037,29 +1036,13 @@ static int sctp_setsockopt_bindx_kernel(struct sock *sk,
}
}
-static int sctp_setsockopt_bindx(struct sock *sk,
- struct sockaddr __user *addrs,
- int addrs_size, int op)
-{
- struct sockaddr *kaddrs;
- int err;
-
- kaddrs = memdup_user(addrs, addrs_size);
- if (IS_ERR(kaddrs))
- return PTR_ERR(kaddrs);
- err = sctp_setsockopt_bindx_kernel(sk, kaddrs, addrs_size, op);
- kfree(kaddrs);
- return err;
-}
-
static int sctp_bind_add(struct sock *sk, struct sockaddr *addrs,
int addrlen)
{
int err;
lock_sock(sk);
- err = sctp_setsockopt_bindx_kernel(sk, addrs, addrlen,
- SCTP_BINDX_ADD_ADDR);
+ err = sctp_setsockopt_bindx(sk, addrs, addrlen, SCTP_BINDX_ADD_ADDR);
release_sock(sk);
return err;
}
@@ -1303,36 +1286,29 @@ out_free:
* it.
*
* sk The sk of the socket
- * addrs The pointer to the addresses in user land
+ * addrs The pointer to the addresses
* addrssize Size of the addrs buffer
*
* Returns >=0 if ok, <0 errno code on error.
*/
-static int __sctp_setsockopt_connectx(struct sock *sk,
- struct sockaddr __user *addrs,
- int addrs_size,
- sctp_assoc_t *assoc_id)
+static int __sctp_setsockopt_connectx(struct sock *sk, struct sockaddr *kaddrs,
+ int addrs_size, sctp_assoc_t *assoc_id)
{
- struct sockaddr *kaddrs;
int err = 0, flags = 0;
pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n",
- __func__, sk, addrs, addrs_size);
+ __func__, sk, kaddrs, addrs_size);
/* make sure the 1st addr's sa_family is accessible later */
if (unlikely(addrs_size < sizeof(sa_family_t)))
return -EINVAL;
- kaddrs = memdup_user(addrs, addrs_size);
- if (IS_ERR(kaddrs))
- return PTR_ERR(kaddrs);
-
/* Allow security module to validate connectx addresses. */
err = security_sctp_bind_connect(sk, SCTP_SOCKOPT_CONNECTX,
(struct sockaddr *)kaddrs,
addrs_size);
if (err)
- goto out_free;
+ return err;
/* in-kernel sockets don't generally have a file allocated to them
* if all they do is call sock_create_kern().
@@ -1340,12 +1316,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
if (sk->sk_socket->file)
flags = sk->sk_socket->file->f_flags;
- err = __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id);
-
-out_free:
- kfree(kaddrs);
-
- return err;
+ return __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id);
}
/*
@@ -1353,10 +1324,10 @@ out_free:
* to the option that doesn't provide association id.
*/
static int sctp_setsockopt_connectx_old(struct sock *sk,
- struct sockaddr __user *addrs,
+ struct sockaddr *kaddrs,
int addrs_size)
{
- return __sctp_setsockopt_connectx(sk, addrs, addrs_size, NULL);
+ return __sctp_setsockopt_connectx(sk, kaddrs, addrs_size, NULL);
}
/*
@@ -1366,13 +1337,13 @@ static int sctp_setsockopt_connectx_old(struct sock *sk,
* always positive.
*/
static int sctp_setsockopt_connectx(struct sock *sk,
- struct sockaddr __user *addrs,
+ struct sockaddr *kaddrs,
int addrs_size)
{
sctp_assoc_t assoc_id = 0;
int err = 0;
- err = __sctp_setsockopt_connectx(sk, addrs, addrs_size, &assoc_id);
+ err = __sctp_setsockopt_connectx(sk, kaddrs, addrs_size, &assoc_id);
if (err)
return err;
@@ -1402,6 +1373,7 @@ static int sctp_getsockopt_connectx3(struct sock *sk, int len,
{
struct sctp_getaddrs_old param;
sctp_assoc_t assoc_id = 0;
+ struct sockaddr *kaddrs;
int err = 0;
#ifdef CONFIG_COMPAT
@@ -1425,9 +1397,12 @@ static int sctp_getsockopt_connectx3(struct sock *sk, int len,
return -EFAULT;
}
- err = __sctp_setsockopt_connectx(sk, (struct sockaddr __user *)
- param.addrs, param.addr_num,
- &assoc_id);
+ kaddrs = memdup_user(param.addrs, param.addr_num);
+ if (IS_ERR(kaddrs))
+ return PTR_ERR(kaddrs);
+
+ err = __sctp_setsockopt_connectx(sk, kaddrs, param.addr_num, &assoc_id);
+ kfree(kaddrs);
if (err == 0 || err == -EINPROGRESS) {
if (copy_to_user(optval, &assoc_id, sizeof(assoc_id)))
return -EFAULT;
@@ -2209,28 +2184,18 @@ out:
* exceeds the current PMTU size, the message will NOT be sent and
* instead a error will be indicated to the user.
*/
-static int sctp_setsockopt_disable_fragments(struct sock *sk,
- char __user *optval,
+static int sctp_setsockopt_disable_fragments(struct sock *sk, int *val,
unsigned int optlen)
{
- int val;
-
if (optlen < sizeof(int))
return -EINVAL;
-
- if (get_user(val, (int __user *)optval))
- return -EFAULT;
-
- sctp_sk(sk)->disable_fragments = (val == 0) ? 0 : 1;
-
+ sctp_sk(sk)->disable_fragments = (*val == 0) ? 0 : 1;
return 0;
}
-static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
+static int sctp_setsockopt_events(struct sock *sk, __u8 *sn_type,
unsigned int optlen)
{
- struct sctp_event_subscribe subscribe;
- __u8 *sn_type = (__u8 *)&subscribe;
struct sctp_sock *sp = sctp_sk(sk);
struct sctp_association *asoc;
int i;
@@ -2238,9 +2203,6 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
if (optlen > sizeof(struct sctp_event_subscribe))
return -EINVAL;
- if (copy_from_user(&subscribe, optval, optlen))
- return -EFAULT;
-
for (i = 0; i < optlen; i++)
sctp_ulpevent_type_set(&sp->subscribe, SCTP_SN_TYPE_BASE + i,
sn_type[i]);
@@ -2280,7 +2242,7 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
* integer defining the number of seconds of idle time before an
* association is closed.
*/
-static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
+static int sctp_setsockopt_autoclose(struct sock *sk, u32 *optval,
unsigned int optlen)
{
struct sctp_sock *sp = sctp_sk(sk);
@@ -2291,9 +2253,8 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
return -EOPNOTSUPP;
if (optlen != sizeof(int))
return -EINVAL;
- if (copy_from_user(&sp->autoclose, optval, optlen))
- return -EFAULT;
+ sp->autoclose = *optval;
if (sp->autoclose > net->sctp.max_autoclose)
sp->autoclose = net->sctp.max_autoclose;
@@ -2628,48 +2589,42 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
}
static int sctp_setsockopt_peer_addr_params(struct sock *sk,
- char __user *optval,
+ struct sctp_paddrparams *params,
unsigned int optlen)
{
- struct sctp_paddrparams params;
struct sctp_transport *trans = NULL;
struct sctp_association *asoc = NULL;
struct sctp_sock *sp = sctp_sk(sk);
int error;
int hb_change, pmtud_change, sackdelay_change;
- if (optlen == sizeof(params)) {
- if (copy_from_user(&params, optval, optlen))
- return -EFAULT;
- } else if (optlen == ALIGN(offsetof(struct sctp_paddrparams,
+ if (optlen == ALIGN(offsetof(struct sctp_paddrparams,
spp_ipv6_flowlabel), 4)) {
- if (copy_from_user(&params, optval, optlen))
- return -EFAULT;
- if (params.spp_flags & (SPP_DSCP | SPP_IPV6_FLOWLABEL))
+ if (params->spp_flags & (SPP_DSCP | SPP_IPV6_FLOWLABEL))
return -EINVAL;
- } else {
+ } else if (optlen != sizeof(*params)) {
return -EINVAL;
}
/* Validate flags and value parameters. */
- hb_change = params.spp_flags & SPP_HB;
- pmtud_change = params.spp_flags & SPP_PMTUD;
- sackdelay_change = params.spp_flags & SPP_SACKDELAY;
+ hb_change = params->spp_flags & SPP_HB;
+ pmtud_change = params->spp_flags & SPP_PMTUD;
+ sackdelay_change = params->spp_flags & SPP_SACKDELAY;
if (hb_change == SPP_HB ||
pmtud_change == SPP_PMTUD ||
sackdelay_change == SPP_SACKDELAY ||
- params.spp_sackdelay > 500 ||
- (params.spp_pathmtu &&
- params.spp_pathmtu < SCTP_DEFAULT_MINSEGMENT))
+ params->spp_sackdelay > 500 ||
+ (params->spp_pathmtu &&
+ params->spp_pathmtu < SCTP_DEFAULT_MINSEGMENT))
return -EINVAL;
/* If an address other than INADDR_ANY is specified, and
* no transport is found, then the request is invalid.
*/
- if (!sctp_is_any(sk, (union sctp_addr *)&params.spp_address)) {
- trans = sctp_addr_id2transport(sk, &params.spp_address,
- params.spp_assoc_id);
+ if (!sctp_is_any(sk, (union sctp_addr *)&params->spp_address)) {
+ trans = sctp_addr_id2transport(sk, &params->spp_address,
+ params->spp_assoc_id);
if (!trans)
return -EINVAL;
}
@@ -2678,19 +2633,19 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk,
* socket is a one to many style socket, and an association
* was not found, then the id was invalid.
*/
- asoc = sctp_id2assoc(sk, params.spp_assoc_id);
- if (!asoc && params.spp_assoc_id != SCTP_FUTURE_ASSOC &&
+ asoc = sctp_id2assoc(sk, params->spp_assoc_id);
+ if (!asoc && params->spp_assoc_id != SCTP_FUTURE_ASSOC &&
sctp_style(sk, UDP))
return -EINVAL;
/* Heartbeat demand can only be sent on a transport or
* association, but not a socket.
*/
- if (params.spp_flags & SPP_HB_DEMAND && !trans && !asoc)
+ if (params->spp_flags & SPP_HB_DEMAND && !trans && !asoc)
return -EINVAL;
/* Process parameters. */
- error = sctp_apply_peer_addr_params(&params, trans, asoc, sp,
+ error = sctp_apply_peer_addr_params(params, trans, asoc, sp,
hb_change, pmtud_change,
sackdelay_change);
@@ -2703,7 +2658,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk,
if (!trans && asoc) {
list_for_each_entry(trans, &asoc->peer.transport_addr_list,
transports) {
- sctp_apply_peer_addr_params(&params, trans, asoc, sp,
+ sctp_apply_peer_addr_params(params, trans, asoc, sp,
hb_change, pmtud_change,
sackdelay_change);
}
@@ -2794,83 +2749,86 @@ static void sctp_apply_asoc_delayed_ack(struct sctp_sack_info *params,
* timer to expire. The default value for this is 2, setting this
* value to 1 will disable the delayed sack algorithm.
*/
-
-static int sctp_setsockopt_delayed_ack(struct sock *sk,
- char __user *optval, unsigned int optlen)
+static int __sctp_setsockopt_delayed_ack(struct sock *sk,
+ struct sctp_sack_info *params)
{
struct sctp_sock *sp = sctp_sk(sk);
struct sctp_association *asoc;
- struct sctp_sack_info params;
-
- if (optlen == sizeof(struct sctp_sack_info)) {
- if (copy_from_user(&params, optval, optlen))
- return -EFAULT;
-
- if (params.sack_delay == 0 && params.sack_freq == 0)
- return 0;
- } else if (optlen == sizeof(struct sctp_assoc_value)) {
- pr_warn_ratelimited(DEPRECATED
- "%s (pid %d) "
- "Use of struct sctp_assoc_value in delayed_ack socket option.\n"
- "Use struct sctp_sack_info instead\n",
- current->comm, task_pid_nr(current));
- if (copy_from_user(&params, optval, optlen))
- return -EFAULT;
-
- if (params.sack_delay == 0)
- params.sack_freq = 1;
- else
- params.sack_freq = 0;
- } else
- return -EINVAL;
/* Validate value parameter. */
- if (params.sack_delay > 500)
+ if (params->sack_delay > 500)
return -EINVAL;
/* Get association, if sack_assoc_id != SCTP_FUTURE_ASSOC and the
* socket is a one to many style socket, and an association
* was not found, then the id was invalid.
*/
- asoc = sctp_id2assoc(sk, params.sack_assoc_id);
- if (!asoc && params.sack_assoc_id > SCTP_ALL_ASSOC &&
+ asoc = sctp_id2assoc(sk, params->sack_assoc_id);
+ if (!asoc && params->sack_assoc_id > SCTP_ALL_ASSOC &&
sctp_style(sk, UDP))
return -EINVAL;
if (asoc) {
- sctp_apply_asoc_delayed_ack(&params, asoc);
+ sctp_apply_asoc_delayed_ack(params, asoc);
return 0;
}
if (sctp_style(sk, TCP))
- params.sack_assoc_id = SCTP_FUTURE_ASSOC;
+ params->sack_assoc_id = SCTP_FUTURE_ASSOC;
- if (params.sack_assoc_id == SCTP_FUTURE_ASSOC ||
- params.sack_assoc_id == SCTP_ALL_ASSOC) {
- if (params.sack_delay) {
- sp->sackdelay = params.sack_delay;
+ if (params->sack_assoc_id == SCTP_FUTURE_ASSOC ||
+ params->sack_assoc_id == SCTP_ALL_ASSOC) {
+ if (params->sack_delay) {
+ sp->sackdelay = params->sack_delay;
sp->param_flags =
sctp_spp_sackdelay_enable(sp->param_flags);
}
- if (params.sack_freq == 1) {
+ if (params->sack_freq == 1) {
sp->param_flags =
sctp_spp_sackdelay_disable(sp->param_flags);
- } else if (params.sack_freq > 1) {
- sp->sackfreq = params.sack_freq;
+ } else if (params->sack_freq > 1) {
+ sp->sackfreq = params->sack_freq;
sp->param_flags =
sctp_spp_sackdelay_enable(sp->param_flags);
}
}
- if (params.sack_assoc_id == SCTP_CURRENT_ASSOC ||
- params.sack_assoc_id == SCTP_ALL_ASSOC)
+ if (params->sack_assoc_id == SCTP_CURRENT_ASSOC ||
+ params->sack_assoc_id == SCTP_ALL_ASSOC)
list_for_each_entry(asoc, &sp->ep->asocs, asocs)
- sctp_apply_asoc_delayed_ack(&params, asoc);
+ sctp_apply_asoc_delayed_ack(params, asoc);
return 0;
}
+static int sctp_setsockopt_delayed_ack(struct sock *sk,
+ struct sctp_sack_info *params,
+ unsigned int optlen)
+{
+ if (optlen == sizeof(struct sctp_assoc_value)) {
+ struct sctp_assoc_value *v = (struct sctp_assoc_value *)params;
+ struct sctp_sack_info p;
+
+ pr_warn_ratelimited(DEPRECATED
+ "%s (pid %d) "
+ "Use of struct sctp_assoc_value in delayed_ack socket option.\n"
+ "Use struct sctp_sack_info instead\n",
+ current->comm, task_pid_nr(current));
+
+ p.sack_assoc_id = v->assoc_id;
+ p.sack_delay = v->assoc_value;
+ p.sack_freq = v->assoc_value ? 0 : 1;
+ return __sctp_setsockopt_delayed_ack(sk, &p);
+ }
+
+ if (optlen != sizeof(struct sctp_sack_info))
+ return -EINVAL;
+ if (params->sack_delay == 0 && params->sack_freq == 0)
+ return 0;
+ return __sctp_setsockopt_delayed_ack(sk, params);
+}
+
/* 7.1.3 Initialization Parameters (SCTP_INITMSG)
*
* Applications can specify protocol parameters for the default association
@@ -2882,24 +2840,22 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk,
* by the change). With TCP-style sockets, this option is inherited by
* sockets derived from a listener socket.
*/
-static int sctp_setsockopt_initmsg(struct sock *sk, char __user *optval, unsigned int optlen)
+static int sctp_setsockopt_initmsg(struct sock *sk, struct sctp_initmsg *sinit,
+ unsigned int optlen)
{
- struct sctp_initmsg sinit;
struct sctp_sock *sp = sctp_sk(sk);
if (optlen != sizeof(struct sctp_initmsg))
return -EINVAL;
- if (copy_from_user(&sinit, optval, optlen))
- return -EFAULT;
- if (sinit.sinit_num_ostreams)
- sp->initmsg.sinit_num_ostreams = sinit.sinit_num_ostreams;
- if (sinit.sinit_max_instreams)
- sp->initmsg.sinit_max_instreams = sinit.sinit_max_instreams;
- if (sinit.sinit_max_attempts)
- sp->initmsg.sinit_max_attempts = sinit.sinit_max_attempts;
- if (sinit.sinit_max_init_timeo)
- sp->initmsg.sinit_max_init_timeo = sinit.sinit_max_init_timeo;
+ if (sinit->sinit_num_ostreams)
+ sp->initmsg.sinit_num_ostreams = sinit->sinit_num_ostreams;
+ if (sinit->sinit_max_instreams)
+ sp->initmsg.sinit_max_instreams = sinit->sinit_max_instreams;
+ if (sinit->sinit_max_attempts)
+ sp->initmsg.sinit_max_attempts = sinit->sinit_max_attempts;
+ if (sinit->sinit_max_init_timeo)
+ sp->initmsg.sinit_max_init_timeo = sinit->sinit_max_init_timeo;
return 0;
}
@@ -2919,57 +2875,54 @@ static int sctp_setsockopt_initmsg(struct sock *sk, char __user *optval, unsigne
* to this call if the caller is using the UDP model.
*/
static int sctp_setsockopt_default_send_param(struct sock *sk,
- char __user *optval,
+ struct sctp_sndrcvinfo *info,
unsigned int optlen)
{
struct sctp_sock *sp = sctp_sk(sk);
struct sctp_association *asoc;
- struct sctp_sndrcvinfo info;
- if (optlen != sizeof(info))
+ if (optlen != sizeof(*info))
return -EINVAL;
- if (copy_from_user(&info, optval, optlen))
- return -EFAULT;
- if (info.sinfo_flags &
+ if (info->sinfo_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
- asoc = sctp_id2assoc(sk, info.sinfo_assoc_id);
- if (!asoc && info.sinfo_assoc_id > SCTP_ALL_ASSOC &&
+ asoc = sctp_id2assoc(sk, info->sinfo_assoc_id);
+ if (!asoc && info->sinfo_assoc_id > SCTP_ALL_ASSOC &&
sctp_style(sk, UDP))
return -EINVAL;
if (asoc) {
- asoc->default_stream = info.sinfo_stream;
- asoc->default_flags = info.sinfo_flags;
- asoc->default_ppid = info.sinfo_ppid;
- asoc->default_context = info.sinfo_context;
- asoc->default_timetolive = info.sinfo_timetolive;
+ asoc->default_stream = info->sinfo_stream;
+ asoc->default_flags = info->sinfo_flags;
+ asoc->default_ppid = info->sinfo_ppid;
+ asoc->default_context = info->sinfo_context;
+ asoc->default_timetolive = info->sinfo_timetolive;
return 0;
}
if (sctp_style(sk, TCP))
- info.sinfo_assoc_id = SCTP_FUTURE_ASSOC;
+ info->sinfo_assoc_id = SCTP_FUTURE_ASSOC;
- if (info.sinfo_assoc_id == SCTP_FUTURE_ASSOC ||
- info.sinfo_assoc_id == SCTP_ALL_ASSOC) {
- sp->default_stream = info.sinfo_stream;
- sp->default_flags = info.sinfo_flags;
- sp->default_ppid = info.sinfo_ppid;
- sp->default_context = info.sinfo_context;
- sp->default_timetolive = info.sinfo_timetolive;
+ if (info->sinfo_assoc_id == SCTP_FUTURE_ASSOC ||
+ info->sinfo_assoc_id == SCTP_ALL_ASSOC) {
+ sp->default_stream = info->sinfo_stream;
+ sp->default_flags = info->sinfo_flags;
+ sp->default_ppid = info->sinfo_ppid;
+ sp->default_context = info->sinfo_context;
+ sp->default_timetolive = info->sinfo_timetolive;
}
- if (info.sinfo_assoc_id == SCTP_CURRENT_ASSOC ||
- info.sinfo_assoc_id == SCTP_ALL_ASSOC) {
+ if (info->sinfo_assoc_id == SCTP_CURRENT_ASSOC ||
+ info->sinfo_assoc_id == SCTP_ALL_ASSOC) {
list_for_each_entry(asoc, &sp->ep->asocs, asocs) {
- asoc->default_stream = info.sinfo_stream;
- asoc->default_flags = info.sinfo_flags;
- asoc->default_ppid = info.sinfo_ppid;
- asoc->default_context = info.sinfo_context;
- asoc->default_timetolive = info.sinfo_timetolive;
+ asoc->default_stream = info->sinfo_stream;
+ asoc->default_flags = info->sinfo_flags;
+ asoc->default_ppid = info->sinfo_ppid;
+ asoc->default_context = info->sinfo_context;
+ asoc->default_timetolive = info->sinfo_timetolive;
}
}
@@ -2980,54 +2933,51 @@ static int sctp_setsockopt_default_send_param(struct sock *sk,
* (SCTP_DEFAULT_SNDINFO)
*/
static int sctp_setsockopt_default_sndinfo(struct sock *sk,
- char __user *optval,
+ struct sctp_sndinfo *info,
unsigned int optlen)
{
struct sctp_sock *sp = sctp_sk(sk);
struct sctp_association *asoc;
- struct sctp_sndinfo info;
- if (optlen != sizeof(info))
+ if (optlen != sizeof(*info))
return -EINVAL;
- if (copy_from_user(&info, optval, optlen))
- return -EFAULT;
- if (info.snd_flags &
+ if (info->snd_flags &
~(SCTP_UNORDERED | SCTP_ADDR_OVER |
SCTP_ABORT | SCTP_EOF))
return -EINVAL;
- asoc = sctp_id2assoc(sk, info.snd_assoc_id);
- if (!asoc && info.snd_assoc_id > SCTP_ALL_ASSOC &&
+ asoc = sctp_id2assoc(sk, info->snd_assoc_id);
+ if (!asoc && info->snd_assoc_id > SCTP_ALL_ASSOC &&
sctp_style(sk, UDP))
return -EINVAL;
if (asoc) {
- asoc->default_stream = info.snd_sid;
- asoc->default_flags = info.snd_flags;
- asoc->default_ppid = info.snd_ppid;
- asoc->default_context = info.snd_context;
+ asoc->default_stream = info->snd_sid;
+ asoc->default_flags = info->snd_flags;
+ asoc->default_ppid = info->snd_ppid;
+ asoc->default_context = info->snd_context;
return 0;
}
if (sctp_style(sk, TCP))
- info.snd_assoc_id = SCTP_FUTURE_ASSOC;
+ info->snd_assoc_id = SCTP_FUTURE_ASSOC;
- if (info.snd_assoc_id == SCTP_FUTURE_ASSOC ||
- info.snd_assoc_id == SCTP_ALL_ASSOC) {
- sp->default_stream = info.snd_sid;
- sp->default_flags = info.snd_flags;
- sp->default_ppid = info.snd_ppid;
- sp->default_context = info.snd_context;
+ if (info->snd_assoc_id == SCTP_FUTURE_ASSOC ||
+ info->snd_assoc_id == SCTP_ALL_ASSOC) {
+ sp->default_stream = info->snd_sid;
+ sp->default_flags = info->snd_flags;
+ sp->default_ppid = info->snd_ppid;
+ sp->default_context = info->snd_context;
}
- if (info.snd_assoc_id == SCTP_CURRENT_ASSOC ||
- info.snd_assoc_id == SCTP_ALL_ASSOC) {
+ if (info->snd_assoc_id == SCTP_CURRENT_ASSOC ||
+ info->snd_assoc_id == SCTP_ALL_ASSOC) {
list_for_each_entry(asoc, &sp->ep->asocs, asocs) {
- asoc->default_stream = info.snd_sid;
- asoc->default_flags = info.snd_flags;
- asoc->default_ppid = info.snd_ppid;
- asoc->default_context = info.snd_context;
+ asoc->default_stream = info->snd_sid;
+ asoc->default_flags = info->snd_flags;
+ asoc->default_ppid = info->snd_ppid;
+ asoc->default_context = info->snd_context;
}
}
@@ -3040,10 +2990,9 @@ static int sctp_setsockopt_default_sndinfo(struct sock *sk,
* the association primary. The enclosed address must be one of the
* association peer's addresses.
*/
-static int sctp_setsockopt_primary_addr(struct sock *sk, char __user *optval,
+static int sctp_setsockopt_primary_addr(struct sock *sk, struct sctp_prim *prim,
unsigned int optlen)
{
- struct sctp_prim prim;
struct sctp_transport *trans;
struct sctp_af *af;
int err;
@@ -3051,21 +3000,18 @@ static int sctp_setsockopt_primary_addr(struct sock *sk, char __user *optval,
if (optlen != sizeof(struct sctp_prim))
return -EINVAL;
- if (copy_from_user(&prim, optval, sizeof(struct sctp_prim)))
- return -EFAULT;
-
/* Allow security module to validate address but need address len. */
- af = sctp_get_af_specific(prim.ssp_addr.ss_family);
+ af = sctp_get_af_specific(prim->ssp_addr.ss_family);
if (!af)
return -EINVAL;
err = security_sctp_bind_connect(sk, SCTP_PRIMARY_ADDR,
- (struct sockaddr *)&prim.ssp_addr,
+ (struct sockaddr *)&prim->ssp_addr,
af->sockaddr_len);
if (err)
return err;
- trans = sctp_addr_id2transport(sk, &prim.ssp_addr, prim.ssp_assoc_id);
+ trans = sctp_addr_id2transport(sk, &prim->ssp_addr, prim->ssp_assoc_id);
if (!trans)
return -EINVAL;
@@ -3082,17 +3028,12 @@ static int sctp_setsockopt_primary_addr(struct sock *sk, char __user *optval,
* introduced, at the cost of more packets in the network. Expects an
* integer boolean flag.
*/
-static int sctp_setsockopt_nodelay(struct sock *sk, char __user *optval,
+static int sctp_setsockopt_nodelay(struct sock *sk, int *val,
unsigned int optlen)
{
- int val;
-
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
- return -EFAULT;
-
- sctp_sk(sk)->nodelay = (val == 0) ? 0 : 1;
+ sctp_sk(sk)->nodelay = (*val == 0) ? 0 : 1;
return 0;
}
@@ -3108,9 +3049,10 @@ static int sctp_setsockopt_nodelay(struct sock *sk, char __user *optval,
* be changed.
*
*/
-static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigned int optlen)
+static int sctp_setsockopt_rtoinfo(struct sock *sk,
+ struct sctp_rtoinfo *rtoinfo,
+ unsigned int optlen)
{
- struct sctp_rtoinfo rtoinfo;
struct sctp_association *asoc;
unsigned long rto_min, rto_max;
struct sctp_sock *sp = sctp_sk(sk);
@@ -3118,18 +3060,15 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne
if (optlen != sizeof (struct sctp_rtoinfo))
return -EINVAL;
- if (copy_from_user(&rtoinfo, optval, optlen))
- return -EFAULT;
-
- asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id);
+ asoc = sctp_id2assoc(sk, rtoinfo->srto_assoc_id);
/* Set the values to the specific association */
- if (!asoc && rtoinfo.srto_assoc_id != SCTP_FUTURE_ASSOC &&
+ if (!asoc && rtoinfo->srto_assoc_id != SCTP_FUTURE_ASSOC &&
sctp_style(sk, UDP))
return -EINVAL;
- rto_max = rtoinfo.srto_max;
- rto_min = rtoinfo.srto_min;
+ rto_max = rtoinfo->srto_max;
+ rto_min = rtoinfo->srto_min;
if (rto_max)
rto_max = asoc ? msecs_to_jiffies(rto_max) : rto_max;
@@ -3145,17 +3084,17 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne
return -EINVAL;
if (asoc) {
- if (rtoinfo.srto_initial != 0)
+ if (rtoinfo->srto_initial != 0)
asoc->rto_initial =
- msecs_to_jiffies(rtoinfo.srto_initial);
+ msecs_to_jiffies(rtoinfo->srto_initial);
asoc->rto_max = rto_max;
asoc->rto_min = rto_min;
} else {
/* If there is no association or the association-id = 0
* set the values to the endpoint.
*/
- if (rtoinfo.srto_initial != 0)
- sp->rtoinfo.srto_initial = rtoinfo.srto_initial;
+ if (rtoinfo->srto_initial != 0)
+ sp->rtoinfo.srto_initial = rtoinfo->srto_initial;
sp->rtoinfo.srto_max = rto_max;
sp->rtoinfo.srto_min = rto_min;
}
@@ -3174,26 +3113,25 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne
* See [SCTP] for more information.
*
*/
-static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, unsigned int optlen)
+static int sctp_setsockopt_associnfo(struct sock *sk,
+ struct sctp_assocparams *assocparams,
+ unsigned int optlen)
{
- struct sctp_assocparams assocparams;
struct sctp_association *asoc;
if (optlen != sizeof(struct sctp_assocparams))
return -EINVAL;
- if (copy_from_user(&assocparams, optval, optlen))
- return -EFAULT;
- asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id);
+ asoc = sctp_id2assoc(sk, assocparams->sasoc_assoc_id);
- if (!asoc && assocparams.sasoc_assoc_id != SCTP_FUTURE_ASSOC &&
+ if (!asoc && assocparams->sasoc_assoc_id != SCTP_FUTURE_ASSOC &&
sctp_style(sk, UDP))
return -EINVAL;
/* Set the values to the specific association */
if (asoc) {
- if (assocparams.sasoc_asocmaxrxt != 0) {
+ if (assocparams->sasoc_asocmaxrxt != 0) {
__u32 path_sum = 0;
int paths = 0;
struct sctp_transport *peer_addr;
@@ -3210,24 +3148,25 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, unsig
* then one path.
*/
if (paths > 1 &&
- assocparams.sasoc_asocmaxrxt > path_sum)
+ assocparams->sasoc_asocmaxrxt > path_sum)
return -EINVAL;
- asoc->max_retrans = assocparams.sasoc_asocmaxrxt;
+ asoc->max_retrans = assocparams->sasoc_asocmaxrxt;
}
- if (assocparams.sasoc_cookie_life != 0)
- asoc->cookie_life = ms_to_ktime(assocparams.sasoc_cookie_life);
+ if (assocparams->sasoc_cookie_life != 0)
+ asoc->cookie_life =
+ ms_to_ktime(assocparams->sasoc_cookie_life);
} else {
/* Set the values to the endpoint */
struct sctp_sock *sp = sctp_sk(sk);
- if (assocparams.sasoc_asocmaxrxt != 0)
+ if (assocparams->sasoc_asocmaxrxt != 0)
sp->assocparams.sasoc_asocmaxrxt =
- assocparams.sasoc_asocmaxrxt;
- if (assocparams.sasoc_cookie_life != 0)
+ assocparams->sasoc_asocmaxrxt;
+ if (assocparams->sasoc_cookie_life != 0)
sp->assocparams.sasoc_cookie_life =
- assocparams.sasoc_cookie_life;
+ assocparams->sasoc_cookie_life;
}
return 0;
}
@@ -3242,16 +3181,14 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, unsig
* addresses and a user will receive both PF_INET6 and PF_INET type
* addresses on the socket.
*/
-static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsigned int optlen)
+static int sctp_setsockopt_mappedv4(struct sock *sk, int *val,
+ unsigned int optlen)
{
- int val;
struct sctp_sock *sp = sctp_sk(sk);
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
- return -EFAULT;
- if (val)
+ if (*val)
sp->v4mapped = 1;
else
sp->v4mapped = 0;
@@ -3286,11 +3223,13 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign
* changed (effecting future associations only).
* assoc_value: This parameter specifies the maximum size in bytes.
*/
-static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen)
+static int sctp_setsockopt_maxseg(struct sock *sk,
+ struct sctp_assoc_value *params,
+ unsigned int optlen)
{
struct sctp_sock *sp = sctp_sk(sk);
- struct sctp_assoc_value params;
struct sctp_association *asoc;
+ sctp_assoc_t assoc_id;
int val;
if (optlen == sizeof(int)) {
@@ -3299,19 +3238,17 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
"Use of int in maxseg socket option.\n"
"Use struct sctp_assoc_value instead\n",
current->comm, task_pid_nr(current));
- if (copy_from_user(&val, optval, optlen))
- return -EFAULT;
- params.assoc_id = SCTP_FUTURE_ASSOC;
+ assoc_id = SCTP_FUTURE_ASSOC;
+ val = *(int *)params;
} else if (optlen == sizeof(struct sctp_assoc_value)) {
- if (copy_from_user(&params, optval, optlen))
- return -EFAULT;
- val = params.assoc_value;
+ assoc_id = params->assoc_id;
+ val = params->assoc_value;
} else {
return -EINVAL;
}
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ asoc = sctp_id2assoc(sk, assoc_id);
+ if (!asoc && assoc_id != SCTP_FUTURE_ASSOC &&
sctp_style(sk, UDP))
return -EINVAL;
@@ -3346,12 +3283,12 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
* locally bound addresses. The following structure is used to make a
* set primary request:
*/
-static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval,
+static int sctp_setsockopt_peer_primary_addr(struct sock *sk,
+ struct sctp_setpeerprim *prim,
unsigned int optlen)
{
struct sctp_sock *sp;
struct sctp_association *asoc = NULL;
- struct sctp_setpeerprim prim;
struct sctp_chunk *chunk;
struct sctp_af *af;
int err;
@@ -3364,10 +3301,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
if (optlen != sizeof(struct sctp_setpeerprim))
return -EINVAL;
- if (copy_from_user(&prim, optval, optlen))
- return -EFAULT;
-
- asoc = sctp_id2assoc(sk, prim.sspp_assoc_id);
+ asoc = sctp_id2assoc(sk, prim->sspp_assoc_id);
if (!asoc)
return -EINVAL;
@@ -3380,26 +3314,26 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
if (!sctp_state(asoc, ESTABLISHED))
return -ENOTCONN;
- af = sctp_get_af_specific(prim.sspp_addr.ss_family);
+ af = sctp_get_af_specific(prim->sspp_addr.ss_family);
if (!af)
return -EINVAL;
- if (!af->addr_valid((union sctp_addr *)&prim.sspp_addr, sp, NULL))
+ if (!af->addr_valid((union sctp_addr *)&prim->sspp_addr, sp, NULL))
return -EADDRNOTAVAIL;
- if (!sctp_assoc_lookup_laddr(asoc, (union sctp_addr *)&prim.sspp_addr))
+ if (!sctp_assoc_lookup_laddr(asoc, (union sctp_addr *)&prim->sspp_addr))
return -EADDRNOTAVAIL;
/* Allow security module to validate address. */
err = security_sctp_bind_connect(sk, SCTP_SET_PEER_PRIMARY_ADDR,
- (struct sockaddr *)&prim.sspp_addr,
+ (struct sockaddr *)&prim->sspp_addr,
af->sockaddr_len);
if (err)
return err;
/* Create an ASCONF chunk with SET_PRIMARY parameter */
chunk = sctp_make_asconf_set_prim(asoc,
- (union sctp_addr *)&prim.sspp_addr);
+ (union sctp_addr *)&prim->sspp_addr);
if (!chunk)
return -ENOMEM;
@@ -3410,17 +3344,14 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
return err;
}
-static int sctp_setsockopt_adaptation_layer(struct sock *sk, char __user *optval,
+static int sctp_setsockopt_adaptation_layer(struct sock *sk,
+ struct sctp_setadaptation *adapt,
unsigned int optlen)
{
- struct sctp_setadaptation adaptation;
-
if (optlen != sizeof(struct sctp_setadaptation))
return -EINVAL;
- if (copy_from_user(&adaptation, optval, optlen))
- return -EFAULT;
- sctp_sk(sk)->adaptation_ind = adaptation.ssb_adaptation_ind;
+ sctp_sk(sk)->adaptation_ind = adapt->ssb_adaptation_ind;
return 0;
}
@@ -3439,40 +3370,38 @@ static int sctp_setsockopt_adaptation_layer(struct sock *sk, char __user *optval
* received messages from the peer and does not effect the value that is
* saved with outbound messages.
*/
-static int sctp_setsockopt_context(struct sock *sk, char __user *optval,
+static int sctp_setsockopt_context(struct sock *sk,
+ struct sctp_assoc_value *params,
unsigned int optlen)
{
struct sctp_sock *sp = sctp_sk(sk);
- struct sctp_assoc_value params;
struct sctp_association *asoc;
if (optlen != sizeof(struct sctp_assoc_value))
return -EINVAL;
- if (copy_from_user(&params, optval, optlen))
- return -EFAULT;
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (!asoc && params.assoc_id > SCTP_ALL_ASSOC &&
+ asoc = sctp_id2assoc(sk, params->assoc_id);
+ if (!asoc && params->assoc_id > SCTP_ALL_ASSOC &&
sctp_style(sk, UDP))
return -EINVAL;
if (asoc) {
- asoc->default_rcv_context = params.assoc_value;
+ asoc->default_rcv_context = params->assoc_value;
return 0;
}
if (sctp_style(sk, TCP))
- params.assoc_id = SCTP_FUTURE_ASSOC;
+ params->assoc_id = SCTP_FUTURE_ASSOC;
- if (params.assoc_id == SCTP_FUTURE_ASSOC ||
- params.assoc_id == SCTP_ALL_ASSOC)
- sp->default_rcv_context = params.assoc_value;
+ if (params->assoc_id == SCTP_FUTURE_ASSOC ||
+ params->assoc_id == SCTP_ALL_ASSOC)
+ sp->default_rcv_context = params->assoc_value;
- if (params.assoc_id == SCTP_CURRENT_ASSOC ||
- params.assoc_id == SCTP_ALL_ASSOC)
+ if (params->assoc_id == SCTP_CURRENT_ASSOC ||
+ params->assoc_id == SCTP_ALL_ASSOC)
list_for_each_entry(asoc, &sp->ep->asocs, asocs)
- asoc->default_rcv_context = params.assoc_value;
+ asoc->default_rcv_context = params->assoc_value;
return 0;
}
@@ -3501,18 +3430,13 @@ static int sctp_setsockopt_context(struct sock *sk, char __user *optval,
* application using the one to many model may become confused and act
* incorrectly.
*/
-static int sctp_setsockopt_fragment_interleave(struct sock *sk,
- char __user *optval,
+static int sctp_setsockopt_fragment_interleave(struct sock *sk, int *val,
unsigned int optlen)
{
- int val;
-
if (optlen != sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
- return -EFAULT;
- sctp_sk(sk)->frag_interleave = !!val;
+ sctp_sk(sk)->frag_interleave = !!*val;
if (!sctp_sk(sk)->frag_interleave)
sctp_sk(sk)->ep->intl_enable = 0;
@@ -3537,24 +3461,19 @@ static int sctp_setsockopt_fragment_interleave(struct sock *sk,
* call as long as the user provided buffer is large enough to hold the
* message.
*/
-static int sctp_setsockopt_partial_delivery_point(struct sock *sk,
- char __user *optval,
+static int sctp_setsockopt_partial_delivery_point(struct sock *sk, u32 *val,
unsigned int optlen)
{
- u32 val;
-
if (optlen != sizeof(u32))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
- return -EFAULT;
/* Note: We double the receive buffer from what the user sets
* it to be, also initial rwnd is based on rcvbuf/2.
*/
- if (val > (sk->sk_rcvbuf >> 1))
+ if (*val > (sk->sk_rcvbuf >> 1))
return -EINVAL;
- sctp_sk(sk)->pd_point = val;
+ sctp_sk(sk)->pd_point = *val;
return 0; /* is this the right error code? */
}
@@ -3571,12 +3490,13 @@ static int sctp_setsockopt_partial_delivery_point(struct sock *sk,
* future associations inheriting the socket value.
*/
static int sctp_setsockopt_maxburst(struct sock *sk,
- char __user *optval,
+ struct sctp_assoc_value *params,
unsigned int optlen)
{
struct sctp_sock *sp = sctp_sk(sk);
- struct sctp_assoc_value params;
struct sctp_association *asoc;
+ sctp_assoc_t assoc_id;
+ u32 assoc_value;
if (optlen == sizeof(int)) {
pr_warn_ratelimited(DEPRECATED
@@ -3584,37 +3504,33 @@ static int sctp_setsockopt_maxburst(struct sock *sk,
"Use of int in max_burst socket option deprecated.\n"
"Use struct sctp_assoc_value instead\n",
current->comm, task_pid_nr(current));
- if (copy_from_user(&params.assoc_value, optval, optlen))
- return -EFAULT;
- params.assoc_id = SCTP_FUTURE_ASSOC;
+ assoc_id = SCTP_FUTURE_ASSOC;
+ assoc_value = *((int *)params);
} else if (optlen == sizeof(struct sctp_assoc_value)) {
- if (copy_from_user(&params, optval, optlen))
- return -EFAULT;
+ assoc_id = params->assoc_id;
+ assoc_value = params->assoc_value;
} else
return -EINVAL;
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (!asoc && params.assoc_id > SCTP_ALL_ASSOC &&
- sctp_style(sk, UDP))
+ asoc = sctp_id2assoc(sk, assoc_id);
+ if (!asoc && assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP))
return -EINVAL;
if (asoc) {
- asoc->max_burst = params.assoc_value;
+ asoc->max_burst = assoc_value;
return 0;
}
if (sctp_style(sk, TCP))
- params.assoc_id = SCTP_FUTURE_ASSOC;
+ assoc_id = SCTP_FUTURE_ASSOC;
- if (params.assoc_id == SCTP_FUTURE_ASSOC ||
- params.assoc_id == SCTP_ALL_ASSOC)
- sp->max_burst = params.assoc_value;
+ if (assoc_id == SCTP_FUTURE_ASSOC || assoc_id == SCTP_ALL_ASSOC)
+ sp->max_burst = assoc_value;
- if (params.assoc_id == SCTP_CURRENT_ASSOC ||
- params.assoc_id == SCTP_ALL_ASSOC)
+ if (assoc_id == SCTP_CURRENT_ASSOC || assoc_id == SCTP_ALL_ASSOC)
list_for_each_entry(asoc, &sp->ep->asocs, asocs)
- asoc->max_burst = params.assoc_value;
+ asoc->max_burst = assoc_value;
return 0;
}
@@ -3627,21 +3543,18 @@ static int sctp_setsockopt_maxburst(struct sock *sk,
* will only effect future associations on the socket.
*/
static int sctp_setsockopt_auth_chunk(struct sock *sk,
- char __user *optval,
+ struct sctp_authchunk *val,
unsigned int optlen)
{
struct sctp_endpoint *ep = sctp_sk(sk)->ep;
- struct sctp_authchunk val;
if (!ep->auth_enable)
return -EACCES;
if (optlen != sizeof(struct sctp_authchunk))
return -EINVAL;
- if (copy_from_user(&val, optval, optlen))
- return -EFAULT;
- switch (val.sauth_chunk) {
+ switch (val->sauth_chunk) {
case SCTP_CID_INIT:
case SCTP_CID_INIT_ACK:
case SCTP_CID_SHUTDOWN_COMPLETE:
@@ -3650,7 +3563,7 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk,
}
/* add this chunk id to the endpoint */
- return sctp_auth_ep_add_chunkid(ep, val.sauth_chunk);
+ return sctp_auth_ep_add_chunkid(ep, val->sauth_chunk);
}
/*
@@ -3660,13 +3573,11 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk,
* endpoint requires the peer to use.
*/
static int sctp_setsockopt_hmac_ident(struct sock *sk,
- char __user *optval,
+ struct sctp_hmacalgo *hmacs,
unsigned int optlen)
{
struct sctp_endpoint *ep = sctp_sk(sk)->ep;
- struct sctp_hmacalgo *hmacs;
u32 idents;
- int err;
if (!ep->auth_enable)
return -EACCES;
@@ -3676,21 +3587,12 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
optlen = min_t(unsigned int, optlen, sizeof(struct sctp_hmacalgo) +
SCTP_AUTH_NUM_HMACS * sizeof(u16));
- hmacs = memdup_user(optval, optlen);
- if (IS_ERR(hmacs))
- return PTR_ERR(hmacs);
-
idents = hmacs->shmac_num_idents;
if (idents == 0 || idents > SCTP_AUTH_NUM_HMACS ||
- (idents * sizeof(u16)) > (optlen - sizeof(struct sctp_hmacalgo))) {
- err = -EINVAL;
- goto out;
- }
+ (idents * sizeof(u16)) > (optlen - sizeof(struct sctp_hmacalgo)))
+ return -EINVAL;
- err = sctp_auth_ep_set_hmacs(ep, hmacs);
-out:
- kfree(hmacs);
- return err;
+ return sctp_auth_ep_set_hmacs(ep, hmacs);
}
/*
@@ -3700,11 +3602,10 @@ out:
* association shared key.
*/
static int sctp_setsockopt_auth_key(struct sock *sk,
- char __user *optval,
+ struct sctp_authkey *authkey,
unsigned int optlen)
{
struct sctp_endpoint *ep = sctp_sk(sk)->ep;
- struct sctp_authkey *authkey;
struct sctp_association *asoc;
int ret = -EINVAL;
@@ -3715,10 +3616,6 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
*/
optlen = min_t(unsigned int, optlen, USHRT_MAX + sizeof(*authkey));
- authkey = memdup_user(optval, optlen);
- if (IS_ERR(authkey))
- return PTR_ERR(authkey);
-
if (authkey->sca_keylength > optlen - sizeof(*authkey))
goto out;
@@ -3755,7 +3652,7 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
}
out:
- kzfree(authkey);
+ memzero_explicit(authkey, optlen);
return ret;
}
@@ -3766,42 +3663,39 @@ out:
* the association shared key.
*/
static int sctp_setsockopt_active_key(struct sock *sk,
- char __user *optval,
+ struct sctp_authkeyid *val,
unsigned int optlen)
{
struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_association *asoc;
- struct sctp_authkeyid val;
int ret = 0;
if (optlen != sizeof(struct sctp_authkeyid))
return -EINVAL;
- if (copy_from_user(&val, optval, optlen))
- return -EFAULT;
- asoc = sctp_id2assoc(sk, val.scact_assoc_id);
- if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC &&
+ asoc = sctp_id2assoc(sk, val->scact_assoc_id);
+ if (!asoc && val->scact_assoc_id > SCTP_ALL_ASSOC &&
sctp_style(sk, UDP))
return -EINVAL;
if (asoc)
- return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber);
+ return sctp_auth_set_active_key(ep, asoc, val->scact_keynumber);
if (sctp_style(sk, TCP))
- val.scact_assoc_id = SCTP_FUTURE_ASSOC;
+ val->scact_assoc_id = SCTP_FUTURE_ASSOC;
- if (val.scact_assoc_id == SCTP_FUTURE_ASSOC ||
- val.scact_assoc_id == SCTP_ALL_ASSOC) {
- ret = sctp_auth_set_active_key(ep, asoc, val.scact_keynumber);
+ if (val->scact_assoc_id == SCTP_FUTURE_ASSOC ||
+ val->scact_assoc_id == SCTP_ALL_ASSOC) {
+ ret = sctp_auth_set_active_key(ep, asoc, val->scact_keynumber);
if (ret)
return ret;
}
- if (val.scact_assoc_id == SCTP_CURRENT_ASSOC ||
- val.scact_assoc_id == SCTP_ALL_ASSOC) {
+ if (val->scact_assoc_id == SCTP_CURRENT_ASSOC ||
+ val->scact_assoc_id == SCTP_ALL_ASSOC) {
list_for_each_entry(asoc, &ep->asocs, asocs) {
int res = sctp_auth_set_active_key(ep, asoc,
- val.scact_keynumber);
+ val->scact_keynumber);
if (res && !ret)
ret = res;
@@ -3817,42 +3711,39 @@ static int sctp_setsockopt_active_key(struct sock *sk,
* This set option will delete a shared secret key from use.
*/
static int sctp_setsockopt_del_key(struct sock *sk,
- char __user *optval,
+ struct sctp_authkeyid *val,
unsigned int optlen)
{
struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_association *asoc;
- struct sctp_authkeyid val;
int ret = 0;
if (optlen != sizeof(struct sctp_authkeyid))
return -EINVAL;
- if (copy_from_user(&val, optval, optlen))
- return -EFAULT;
- asoc = sctp_id2assoc(sk, val.scact_assoc_id);
- if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC &&
+ asoc = sctp_id2assoc(sk, val->scact_assoc_id);
+ if (!asoc && val->scact_assoc_id > SCTP_ALL_ASSOC &&
sctp_style(sk, UDP))
return -EINVAL;
if (asoc)
- return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber);
+ return sctp_auth_del_key_id(ep, asoc, val->scact_keynumber);
if (sctp_style(sk, TCP))
- val.scact_assoc_id = SCTP_FUTURE_ASSOC;
+ val->scact_assoc_id = SCTP_FUTURE_ASSOC;
- if (val.scact_assoc_id == SCTP_FUTURE_ASSOC ||
- val.scact_assoc_id == SCTP_ALL_ASSOC) {
- ret = sctp_auth_del_key_id(ep, asoc, val.scact_keynumber);
+ if (val->scact_assoc_id == SCTP_FUTURE_ASSOC ||
+ val->scact_assoc_id == SCTP_ALL_ASSOC) {
+ ret = sctp_auth_del_key_id(ep, asoc, val->scact_keynumber);
if (ret)
return ret;
}
- if (val.scact_assoc_id == SCTP_CURRENT_ASSOC ||
- val.scact_assoc_id == SCTP_ALL_ASSOC) {
+ if (val->scact_assoc_id == SCTP_CURRENT_ASSOC ||
+ val->scact_assoc_id == SCTP_ALL_ASSOC) {
list_for_each_entry(asoc, &ep->asocs, asocs) {
int res = sctp_auth_del_key_id(ep, asoc,
- val.scact_keynumber);
+ val->scact_keynumber);
if (res && !ret)
ret = res;
@@ -3867,42 +3758,40 @@ static int sctp_setsockopt_del_key(struct sock *sk,
*
* This set option will deactivate a shared secret key.
*/
-static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval,
+static int sctp_setsockopt_deactivate_key(struct sock *sk,
+ struct sctp_authkeyid *val,
unsigned int optlen)
{
struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_association *asoc;
- struct sctp_authkeyid val;
int ret = 0;
if (optlen != sizeof(struct sctp_authkeyid))
return -EINVAL;
- if (copy_from_user(&val, optval, optlen))
- return -EFAULT;
- asoc = sctp_id2assoc(sk, val.scact_assoc_id);
- if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC &&
+ asoc = sctp_id2assoc(sk, val->scact_assoc_id);
+ if (!asoc && val->scact_assoc_id > SCTP_ALL_ASSOC &&
sctp_style(sk, UDP))
return -EINVAL;
if (asoc)
- return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber);
+ return sctp_auth_deact_key_id(ep, asoc, val->scact_keynumber);
if (sctp_style(sk, TCP))
- val.scact_assoc_id = SCTP_FUTURE_ASSOC;
+ val->scact_assoc_id = SCTP_FUTURE_ASSOC;
- if (val.scact_assoc_id == SCTP_FUTURE_ASSOC ||
- val.scact_assoc_id == SCTP_ALL_ASSOC) {
- ret = sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber);
+ if (val->scact_assoc_id == SCTP_FUTURE_ASSOC ||
+ val->scact_assoc_id == SCTP_ALL_ASSOC) {
+ ret = sctp_auth_deact_key_id(ep, asoc, val->scact_keynumber);
if (ret)
return ret;
}
- if (val.scact_assoc_id == SCTP_CURRENT_ASSOC ||
- val.scact_assoc_id == SCTP_ALL_ASSOC) {
+ if (val->scact_assoc_id == SCTP_CURRENT_ASSOC ||
+ val->scact_assoc_id == SCTP_ALL_ASSOC) {
list_for_each_entry(asoc, &ep->asocs, asocs) {
int res = sctp_auth_deact_key_id(ep, asoc,
- val.scact_keynumber);
+ val->scact_keynumber);
if (res && !ret)
ret = res;
@@ -3926,26 +3815,23 @@ static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval,
* Note. In this implementation, socket operation overrides default parameter
* being set by sysctl as well as FreeBSD implementation
*/
-static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
+static int sctp_setsockopt_auto_asconf(struct sock *sk, int *val,
unsigned int optlen)
{
- int val;
struct sctp_sock *sp = sctp_sk(sk);
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
- return -EFAULT;
- if (!sctp_is_ep_boundall(sk) && val)
+ if (!sctp_is_ep_boundall(sk) && *val)
return -EINVAL;
- if ((val && sp->do_auto_asconf) || (!val && !sp->do_auto_asconf))
+ if ((*val && sp->do_auto_asconf) || (!*val && !sp->do_auto_asconf))
return 0;
spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock);
- if (val == 0 && sp->do_auto_asconf) {
+ if (*val == 0 && sp->do_auto_asconf) {
list_del(&sp->auto_asconf_list);
sp->do_auto_asconf = 0;
- } else if (val && !sp->do_auto_asconf) {
+ } else if (*val && !sp->do_auto_asconf) {
list_add_tail(&sp->auto_asconf_list,
&sock_net(sk)->sctp.auto_asconf_splist);
sp->do_auto_asconf = 1;
@@ -3962,176 +3848,154 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
* http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt
*/
static int sctp_setsockopt_paddr_thresholds(struct sock *sk,
- char __user *optval,
+ struct sctp_paddrthlds_v2 *val,
unsigned int optlen, bool v2)
{
- struct sctp_paddrthlds_v2 val;
struct sctp_transport *trans;
struct sctp_association *asoc;
int len;
- len = v2 ? sizeof(val) : sizeof(struct sctp_paddrthlds);
+ len = v2 ? sizeof(*val) : sizeof(struct sctp_paddrthlds);
if (optlen < len)
return -EINVAL;
- if (copy_from_user(&val, optval, len))
- return -EFAULT;
- if (v2 && val.spt_pathpfthld > val.spt_pathcpthld)
+ if (v2 && val->spt_pathpfthld > val->spt_pathcpthld)
return -EINVAL;
- if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
- trans = sctp_addr_id2transport(sk, &val.spt_address,
- val.spt_assoc_id);
+ if (!sctp_is_any(sk, (const union sctp_addr *)&val->spt_address)) {
+ trans = sctp_addr_id2transport(sk, &val->spt_address,
+ val->spt_assoc_id);
if (!trans)
return -ENOENT;
- if (val.spt_pathmaxrxt)
- trans->pathmaxrxt = val.spt_pathmaxrxt;
+ if (val->spt_pathmaxrxt)
+ trans->pathmaxrxt = val->spt_pathmaxrxt;
if (v2)
- trans->ps_retrans = val.spt_pathcpthld;
- trans->pf_retrans = val.spt_pathpfthld;
+ trans->ps_retrans = val->spt_pathcpthld;
+ trans->pf_retrans = val->spt_pathpfthld;
return 0;
}
- asoc = sctp_id2assoc(sk, val.spt_assoc_id);
- if (!asoc && val.spt_assoc_id != SCTP_FUTURE_ASSOC &&
+ asoc = sctp_id2assoc(sk, val->spt_assoc_id);
+ if (!asoc && val->spt_assoc_id != SCTP_FUTURE_ASSOC &&
sctp_style(sk, UDP))
return -EINVAL;
if (asoc) {
list_for_each_entry(trans, &asoc->peer.transport_addr_list,
transports) {
- if (val.spt_pathmaxrxt)
- trans->pathmaxrxt = val.spt_pathmaxrxt;
+ if (val->spt_pathmaxrxt)
+ trans->pathmaxrxt = val->spt_pathmaxrxt;
if (v2)
- trans->ps_retrans = val.spt_pathcpthld;
- trans->pf_retrans = val.spt_pathpfthld;
+ trans->ps_retrans = val->spt_pathcpthld;
+ trans->pf_retrans = val->spt_pathpfthld;
}
- if (val.spt_pathmaxrxt)
- asoc->pathmaxrxt = val.spt_pathmaxrxt;
+ if (val->spt_pathmaxrxt)
+ asoc->pathmaxrxt = val->spt_pathmaxrxt;
if (v2)
- asoc->ps_retrans = val.spt_pathcpthld;
- asoc->pf_retrans = val.spt_pathpfthld;
+ asoc->ps_retrans = val->spt_pathcpthld;
+ asoc->pf_retrans = val->spt_pathpfthld;
} else {
struct sctp_sock *sp = sctp_sk(sk);
- if (val.spt_pathmaxrxt)
- sp->pathmaxrxt = val.spt_pathmaxrxt;
+ if (val->spt_pathmaxrxt)
+ sp->pathmaxrxt = val->spt_pathmaxrxt;
if (v2)
- sp->ps_retrans = val.spt_pathcpthld;
- sp->pf_retrans = val.spt_pathpfthld;
+ sp->ps_retrans = val->spt_pathcpthld;
+ sp->pf_retrans = val->spt_pathpfthld;
}
return 0;
}
-static int sctp_setsockopt_recvrcvinfo(struct sock *sk,
- char __user *optval,
+static int sctp_setsockopt_recvrcvinfo(struct sock *sk, int *val,
unsigned int optlen)
{
- int val;
-
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *) optval))
- return -EFAULT;
- sctp_sk(sk)->recvrcvinfo = (val == 0) ? 0 : 1;
+ sctp_sk(sk)->recvrcvinfo = (*val == 0) ? 0 : 1;
return 0;
}
-static int sctp_setsockopt_recvnxtinfo(struct sock *sk,
- char __user *optval,
+static int sctp_setsockopt_recvnxtinfo(struct sock *sk, int *val,
unsigned int optlen)
{
- int val;
-
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *) optval))
- return -EFAULT;
- sctp_sk(sk)->recvnxtinfo = (val == 0) ? 0 : 1;
+ sctp_sk(sk)->recvnxtinfo = (*val == 0) ? 0 : 1;
return 0;
}
static int sctp_setsockopt_pr_supported(struct sock *sk,
- char __user *optval,
+ struct sctp_assoc_value *params,
unsigned int optlen)
{
- struct sctp_assoc_value params;
struct sctp_association *asoc;
- if (optlen != sizeof(params))
+ if (optlen != sizeof(*params))
return -EINVAL;
- if (copy_from_user(&params, optval, optlen))
- return -EFAULT;
-
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ asoc = sctp_id2assoc(sk, params->assoc_id);
+ if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC &&
sctp_style(sk, UDP))
return -EINVAL;
- sctp_sk(sk)->ep->prsctp_enable = !!params.assoc_value;
+ sctp_sk(sk)->ep->prsctp_enable = !!params->assoc_value;
return 0;
}
static int sctp_setsockopt_default_prinfo(struct sock *sk,
- char __user *optval,
+ struct sctp_default_prinfo *info,
unsigned int optlen)
{
struct sctp_sock *sp = sctp_sk(sk);
- struct sctp_default_prinfo info;
struct sctp_association *asoc;
int retval = -EINVAL;
- if (optlen != sizeof(info))
+ if (optlen != sizeof(*info))
goto out;
- if (copy_from_user(&info, optval, sizeof(info))) {
- retval = -EFAULT;
+ if (info->pr_policy & ~SCTP_PR_SCTP_MASK)
goto out;
- }
- if (info.pr_policy & ~SCTP_PR_SCTP_MASK)
- goto out;
+ if (info->pr_policy == SCTP_PR_SCTP_NONE)
+ info->pr_value = 0;
- if (info.pr_policy == SCTP_PR_SCTP_NONE)
- info.pr_value = 0;
-
- asoc = sctp_id2assoc(sk, info.pr_assoc_id);
- if (!asoc && info.pr_assoc_id > SCTP_ALL_ASSOC &&
+ asoc = sctp_id2assoc(sk, info->pr_assoc_id);
+ if (!asoc && info->pr_assoc_id > SCTP_ALL_ASSOC &&
sctp_style(sk, UDP))
goto out;
retval = 0;
if (asoc) {
- SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy);
- asoc->default_timetolive = info.pr_value;
+ SCTP_PR_SET_POLICY(asoc->default_flags, info->pr_policy);
+ asoc->default_timetolive = info->pr_value;
goto out;
}
if (sctp_style(sk, TCP))
- info.pr_assoc_id = SCTP_FUTURE_ASSOC;
+ info->pr_assoc_id = SCTP_FUTURE_ASSOC;
- if (info.pr_assoc_id == SCTP_FUTURE_ASSOC ||
- info.pr_assoc_id == SCTP_ALL_ASSOC) {
- SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy);
- sp->default_timetolive = info.pr_value;
+ if (info->pr_assoc_id == SCTP_FUTURE_ASSOC ||
+ info->pr_assoc_id == SCTP_ALL_ASSOC) {
+ SCTP_PR_SET_POLICY(sp->default_flags, info->pr_policy);
+ sp->default_timetolive = info->pr_value;
}
- if (info.pr_assoc_id == SCTP_CURRENT_ASSOC ||
- info.pr_assoc_id == SCTP_ALL_ASSOC) {
+ if (info->pr_assoc_id == SCTP_CURRENT_ASSOC ||
+ info->pr_assoc_id == SCTP_ALL_ASSOC) {
list_for_each_entry(asoc, &sp->ep->asocs, asocs) {
- SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy);
- asoc->default_timetolive = info.pr_value;
+ SCTP_PR_SET_POLICY(asoc->default_flags,
+ info->pr_policy);
+ asoc->default_timetolive = info->pr_value;
}
}
@@ -4140,27 +4004,21 @@ out:
}
static int sctp_setsockopt_reconfig_supported(struct sock *sk,
- char __user *optval,
+ struct sctp_assoc_value *params,
unsigned int optlen)
{
- struct sctp_assoc_value params;
struct sctp_association *asoc;
int retval = -EINVAL;
- if (optlen != sizeof(params))
- goto out;
-
- if (copy_from_user(&params, optval, optlen)) {
- retval = -EFAULT;
+ if (optlen != sizeof(*params))
goto out;
- }
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ asoc = sctp_id2assoc(sk, params->assoc_id);
+ if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC &&
sctp_style(sk, UDP))
goto out;
- sctp_sk(sk)->ep->reconf_enable = !!params.assoc_value;
+ sctp_sk(sk)->ep->reconf_enable = !!params->assoc_value;
retval = 0;
@@ -4169,60 +4027,52 @@ out:
}
static int sctp_setsockopt_enable_strreset(struct sock *sk,
- char __user *optval,
+ struct sctp_assoc_value *params,
unsigned int optlen)
{
struct sctp_endpoint *ep = sctp_sk(sk)->ep;
- struct sctp_assoc_value params;
struct sctp_association *asoc;
int retval = -EINVAL;
- if (optlen != sizeof(params))
+ if (optlen != sizeof(*params))
goto out;
- if (copy_from_user(&params, optval, optlen)) {
- retval = -EFAULT;
- goto out;
- }
-
- if (params.assoc_value & (~SCTP_ENABLE_STRRESET_MASK))
+ if (params->assoc_value & (~SCTP_ENABLE_STRRESET_MASK))
goto out;
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (!asoc && params.assoc_id > SCTP_ALL_ASSOC &&
+ asoc = sctp_id2assoc(sk, params->assoc_id);
+ if (!asoc && params->assoc_id > SCTP_ALL_ASSOC &&
sctp_style(sk, UDP))
goto out;
retval = 0;
if (asoc) {
- asoc->strreset_enable = params.assoc_value;
+ asoc->strreset_enable = params->assoc_value;
goto out;
}
if (sctp_style(sk, TCP))
- params.assoc_id = SCTP_FUTURE_ASSOC;
+ params->assoc_id = SCTP_FUTURE_ASSOC;
- if (params.assoc_id == SCTP_FUTURE_ASSOC ||
- params.assoc_id == SCTP_ALL_ASSOC)
- ep->strreset_enable = params.assoc_value;
+ if (params->assoc_id == SCTP_FUTURE_ASSOC ||
+ params->assoc_id == SCTP_ALL_ASSOC)
+ ep->strreset_enable = params->assoc_value;
- if (params.assoc_id == SCTP_CURRENT_ASSOC ||
- params.assoc_id == SCTP_ALL_ASSOC)
+ if (params->assoc_id == SCTP_CURRENT_ASSOC ||
+ params->assoc_id == SCTP_ALL_ASSOC)
list_for_each_entry(asoc, &ep->asocs, asocs)
- asoc->strreset_enable = params.assoc_value;
+ asoc->strreset_enable = params->assoc_value;
out:
return retval;
}
static int sctp_setsockopt_reset_streams(struct sock *sk,
- char __user *optval,
+ struct sctp_reset_streams *params,
unsigned int optlen)
{
- struct sctp_reset_streams *params;
struct sctp_association *asoc;
- int retval = -EINVAL;
if (optlen < sizeof(*params))
return -EINVAL;
@@ -4230,116 +4080,82 @@ static int sctp_setsockopt_reset_streams(struct sock *sk,
optlen = min_t(unsigned int, optlen, USHRT_MAX +
sizeof(__u16) * sizeof(*params));
- params = memdup_user(optval, optlen);
- if (IS_ERR(params))
- return PTR_ERR(params);
-
if (params->srs_number_streams * sizeof(__u16) >
optlen - sizeof(*params))
- goto out;
+ return -EINVAL;
asoc = sctp_id2assoc(sk, params->srs_assoc_id);
if (!asoc)
- goto out;
-
- retval = sctp_send_reset_streams(asoc, params);
+ return -EINVAL;
-out:
- kfree(params);
- return retval;
+ return sctp_send_reset_streams(asoc, params);
}
-static int sctp_setsockopt_reset_assoc(struct sock *sk,
- char __user *optval,
+static int sctp_setsockopt_reset_assoc(struct sock *sk, sctp_assoc_t *associd,
unsigned int optlen)
{
struct sctp_association *asoc;
- sctp_assoc_t associd;
- int retval = -EINVAL;
- if (optlen != sizeof(associd))
- goto out;
-
- if (copy_from_user(&associd, optval, optlen)) {
- retval = -EFAULT;
- goto out;
- }
+ if (optlen != sizeof(*associd))
+ return -EINVAL;
- asoc = sctp_id2assoc(sk, associd);
+ asoc = sctp_id2assoc(sk, *associd);
if (!asoc)
- goto out;
-
- retval = sctp_send_reset_assoc(asoc);
+ return -EINVAL;
-out:
- return retval;
+ return sctp_send_reset_assoc(asoc);
}
static int sctp_setsockopt_add_streams(struct sock *sk,
- char __user *optval,
+ struct sctp_add_streams *params,
unsigned int optlen)
{
struct sctp_association *asoc;
- struct sctp_add_streams params;
- int retval = -EINVAL;
- if (optlen != sizeof(params))
- goto out;
-
- if (copy_from_user(&params, optval, optlen)) {
- retval = -EFAULT;
- goto out;
- }
+ if (optlen != sizeof(*params))
+ return -EINVAL;
- asoc = sctp_id2assoc(sk, params.sas_assoc_id);
+ asoc = sctp_id2assoc(sk, params->sas_assoc_id);
if (!asoc)
- goto out;
-
- retval = sctp_send_add_streams(asoc, &params);
+ return -EINVAL;
-out:
- return retval;
+ return sctp_send_add_streams(asoc, params);
}
static int sctp_setsockopt_scheduler(struct sock *sk,
- char __user *optval,
+ struct sctp_assoc_value *params,
unsigned int optlen)
{
struct sctp_sock *sp = sctp_sk(sk);
struct sctp_association *asoc;
- struct sctp_assoc_value params;
int retval = 0;
- if (optlen < sizeof(params))
+ if (optlen < sizeof(*params))
return -EINVAL;
- optlen = sizeof(params);
- if (copy_from_user(&params, optval, optlen))
- return -EFAULT;
-
- if (params.assoc_value > SCTP_SS_MAX)
+ if (params->assoc_value > SCTP_SS_MAX)
return -EINVAL;
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (!asoc && params.assoc_id > SCTP_ALL_ASSOC &&
+ asoc = sctp_id2assoc(sk, params->assoc_id);
+ if (!asoc && params->assoc_id > SCTP_ALL_ASSOC &&
sctp_style(sk, UDP))
return -EINVAL;
if (asoc)
- return sctp_sched_set_sched(asoc, params.assoc_value);
+ return sctp_sched_set_sched(asoc, params->assoc_value);
if (sctp_style(sk, TCP))
- params.assoc_id = SCTP_FUTURE_ASSOC;
+ params->assoc_id = SCTP_FUTURE_ASSOC;
- if (params.assoc_id == SCTP_FUTURE_ASSOC ||
- params.assoc_id == SCTP_ALL_ASSOC)
- sp->default_ss = params.assoc_value;
+ if (params->assoc_id == SCTP_FUTURE_ASSOC ||
+ params->assoc_id == SCTP_ALL_ASSOC)
+ sp->default_ss = params->assoc_value;
- if (params.assoc_id == SCTP_CURRENT_ASSOC ||
- params.assoc_id == SCTP_ALL_ASSOC) {
+ if (params->assoc_id == SCTP_CURRENT_ASSOC ||
+ params->assoc_id == SCTP_ALL_ASSOC) {
list_for_each_entry(asoc, &sp->ep->asocs, asocs) {
int ret = sctp_sched_set_sched(asoc,
- params.assoc_value);
+ params->assoc_value);
if (ret && !retval)
retval = ret;
@@ -4350,38 +4166,32 @@ static int sctp_setsockopt_scheduler(struct sock *sk,
}
static int sctp_setsockopt_scheduler_value(struct sock *sk,
- char __user *optval,
+ struct sctp_stream_value *params,
unsigned int optlen)
{
- struct sctp_stream_value params;
struct sctp_association *asoc;
int retval = -EINVAL;
- if (optlen < sizeof(params))
- goto out;
-
- optlen = sizeof(params);
- if (copy_from_user(&params, optval, optlen)) {
- retval = -EFAULT;
+ if (optlen < sizeof(*params))
goto out;
- }
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (!asoc && params.assoc_id != SCTP_CURRENT_ASSOC &&
+ asoc = sctp_id2assoc(sk, params->assoc_id);
+ if (!asoc && params->assoc_id != SCTP_CURRENT_ASSOC &&
sctp_style(sk, UDP))
goto out;
if (asoc) {
- retval = sctp_sched_set_value(asoc, params.stream_id,
- params.stream_value, GFP_KERNEL);
+ retval = sctp_sched_set_value(asoc, params->stream_id,
+ params->stream_value, GFP_KERNEL);
goto out;
}
retval = 0;
list_for_each_entry(asoc, &sctp_sk(sk)->ep->asocs, asocs) {
- int ret = sctp_sched_set_value(asoc, params.stream_id,
- params.stream_value, GFP_KERNEL);
+ int ret = sctp_sched_set_value(asoc, params->stream_id,
+ params->stream_value,
+ GFP_KERNEL);
if (ret && !retval) /* try to return the 1st error. */
retval = ret;
}
@@ -4391,46 +4201,30 @@ out:
}
static int sctp_setsockopt_interleaving_supported(struct sock *sk,
- char __user *optval,
+ struct sctp_assoc_value *p,
unsigned int optlen)
{
struct sctp_sock *sp = sctp_sk(sk);
- struct sctp_assoc_value params;
struct sctp_association *asoc;
- int retval = -EINVAL;
- if (optlen < sizeof(params))
- goto out;
-
- optlen = sizeof(params);
- if (copy_from_user(&params, optval, optlen)) {
- retval = -EFAULT;
- goto out;
- }
+ if (optlen < sizeof(*p))
+ return -EINVAL;
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
- sctp_style(sk, UDP))
- goto out;
+ asoc = sctp_id2assoc(sk, p->assoc_id);
+ if (!asoc && p->assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP))
+ return -EINVAL;
if (!sock_net(sk)->sctp.intl_enable || !sp->frag_interleave) {
- retval = -EPERM;
- goto out;
+ return -EPERM;
}
- sp->ep->intl_enable = !!params.assoc_value;
-
- retval = 0;
-
-out:
- return retval;
+ sp->ep->intl_enable = !!p->assoc_value;
+ return 0;
}
-static int sctp_setsockopt_reuse_port(struct sock *sk, char __user *optval,
+static int sctp_setsockopt_reuse_port(struct sock *sk, int *val,
unsigned int optlen)
{
- int val;
-
if (!sctp_style(sk, TCP))
return -EOPNOTSUPP;
@@ -4440,10 +4234,7 @@ static int sctp_setsockopt_reuse_port(struct sock *sk, char __user *optval,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
- return -EFAULT;
-
- sctp_sk(sk)->reuse = !!val;
+ sctp_sk(sk)->reuse = !!*val;
return 0;
}
@@ -4469,45 +4260,40 @@ static int sctp_assoc_ulpevent_type_set(struct sctp_event *param,
return 0;
}
-static int sctp_setsockopt_event(struct sock *sk, char __user *optval,
+static int sctp_setsockopt_event(struct sock *sk, struct sctp_event *param,
unsigned int optlen)
{
struct sctp_sock *sp = sctp_sk(sk);
struct sctp_association *asoc;
- struct sctp_event param;
int retval = 0;
- if (optlen < sizeof(param))
+ if (optlen < sizeof(*param))
return -EINVAL;
- optlen = sizeof(param);
- if (copy_from_user(&param, optval, optlen))
- return -EFAULT;
-
- if (param.se_type < SCTP_SN_TYPE_BASE ||
- param.se_type > SCTP_SN_TYPE_MAX)
+ if (param->se_type < SCTP_SN_TYPE_BASE ||
+ param->se_type > SCTP_SN_TYPE_MAX)
return -EINVAL;
- asoc = sctp_id2assoc(sk, param.se_assoc_id);
- if (!asoc && param.se_assoc_id > SCTP_ALL_ASSOC &&
+ asoc = sctp_id2assoc(sk, param->se_assoc_id);
+ if (!asoc && param->se_assoc_id > SCTP_ALL_ASSOC &&
sctp_style(sk, UDP))
return -EINVAL;
if (asoc)
- return sctp_assoc_ulpevent_type_set(&param, asoc);
+ return sctp_assoc_ulpevent_type_set(param, asoc);
if (sctp_style(sk, TCP))
- param.se_assoc_id = SCTP_FUTURE_ASSOC;
+ param->se_assoc_id = SCTP_FUTURE_ASSOC;
- if (param.se_assoc_id == SCTP_FUTURE_ASSOC ||
- param.se_assoc_id == SCTP_ALL_ASSOC)
+ if (param->se_assoc_id == SCTP_FUTURE_ASSOC ||
+ param->se_assoc_id == SCTP_ALL_ASSOC)
sctp_ulpevent_type_set(&sp->subscribe,
- param.se_type, param.se_on);
+ param->se_type, param->se_on);
- if (param.se_assoc_id == SCTP_CURRENT_ASSOC ||
- param.se_assoc_id == SCTP_ALL_ASSOC) {
+ if (param->se_assoc_id == SCTP_CURRENT_ASSOC ||
+ param->se_assoc_id == SCTP_ALL_ASSOC) {
list_for_each_entry(asoc, &sp->ep->asocs, asocs) {
- int ret = sctp_assoc_ulpevent_type_set(&param, asoc);
+ int ret = sctp_assoc_ulpevent_type_set(param, asoc);
if (ret && !retval)
retval = ret;
@@ -4518,29 +4304,23 @@ static int sctp_setsockopt_event(struct sock *sk, char __user *optval,
}
static int sctp_setsockopt_asconf_supported(struct sock *sk,
- char __user *optval,
+ struct sctp_assoc_value *params,
unsigned int optlen)
{
- struct sctp_assoc_value params;
struct sctp_association *asoc;
struct sctp_endpoint *ep;
int retval = -EINVAL;
- if (optlen != sizeof(params))
+ if (optlen != sizeof(*params))
goto out;
- if (copy_from_user(&params, optval, optlen)) {
- retval = -EFAULT;
- goto out;
- }
-
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ asoc = sctp_id2assoc(sk, params->assoc_id);
+ if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC &&
sctp_style(sk, UDP))
goto out;
ep = sctp_sk(sk)->ep;
- ep->asconf_enable = !!params.assoc_value;
+ ep->asconf_enable = !!params->assoc_value;
if (ep->asconf_enable && ep->auth_enable) {
sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF);
@@ -4554,29 +4334,23 @@ out:
}
static int sctp_setsockopt_auth_supported(struct sock *sk,
- char __user *optval,
+ struct sctp_assoc_value *params,
unsigned int optlen)
{
- struct sctp_assoc_value params;
struct sctp_association *asoc;
struct sctp_endpoint *ep;
int retval = -EINVAL;
- if (optlen != sizeof(params))
- goto out;
-
- if (copy_from_user(&params, optval, optlen)) {
- retval = -EFAULT;
+ if (optlen != sizeof(*params))
goto out;
- }
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ asoc = sctp_id2assoc(sk, params->assoc_id);
+ if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC &&
sctp_style(sk, UDP))
goto out;
ep = sctp_sk(sk)->ep;
- if (params.assoc_value) {
+ if (params->assoc_value) {
retval = sctp_auth_init(ep, GFP_KERNEL);
if (retval)
goto out;
@@ -4586,7 +4360,7 @@ static int sctp_setsockopt_auth_supported(struct sock *sk,
}
}
- ep->auth_enable = !!params.assoc_value;
+ ep->auth_enable = !!params->assoc_value;
retval = 0;
out:
@@ -4594,27 +4368,21 @@ out:
}
static int sctp_setsockopt_ecn_supported(struct sock *sk,
- char __user *optval,
+ struct sctp_assoc_value *params,
unsigned int optlen)
{
- struct sctp_assoc_value params;
struct sctp_association *asoc;
int retval = -EINVAL;
- if (optlen != sizeof(params))
- goto out;
-
- if (copy_from_user(&params, optval, optlen)) {
- retval = -EFAULT;
+ if (optlen != sizeof(*params))
goto out;
- }
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ asoc = sctp_id2assoc(sk, params->assoc_id);
+ if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC &&
sctp_style(sk, UDP))
goto out;
- sctp_sk(sk)->ep->ecn_enable = !!params.assoc_value;
+ sctp_sk(sk)->ep->ecn_enable = !!params->assoc_value;
retval = 0;
out:
@@ -4622,33 +4390,27 @@ out:
}
static int sctp_setsockopt_pf_expose(struct sock *sk,
- char __user *optval,
+ struct sctp_assoc_value *params,
unsigned int optlen)
{
- struct sctp_assoc_value params;
struct sctp_association *asoc;
int retval = -EINVAL;
- if (optlen != sizeof(params))
+ if (optlen != sizeof(*params))
goto out;
- if (copy_from_user(&params, optval, optlen)) {
- retval = -EFAULT;
- goto out;
- }
-
- if (params.assoc_value > SCTP_PF_EXPOSE_MAX)
+ if (params->assoc_value > SCTP_PF_EXPOSE_MAX)
goto out;
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ asoc = sctp_id2assoc(sk, params->assoc_id);
+ if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC &&
sctp_style(sk, UDP))
goto out;
if (asoc)
- asoc->pf_expose = params.assoc_value;
+ asoc->pf_expose = params->assoc_value;
else
- sctp_sk(sk)->pf_expose = params.assoc_value;
+ sctp_sk(sk)->pf_expose = params->assoc_value;
retval = 0;
out:
@@ -4675,8 +4437,9 @@ out:
* optlen - the size of the buffer.
*/
static int sctp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
+ void *kopt = NULL;
int retval = 0;
pr_debug("%s: sk:%p, optname:%d\n", __func__, sk, optname);
@@ -4689,8 +4452,14 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
*/
if (level != SOL_SCTP) {
struct sctp_af *af = sctp_sk(sk)->pf->af;
- retval = af->setsockopt(sk, level, optname, optval, optlen);
- goto out_nounlock;
+
+ return af->setsockopt(sk, level, optname, optval, optlen);
+ }
+
+ if (optlen > 0) {
+ kopt = memdup_sockptr(optval, optlen);
+ if (IS_ERR(kopt))
+ return PTR_ERR(kopt);
}
lock_sock(sk);
@@ -4698,179 +4467,174 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case SCTP_SOCKOPT_BINDX_ADD:
/* 'optlen' is the size of the addresses buffer. */
- retval = sctp_setsockopt_bindx(sk, (struct sockaddr __user *)optval,
- optlen, SCTP_BINDX_ADD_ADDR);
+ retval = sctp_setsockopt_bindx(sk, kopt, optlen,
+ SCTP_BINDX_ADD_ADDR);
break;
case SCTP_SOCKOPT_BINDX_REM:
/* 'optlen' is the size of the addresses buffer. */
- retval = sctp_setsockopt_bindx(sk, (struct sockaddr __user *)optval,
- optlen, SCTP_BINDX_REM_ADDR);
+ retval = sctp_setsockopt_bindx(sk, kopt, optlen,
+ SCTP_BINDX_REM_ADDR);
break;
case SCTP_SOCKOPT_CONNECTX_OLD:
/* 'optlen' is the size of the addresses buffer. */
- retval = sctp_setsockopt_connectx_old(sk,
- (struct sockaddr __user *)optval,
- optlen);
+ retval = sctp_setsockopt_connectx_old(sk, kopt, optlen);
break;
case SCTP_SOCKOPT_CONNECTX:
/* 'optlen' is the size of the addresses buffer. */
- retval = sctp_setsockopt_connectx(sk,
- (struct sockaddr __user *)optval,
- optlen);
+ retval = sctp_setsockopt_connectx(sk, kopt, optlen);
break;
case SCTP_DISABLE_FRAGMENTS:
- retval = sctp_setsockopt_disable_fragments(sk, optval, optlen);
+ retval = sctp_setsockopt_disable_fragments(sk, kopt, optlen);
break;
case SCTP_EVENTS:
- retval = sctp_setsockopt_events(sk, optval, optlen);
+ retval = sctp_setsockopt_events(sk, kopt, optlen);
break;
case SCTP_AUTOCLOSE:
- retval = sctp_setsockopt_autoclose(sk, optval, optlen);
+ retval = sctp_setsockopt_autoclose(sk, kopt, optlen);
break;
case SCTP_PEER_ADDR_PARAMS:
- retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen);
+ retval = sctp_setsockopt_peer_addr_params(sk, kopt, optlen);
break;
case SCTP_DELAYED_SACK:
- retval = sctp_setsockopt_delayed_ack(sk, optval, optlen);
+ retval = sctp_setsockopt_delayed_ack(sk, kopt, optlen);
break;
case SCTP_PARTIAL_DELIVERY_POINT:
- retval = sctp_setsockopt_partial_delivery_point(sk, optval, optlen);
+ retval = sctp_setsockopt_partial_delivery_point(sk, kopt, optlen);
break;
case SCTP_INITMSG:
- retval = sctp_setsockopt_initmsg(sk, optval, optlen);
+ retval = sctp_setsockopt_initmsg(sk, kopt, optlen);
break;
case SCTP_DEFAULT_SEND_PARAM:
- retval = sctp_setsockopt_default_send_param(sk, optval,
- optlen);
+ retval = sctp_setsockopt_default_send_param(sk, kopt, optlen);
break;
case SCTP_DEFAULT_SNDINFO:
- retval = sctp_setsockopt_default_sndinfo(sk, optval, optlen);
+ retval = sctp_setsockopt_default_sndinfo(sk, kopt, optlen);
break;
case SCTP_PRIMARY_ADDR:
- retval = sctp_setsockopt_primary_addr(sk, optval, optlen);
+ retval = sctp_setsockopt_primary_addr(sk, kopt, optlen);
break;
case SCTP_SET_PEER_PRIMARY_ADDR:
- retval = sctp_setsockopt_peer_primary_addr(sk, optval, optlen);
+ retval = sctp_setsockopt_peer_primary_addr(sk, kopt, optlen);
break;
case SCTP_NODELAY:
- retval = sctp_setsockopt_nodelay(sk, optval, optlen);
+ retval = sctp_setsockopt_nodelay(sk, kopt, optlen);
break;
case SCTP_RTOINFO:
- retval = sctp_setsockopt_rtoinfo(sk, optval, optlen);
+ retval = sctp_setsockopt_rtoinfo(sk, kopt, optlen);
break;
case SCTP_ASSOCINFO:
- retval = sctp_setsockopt_associnfo(sk, optval, optlen);
+ retval = sctp_setsockopt_associnfo(sk, kopt, optlen);
break;
case SCTP_I_WANT_MAPPED_V4_ADDR:
- retval = sctp_setsockopt_mappedv4(sk, optval, optlen);
+ retval = sctp_setsockopt_mappedv4(sk, kopt, optlen);
break;
case SCTP_MAXSEG:
- retval = sctp_setsockopt_maxseg(sk, optval, optlen);
+ retval = sctp_setsockopt_maxseg(sk, kopt, optlen);
break;
case SCTP_ADAPTATION_LAYER:
- retval = sctp_setsockopt_adaptation_layer(sk, optval, optlen);
+ retval = sctp_setsockopt_adaptation_layer(sk, kopt, optlen);
break;
case SCTP_CONTEXT:
- retval = sctp_setsockopt_context(sk, optval, optlen);
+ retval = sctp_setsockopt_context(sk, kopt, optlen);
break;
case SCTP_FRAGMENT_INTERLEAVE:
- retval = sctp_setsockopt_fragment_interleave(sk, optval, optlen);
+ retval = sctp_setsockopt_fragment_interleave(sk, kopt, optlen);
break;
case SCTP_MAX_BURST:
- retval = sctp_setsockopt_maxburst(sk, optval, optlen);
+ retval = sctp_setsockopt_maxburst(sk, kopt, optlen);
break;
case SCTP_AUTH_CHUNK:
- retval = sctp_setsockopt_auth_chunk(sk, optval, optlen);
+ retval = sctp_setsockopt_auth_chunk(sk, kopt, optlen);
break;
case SCTP_HMAC_IDENT:
- retval = sctp_setsockopt_hmac_ident(sk, optval, optlen);
+ retval = sctp_setsockopt_hmac_ident(sk, kopt, optlen);
break;
case SCTP_AUTH_KEY:
- retval = sctp_setsockopt_auth_key(sk, optval, optlen);
+ retval = sctp_setsockopt_auth_key(sk, kopt, optlen);
break;
case SCTP_AUTH_ACTIVE_KEY:
- retval = sctp_setsockopt_active_key(sk, optval, optlen);
+ retval = sctp_setsockopt_active_key(sk, kopt, optlen);
break;
case SCTP_AUTH_DELETE_KEY:
- retval = sctp_setsockopt_del_key(sk, optval, optlen);
+ retval = sctp_setsockopt_del_key(sk, kopt, optlen);
break;
case SCTP_AUTH_DEACTIVATE_KEY:
- retval = sctp_setsockopt_deactivate_key(sk, optval, optlen);
+ retval = sctp_setsockopt_deactivate_key(sk, kopt, optlen);
break;
case SCTP_AUTO_ASCONF:
- retval = sctp_setsockopt_auto_asconf(sk, optval, optlen);
+ retval = sctp_setsockopt_auto_asconf(sk, kopt, optlen);
break;
case SCTP_PEER_ADDR_THLDS:
- retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen,
+ retval = sctp_setsockopt_paddr_thresholds(sk, kopt, optlen,
false);
break;
case SCTP_PEER_ADDR_THLDS_V2:
- retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen,
+ retval = sctp_setsockopt_paddr_thresholds(sk, kopt, optlen,
true);
break;
case SCTP_RECVRCVINFO:
- retval = sctp_setsockopt_recvrcvinfo(sk, optval, optlen);
+ retval = sctp_setsockopt_recvrcvinfo(sk, kopt, optlen);
break;
case SCTP_RECVNXTINFO:
- retval = sctp_setsockopt_recvnxtinfo(sk, optval, optlen);
+ retval = sctp_setsockopt_recvnxtinfo(sk, kopt, optlen);
break;
case SCTP_PR_SUPPORTED:
- retval = sctp_setsockopt_pr_supported(sk, optval, optlen);
+ retval = sctp_setsockopt_pr_supported(sk, kopt, optlen);
break;
case SCTP_DEFAULT_PRINFO:
- retval = sctp_setsockopt_default_prinfo(sk, optval, optlen);
+ retval = sctp_setsockopt_default_prinfo(sk, kopt, optlen);
break;
case SCTP_RECONFIG_SUPPORTED:
- retval = sctp_setsockopt_reconfig_supported(sk, optval, optlen);
+ retval = sctp_setsockopt_reconfig_supported(sk, kopt, optlen);
break;
case SCTP_ENABLE_STREAM_RESET:
- retval = sctp_setsockopt_enable_strreset(sk, optval, optlen);
+ retval = sctp_setsockopt_enable_strreset(sk, kopt, optlen);
break;
case SCTP_RESET_STREAMS:
- retval = sctp_setsockopt_reset_streams(sk, optval, optlen);
+ retval = sctp_setsockopt_reset_streams(sk, kopt, optlen);
break;
case SCTP_RESET_ASSOC:
- retval = sctp_setsockopt_reset_assoc(sk, optval, optlen);
+ retval = sctp_setsockopt_reset_assoc(sk, kopt, optlen);
break;
case SCTP_ADD_STREAMS:
- retval = sctp_setsockopt_add_streams(sk, optval, optlen);
+ retval = sctp_setsockopt_add_streams(sk, kopt, optlen);
break;
case SCTP_STREAM_SCHEDULER:
- retval = sctp_setsockopt_scheduler(sk, optval, optlen);
+ retval = sctp_setsockopt_scheduler(sk, kopt, optlen);
break;
case SCTP_STREAM_SCHEDULER_VALUE:
- retval = sctp_setsockopt_scheduler_value(sk, optval, optlen);
+ retval = sctp_setsockopt_scheduler_value(sk, kopt, optlen);
break;
case SCTP_INTERLEAVING_SUPPORTED:
- retval = sctp_setsockopt_interleaving_supported(sk, optval,
+ retval = sctp_setsockopt_interleaving_supported(sk, kopt,
optlen);
break;
case SCTP_REUSE_PORT:
- retval = sctp_setsockopt_reuse_port(sk, optval, optlen);
+ retval = sctp_setsockopt_reuse_port(sk, kopt, optlen);
break;
case SCTP_EVENT:
- retval = sctp_setsockopt_event(sk, optval, optlen);
+ retval = sctp_setsockopt_event(sk, kopt, optlen);
break;
case SCTP_ASCONF_SUPPORTED:
- retval = sctp_setsockopt_asconf_supported(sk, optval, optlen);
+ retval = sctp_setsockopt_asconf_supported(sk, kopt, optlen);
break;
case SCTP_AUTH_SUPPORTED:
- retval = sctp_setsockopt_auth_supported(sk, optval, optlen);
+ retval = sctp_setsockopt_auth_supported(sk, kopt, optlen);
break;
case SCTP_ECN_SUPPORTED:
- retval = sctp_setsockopt_ecn_supported(sk, optval, optlen);
+ retval = sctp_setsockopt_ecn_supported(sk, kopt, optlen);
break;
case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE:
- retval = sctp_setsockopt_pf_expose(sk, optval, optlen);
+ retval = sctp_setsockopt_pf_expose(sk, kopt, optlen);
break;
default:
retval = -ENOPROTOOPT;
@@ -4878,8 +4642,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
}
release_sock(sk);
-
-out_nounlock:
+ kfree(kopt);
return retval;
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 1163d51196da..e7649bbc2b87 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -719,8 +719,11 @@ static int smc_connect_ism(struct smc_sock *smc,
}
/* Create send and receive buffers */
- if (smc_buf_create(smc, true))
- return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
+ rc = smc_buf_create(smc, true);
+ if (rc)
+ return smc_connect_abort(smc, (rc == -ENOSPC) ?
+ SMC_CLC_DECL_MAX_DMB :
+ SMC_CLC_DECL_MEM,
ini->cln_first_contact);
smc_conn_save_peer_info(smc, aclc);
@@ -1200,12 +1203,14 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
}
/* Create send and receive buffers */
- if (smc_buf_create(new_smc, true)) {
+ rc = smc_buf_create(new_smc, true);
+ if (rc) {
if (ini->cln_first_contact == SMC_FIRST_CONTACT)
smc_lgr_cleanup_early(&new_smc->conn);
else
smc_conn_free(&new_smc->conn);
- return SMC_CLC_DECL_MEM;
+ return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB :
+ SMC_CLC_DECL_MEM;
}
return 0;
@@ -1735,7 +1740,7 @@ out:
}
static int smc_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct smc_sock *smc;
@@ -1746,8 +1751,11 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
/* generic setsockopts reaching us here always apply to the
* CLC socket
*/
- rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
- optval, optlen);
+ if (unlikely(!smc->clcsock->ops->setsockopt))
+ rc = -EOPNOTSUPP;
+ else
+ rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
+ optval, optlen);
if (smc->clcsock->sk->sk_err) {
sk->sk_err = smc->clcsock->sk->sk_err;
sk->sk_error_report(sk);
@@ -1755,7 +1763,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- if (get_user(val, (int __user *)optval))
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
return -EFAULT;
lock_sock(sk);
@@ -1812,6 +1820,8 @@ static int smc_getsockopt(struct socket *sock, int level, int optname,
smc = smc_sk(sock->sk);
/* socket options apply to the CLC socket */
+ if (unlikely(!smc->clcsock->ops->getsockopt))
+ return -EOPNOTSUPP;
return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
optval, optlen);
}
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 76c2b150d040..cf7b45306f4e 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -48,6 +48,7 @@
#define SMC_CLC_DECL_NOACTLINK 0x030a0000 /* no active smc-r link in lgr */
#define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */
#define SMC_CLC_DECL_VERSMISMAT 0x030c0000 /* SMC version mismatch */
+#define SMC_CLC_DECL_MAX_DMB 0x030d0000 /* SMC-D DMB limit exceeded */
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
#define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */
#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index f82a2e599917..b42fa3b00d00 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1614,7 +1614,7 @@ static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
if (rc) {
kfree(buf_desc);
- return ERR_PTR(-EAGAIN);
+ return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) : ERR_PTR(rc);
}
buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
/* CDC header stored in buf. So, pretend it was smaller */
@@ -1688,7 +1688,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
}
if (IS_ERR(buf_desc))
- return -ENOMEM;
+ return PTR_ERR(buf_desc);
if (!is_smcd) {
if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
diff --git a/net/socket.c b/net/socket.c
index 976426d03f09..aff52e81653c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -586,15 +586,6 @@ struct socket *sock_alloc(void)
}
EXPORT_SYMBOL(sock_alloc);
-/**
- * sock_release - close a socket
- * @sock: socket to close
- *
- * The socket is released from the protocol stack if it has a release
- * callback, and the inode is then released if the socket is bound to
- * an inode not a file.
- */
-
static void __sock_release(struct socket *sock, struct inode *inode)
{
if (sock->ops) {
@@ -620,6 +611,14 @@ static void __sock_release(struct socket *sock, struct inode *inode)
sock->file = NULL;
}
+/**
+ * sock_release - close a socket
+ * @sock: socket to close
+ *
+ * The socket is released from the protocol stack if it has a release
+ * callback, and the inode is then released if the socket is bound to
+ * an inode not a file.
+ */
void sock_release(struct socket *sock)
{
__sock_release(sock, NULL);
@@ -2080,15 +2079,25 @@ SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
}
+static bool sock_use_custom_sol_socket(const struct socket *sock)
+{
+ const struct sock *sk = sock->sk;
+
+ /* Use sock->ops->setsockopt() for MPTCP */
+ return IS_ENABLED(CONFIG_MPTCP) &&
+ sk->sk_protocol == IPPROTO_MPTCP &&
+ sk->sk_type == SOCK_STREAM &&
+ (sk->sk_family == AF_INET || sk->sk_family == AF_INET6);
+}
+
/*
* Set a socket option. Because we don't know the option lengths we have
* to pass the user mode parameter for the protocols to sort out.
*/
-
-static int __sys_setsockopt(int fd, int level, int optname,
- char __user *optval, int optlen)
+int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
+ int optlen)
{
- mm_segment_t oldfs = get_fs();
+ sockptr_t optval;
char *kernel_optval = NULL;
int err, fput_needed;
struct socket *sock;
@@ -2096,44 +2105,41 @@ static int __sys_setsockopt(int fd, int level, int optname,
if (optlen < 0)
return -EINVAL;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (sock != NULL) {
- err = security_socket_setsockopt(sock, level, optname);
- if (err)
- goto out_put;
+ err = init_user_sockptr(&optval, user_optval, optlen);
+ if (err)
+ return err;
- err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
- &optname, optval, &optlen,
- &kernel_optval);
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ if (!sock)
+ return err;
- if (err < 0) {
- goto out_put;
- } else if (err > 0) {
- err = 0;
- goto out_put;
- }
+ err = security_socket_setsockopt(sock, level, optname);
+ if (err)
+ goto out_put;
- if (kernel_optval) {
- set_fs(KERNEL_DS);
- optval = (char __user __force *)kernel_optval;
- }
+ if (!in_compat_syscall())
+ err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
+ user_optval, &optlen,
+ &kernel_optval);
+ if (err < 0)
+ goto out_put;
+ if (err > 0) {
+ err = 0;
+ goto out_put;
+ }
- if (level == SOL_SOCKET)
- err =
- sock_setsockopt(sock, level, optname, optval,
+ if (kernel_optval)
+ optval = KERNEL_SOCKPTR(kernel_optval);
+ if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
+ err = sock_setsockopt(sock, level, optname, optval, optlen);
+ else if (unlikely(!sock->ops->setsockopt))
+ err = -EOPNOTSUPP;
+ else
+ err = sock->ops->setsockopt(sock, level, optname, optval,
optlen);
- else
- err =
- sock->ops->setsockopt(sock, level, optname, optval,
- optlen);
-
- if (kernel_optval) {
- set_fs(oldfs);
- kfree(kernel_optval);
- }
+ kfree(kernel_optval);
out_put:
- fput_light(sock->file, fput_needed);
- }
+ fput_light(sock->file, fput_needed);
return err;
}
@@ -2147,37 +2153,38 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
* Get a socket option. Because we don't know the option lengths we have
* to pass a user mode parameter for the protocols to sort out.
*/
-
-static int __sys_getsockopt(int fd, int level, int optname,
- char __user *optval, int __user *optlen)
+int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
+ int __user *optlen)
{
int err, fput_needed;
struct socket *sock;
int max_optlen;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if (sock != NULL) {
- err = security_socket_getsockopt(sock, level, optname);
- if (err)
- goto out_put;
+ if (!sock)
+ return err;
+ err = security_socket_getsockopt(sock, level, optname);
+ if (err)
+ goto out_put;
+
+ if (!in_compat_syscall())
max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
- if (level == SOL_SOCKET)
- err =
- sock_getsockopt(sock, level, optname, optval,
+ if (level == SOL_SOCKET)
+ err = sock_getsockopt(sock, level, optname, optval, optlen);
+ else if (unlikely(!sock->ops->getsockopt))
+ err = -EOPNOTSUPP;
+ else
+ err = sock->ops->getsockopt(sock, level, optname, optval,
optlen);
- else
- err =
- sock->ops->getsockopt(sock, level, optname, optval,
- optlen);
+ if (!in_compat_syscall())
err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
- optval, optlen,
- max_optlen, err);
+ optval, optlen, max_optlen,
+ err);
out_put:
- fput_light(sock->file, fput_needed);
- }
+ fput_light(sock->file, fput_needed);
return err;
}
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index f25604d68337..865f3e037425 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -304,8 +304,8 @@ static int switchdev_port_obj_add_defer(struct net_device *dev,
* switchdev_port_obj_add - Add port object
*
* @dev: port device
- * @id: object ID
* @obj: object to add
+ * @extack: netlink extended ack
*
* Use a 2-phase prepare-commit transaction model to ensure
* system is not left in a partially updated state due to
@@ -357,7 +357,6 @@ static int switchdev_port_obj_del_defer(struct net_device *dev,
* switchdev_port_obj_del - Delete port object
*
* @dev: port device
- * @id: object ID
* @obj: object to delete
*
* rtnl_lock must be held and must not be in atomic section,
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 383f87bc1061..940d176e0e87 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -250,8 +250,8 @@ static void tipc_bcast_select_xmit_method(struct net *net, int dests,
* Consumes the buffer chain.
* Returns 0 if success, otherwise errno: -EHOSTUNREACH,-EMSGSIZE
*/
-static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
- u16 *cong_link_cnt)
+int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
+ u16 *cong_link_cnt)
{
struct tipc_link *l = tipc_bc_sndlink(net);
struct sk_buff_head xmitq;
@@ -752,7 +752,7 @@ void tipc_nlist_purge(struct tipc_nlist *nl)
nl->local = false;
}
-u32 tipc_bcast_get_broadcast_mode(struct net *net)
+u32 tipc_bcast_get_mode(struct net *net)
{
struct tipc_bc_base *bb = tipc_bc_base(net);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 4240c95188b1..2d9352dc7b0e 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -90,6 +90,8 @@ void tipc_bcast_toggle_rcast(struct net *net, bool supp);
int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
struct tipc_mc_method *method, struct tipc_nlist *dests,
u16 *cong_link_cnt);
+int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
+ u16 *cong_link_cnt);
int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb);
void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
struct tipc_msg *hdr);
@@ -101,7 +103,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg,
int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l);
-u32 tipc_bcast_get_broadcast_mode(struct net *net);
+u32 tipc_bcast_get_mode(struct net *net);
u32 tipc_bcast_get_broadcast_ratio(struct net *net);
void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index e366ec9a7e4d..808b147df7d5 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -595,7 +595,7 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
/**
* tipc_l2_rcv_msg - handle incoming TIPC message from an interface
- * @buf: the received packet
+ * @skb: the received message
* @dev: the net device that the packet was received on
* @pt: the packet_type structure which was used to register this handler
* @orig_dev: the original receive net device in case the device is a bond
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index bfe43da127c0..d4ecacddb40c 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -74,7 +74,7 @@ struct tipc_discoverer {
/**
* tipc_disc_init_msg - initialize a link setup message
* @net: the applicable net namespace
- * @type: message type (request or response)
+ * @mtyp: message type (request or response)
* @b: ptr to bearer issuing message
*/
static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb,
@@ -339,7 +339,7 @@ exit:
* @net: the applicable net namespace
* @b: ptr to bearer issuing requests
* @dest: destination address for request messages
- * @dest_domain: network domain to which links can be established
+ * @skb: pointer to created frame
*
* Returns 0 if successful, otherwise -errno.
*/
@@ -393,7 +393,6 @@ void tipc_disc_delete(struct tipc_discoverer *d)
* tipc_disc_reset - reset object to send periodic link setup requests
* @net: the applicable net namespace
* @b: ptr to bearer issuing requests
- * @dest_domain: network domain to which links can be established
*/
void tipc_disc_reset(struct net *net, struct tipc_bearer *b)
{
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 8b0bb600602d..c68019697cfe 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -62,12 +62,10 @@ static int tipc_eth_raw2addr(struct tipc_bearer *b,
struct tipc_media_addr *addr,
char *msg)
{
- char bcast_mac[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
-
memset(addr, 0, sizeof(*addr));
ether_addr_copy(addr->value, msg);
addr->media_id = TIPC_MEDIA_TYPE_ETH;
- addr->broadcast = !memcmp(addr->value, bcast_mac, ETH_ALEN);
+ addr->broadcast = is_broadcast_ether_addr(addr->value);
return 0;
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index d40f8e5b7683..107578122973 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -445,7 +445,7 @@ u32 tipc_link_state(struct tipc_link *l)
/**
* tipc_link_create - create a new link
- * @n: pointer to associated node
+ * @net: pointer to associated network namespace
* @if_name: associated interface name
* @bearer_id: id (index) of associated bearer
* @tolerance: link tolerance to be used by link
@@ -530,7 +530,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
/**
* tipc_link_bc_create - create new link to be used for broadcast
- * @n: pointer to associated node
+ * @net: pointer to associated network namespace
* @mtu: mtu to be used initially if no peers
* @window: send window to be used
* @inputq: queue to put messages ready for delivery
@@ -989,7 +989,7 @@ void tipc_link_reset(struct tipc_link *l)
/**
* tipc_link_xmit(): enqueue buffer list according to queue situation
- * @link: link to use
+ * @l: link to use
* @list: chain of buffers containing message
* @xmitq: returned list of packets to be sent by caller
*
@@ -1396,12 +1396,12 @@ u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
p = (struct tipc_gap_ack_blks *)msg_data(hdr);
sz = ntohs(p->len);
/* Sanity check */
- if (sz == tipc_gap_ack_blks_sz(p->ugack_cnt + p->bgack_cnt)) {
+ if (sz == struct_size(p, gacks, p->ugack_cnt + p->bgack_cnt)) {
/* Good, check if the desired type exists */
if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt))
goto ok;
/* Backward compatible: peer might not support bc, but uc? */
- } else if (uc && sz == tipc_gap_ack_blks_sz(p->ugack_cnt)) {
+ } else if (uc && sz == struct_size(p, gacks, p->ugack_cnt)) {
if (p->ugack_cnt) {
p->bgack_cnt = 0;
goto ok;
@@ -1483,7 +1483,7 @@ static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr)
__tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0;
/* Total len */
- len = tipc_gap_ack_blks_sz(ga->bgack_cnt + ga->ugack_cnt);
+ len = struct_size(ga, gacks, ga->bgack_cnt + ga->ugack_cnt);
ga->len = htons(len);
return len;
}
@@ -1532,7 +1532,7 @@ static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r,
gacks = &ga->gacks[ga->bgack_cnt];
} else if (ga) {
/* Copy the Gap ACKs, bc part, for later renewal if needed */
- this_ga = kmemdup(ga, tipc_gap_ack_blks_sz(ga->bgack_cnt),
+ this_ga = kmemdup(ga, struct_size(ga, gacks, ga->bgack_cnt),
GFP_ATOMIC);
if (likely(this_ga)) {
this_ga->start_index = 0;
@@ -2755,7 +2755,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg,
void *hdr;
struct nlattr *attrs;
struct nlattr *prop;
- u32 bc_mode = tipc_bcast_get_broadcast_mode(net);
+ u32 bc_mode = tipc_bcast_get_mode(net);
u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net);
if (!bcl)
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 01b64869a173..848fae674532 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -202,7 +202,7 @@ err:
/**
* tipc_msg_append(): Append data to tail of an existing buffer queue
- * @hdr: header to be used
+ * @_hdr: header to be used
* @m: the data to be appended
* @mss: max allowable size of buffer
* @dlen: size of data to be appended
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 58660d56bc83..1016e96db5c4 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -189,11 +189,9 @@ struct tipc_gap_ack_blks {
struct tipc_gap_ack gacks[];
};
-#define tipc_gap_ack_blks_sz(n) (sizeof(struct tipc_gap_ack_blks) + \
- sizeof(struct tipc_gap_ack) * (n))
-
#define MAX_GAP_ACK_BLKS 128
-#define MAX_GAP_ACK_BLKS_SZ tipc_gap_ack_blks_sz(MAX_GAP_ACK_BLKS)
+#define MAX_GAP_ACK_BLKS_SZ (sizeof(struct tipc_gap_ack_blks) + \
+ sizeof(struct tipc_gap_ack) * MAX_GAP_ACK_BLKS)
static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
{
@@ -438,6 +436,36 @@ static inline void msg_set_errcode(struct tipc_msg *m, u32 err)
msg_set_bits(m, 1, 25, 0xf, err);
}
+static inline void msg_set_bulk(struct tipc_msg *m)
+{
+ msg_set_bits(m, 1, 28, 0x1, 1);
+}
+
+static inline u32 msg_is_bulk(struct tipc_msg *m)
+{
+ return msg_bits(m, 1, 28, 0x1);
+}
+
+static inline void msg_set_last_bulk(struct tipc_msg *m)
+{
+ msg_set_bits(m, 1, 27, 0x1, 1);
+}
+
+static inline u32 msg_is_last_bulk(struct tipc_msg *m)
+{
+ return msg_bits(m, 1, 27, 0x1);
+}
+
+static inline void msg_set_non_legacy(struct tipc_msg *m)
+{
+ msg_set_bits(m, 1, 26, 0x1, 1);
+}
+
+static inline u32 msg_is_legacy(struct tipc_msg *m)
+{
+ return !msg_bits(m, 1, 26, 0x1);
+}
+
static inline u32 msg_reroute_cnt(struct tipc_msg *m)
{
return msg_bits(m, 1, 21, 0xf);
@@ -567,6 +595,16 @@ static inline void msg_set_origport(struct tipc_msg *m, u32 p)
msg_set_word(m, 4, p);
}
+static inline u16 msg_named_seqno(struct tipc_msg *m)
+{
+ return msg_bits(m, 4, 0, 0xffff);
+}
+
+static inline void msg_set_named_seqno(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 4, 0, 0xffff, n);
+}
+
static inline u32 msg_destport(struct tipc_msg *m)
{
return msg_word(m, 5);
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 5feaf3b67380..2f9c148f17e2 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -102,7 +102,8 @@ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ)
pr_warn("Publication distribution failure\n");
return NULL;
}
-
+ msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++);
+ msg_set_non_legacy(buf_msg(skb));
item = (struct distr_item *)msg_data(buf_msg(skb));
publ_to_item(item, publ);
return skb;
@@ -114,8 +115,8 @@ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ)
struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
{
struct name_table *nt = tipc_name_table(net);
- struct sk_buff *buf;
struct distr_item *item;
+ struct sk_buff *skb;
write_lock_bh(&nt->cluster_scope_lock);
list_del(&publ->binding_node);
@@ -123,15 +124,16 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
if (publ->scope == TIPC_NODE_SCOPE)
return NULL;
- buf = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0);
- if (!buf) {
+ skb = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0);
+ if (!skb) {
pr_warn("Withdrawal distribution failure\n");
return NULL;
}
-
- item = (struct distr_item *)msg_data(buf_msg(buf));
+ msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++);
+ msg_set_non_legacy(buf_msg(skb));
+ item = (struct distr_item *)msg_data(buf_msg(skb));
publ_to_item(item, publ);
- return buf;
+ return skb;
}
/**
@@ -141,7 +143,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
* @pls: linked list of publication items to be packed into buffer chain
*/
static void named_distribute(struct net *net, struct sk_buff_head *list,
- u32 dnode, struct list_head *pls)
+ u32 dnode, struct list_head *pls, u16 seqno)
{
struct publication *publ;
struct sk_buff *skb = NULL;
@@ -149,6 +151,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0, false) - INT_H_SIZE) /
ITEM_SIZE) * ITEM_SIZE;
u32 msg_rem = msg_dsz;
+ struct tipc_msg *hdr;
list_for_each_entry(publ, pls, binding_node) {
/* Prepare next buffer: */
@@ -159,8 +162,11 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
pr_warn("Bulk publication failure\n");
return;
}
- msg_set_bc_ack_invalid(buf_msg(skb), true);
- item = (struct distr_item *)msg_data(buf_msg(skb));
+ hdr = buf_msg(skb);
+ msg_set_bc_ack_invalid(hdr, true);
+ msg_set_bulk(hdr);
+ msg_set_non_legacy(hdr);
+ item = (struct distr_item *)msg_data(hdr);
}
/* Pack publication into message: */
@@ -176,24 +182,35 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
}
}
if (skb) {
- msg_set_size(buf_msg(skb), INT_H_SIZE + (msg_dsz - msg_rem));
+ hdr = buf_msg(skb);
+ msg_set_size(hdr, INT_H_SIZE + (msg_dsz - msg_rem));
skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem));
__skb_queue_tail(list, skb);
}
+ hdr = buf_msg(skb_peek_tail(list));
+ msg_set_last_bulk(hdr);
+ msg_set_named_seqno(hdr, seqno);
}
/**
* tipc_named_node_up - tell specified node about all publications by this node
*/
-void tipc_named_node_up(struct net *net, u32 dnode)
+void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities)
{
struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
struct sk_buff_head head;
+ u16 seqno;
__skb_queue_head_init(&head);
+ spin_lock_bh(&tn->nametbl_lock);
+ if (!(capabilities & TIPC_NAMED_BCAST))
+ nt->rc_dests++;
+ seqno = nt->snd_nxt;
+ spin_unlock_bh(&tn->nametbl_lock);
read_lock_bh(&nt->cluster_scope_lock);
- named_distribute(net, &head, dnode, &nt->cluster_scope);
+ named_distribute(net, &head, dnode, &nt->cluster_scope, seqno);
tipc_node_xmit(net, &head, dnode, 0);
read_unlock_bh(&nt->cluster_scope_lock);
}
@@ -245,13 +262,21 @@ static void tipc_dist_queue_purge(struct net *net, u32 addr)
spin_unlock_bh(&tn->nametbl_lock);
}
-void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr)
+void tipc_publ_notify(struct net *net, struct list_head *nsub_list,
+ u32 addr, u16 capabilities)
{
+ struct name_table *nt = tipc_name_table(net);
+ struct tipc_net *tn = tipc_net(net);
+
struct publication *publ, *tmp;
list_for_each_entry_safe(publ, tmp, nsub_list, binding_node)
tipc_publ_purge(net, publ, addr);
tipc_dist_queue_purge(net, addr);
+ spin_lock_bh(&tn->nametbl_lock);
+ if (!(capabilities & TIPC_NAMED_BCAST))
+ nt->rc_dests--;
+ spin_unlock_bh(&tn->nametbl_lock);
}
/**
@@ -295,29 +320,62 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
return false;
}
+static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq,
+ u16 *rcv_nxt, bool *open)
+{
+ struct sk_buff *skb, *tmp;
+ struct tipc_msg *hdr;
+ u16 seqno;
+
+ skb_queue_walk_safe(namedq, skb, tmp) {
+ skb_linearize(skb);
+ hdr = buf_msg(skb);
+ seqno = msg_named_seqno(hdr);
+ if (msg_is_last_bulk(hdr)) {
+ *rcv_nxt = seqno;
+ *open = true;
+ }
+
+ if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) {
+ __skb_unlink(skb, namedq);
+ return skb;
+ }
+
+ if (*open && (*rcv_nxt == seqno)) {
+ (*rcv_nxt)++;
+ __skb_unlink(skb, namedq);
+ return skb;
+ }
+
+ if (less(seqno, *rcv_nxt)) {
+ __skb_unlink(skb, namedq);
+ kfree_skb(skb);
+ continue;
+ }
+ }
+ return NULL;
+}
+
/**
* tipc_named_rcv - process name table update messages sent by another node
*/
-void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
+void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq,
+ u16 *rcv_nxt, bool *open)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_msg *msg;
+ struct tipc_net *tn = tipc_net(net);
struct distr_item *item;
- uint count;
- u32 node;
+ struct tipc_msg *hdr;
struct sk_buff *skb;
- int mtype;
+ u32 count, node;
spin_lock_bh(&tn->nametbl_lock);
- for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) {
- skb_linearize(skb);
- msg = buf_msg(skb);
- mtype = msg_type(msg);
- item = (struct distr_item *)msg_data(msg);
- count = msg_data_sz(msg) / ITEM_SIZE;
- node = msg_orignode(msg);
+ while ((skb = tipc_named_dequeue(namedq, rcv_nxt, open))) {
+ hdr = buf_msg(skb);
+ node = msg_orignode(hdr);
+ item = (struct distr_item *)msg_data(hdr);
+ count = msg_data_sz(hdr) / ITEM_SIZE;
while (count--) {
- tipc_update_nametbl(net, item, node, mtype);
+ tipc_update_nametbl(net, item, node, msg_type(hdr));
item++;
}
kfree_skb(skb);
@@ -345,6 +403,6 @@ void tipc_named_reinit(struct net *net)
publ->node = self;
list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node)
publ->node = self;
-
+ nt->rc_dests = 0;
spin_unlock_bh(&tn->nametbl_lock);
}
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index 63fc73e0fa6c..092323158f06 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -67,11 +67,14 @@ struct distr_item {
__be32 key;
};
+void tipc_named_bcast(struct net *net, struct sk_buff *skb);
struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ);
struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ);
-void tipc_named_node_up(struct net *net, u32 dnode);
-void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue);
+void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities);
+void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq,
+ u16 *rcv_nxt, bool *open);
void tipc_named_reinit(struct net *net);
-void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr);
+void tipc_publ_notify(struct net *net, struct list_head *nsub_list,
+ u32 addr, u16 capabilities);
#endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 359b2bc888cf..2ac33d32edc2 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -729,6 +729,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
struct tipc_net *tn = tipc_net(net);
struct publication *p = NULL;
struct sk_buff *skb = NULL;
+ u32 rc_dests;
spin_lock_bh(&tn->nametbl_lock);
@@ -743,12 +744,14 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
nt->local_publ_count++;
skb = tipc_named_publish(net, p);
}
+ rc_dests = nt->rc_dests;
exit:
spin_unlock_bh(&tn->nametbl_lock);
if (skb)
- tipc_node_broadcast(net, skb);
+ tipc_node_broadcast(net, skb, rc_dests);
return p;
+
}
/**
@@ -762,6 +765,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower,
u32 self = tipc_own_addr(net);
struct sk_buff *skb = NULL;
struct publication *p;
+ u32 rc_dests;
spin_lock_bh(&tn->nametbl_lock);
@@ -775,10 +779,11 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower,
pr_err("Failed to remove local publication {%u,%u,%u}/%u\n",
type, lower, upper, key);
}
+ rc_dests = nt->rc_dests;
spin_unlock_bh(&tn->nametbl_lock);
if (skb) {
- tipc_node_broadcast(net, skb);
+ tipc_node_broadcast(net, skb, rc_dests);
return 1;
}
return 0;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 728bc7016c38..8064e1986e2c 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -106,6 +106,8 @@ struct name_table {
struct list_head cluster_scope;
rwlock_t cluster_scope_lock;
u32 local_publ_count;
+ u32 rc_dests;
+ u32 snd_nxt;
};
int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index a4c2816c3746..4edcee3088da 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -75,6 +75,8 @@ struct tipc_bclink_entry {
struct sk_buff_head arrvq;
struct sk_buff_head inputq2;
struct sk_buff_head namedq;
+ u16 named_rcv_nxt;
+ bool named_open;
};
/**
@@ -396,10 +398,10 @@ static void tipc_node_write_unlock(struct tipc_node *n)
write_unlock_bh(&n->lock);
if (flags & TIPC_NOTIFY_NODE_DOWN)
- tipc_publ_notify(net, publ_list, addr);
+ tipc_publ_notify(net, publ_list, addr, n->capabilities);
if (flags & TIPC_NOTIFY_NODE_UP)
- tipc_named_node_up(net, addr);
+ tipc_named_node_up(net, addr, n->capabilities);
if (flags & TIPC_NOTIFY_LINK_UP) {
tipc_mon_peer_up(net, addr, bearer_id);
@@ -1483,6 +1485,7 @@ static void node_lost_contact(struct tipc_node *n,
/* Clean up broadcast state */
tipc_bcast_remove_peer(n->net, n->bc_entry.link);
+ __skb_queue_purge(&n->bc_entry.namedq);
/* Abort any ongoing link failover */
for (i = 0; i < MAX_BEARERS; i++) {
@@ -1512,7 +1515,7 @@ static void node_lost_contact(struct tipc_node *n,
* tipc_node_get_linkname - get the name of a link
*
* @bearer_id: id of the bearer
- * @node: peer node address
+ * @addr: peer node address
* @linkname: link name output buffer
*
* Returns 0 on success
@@ -1729,12 +1732,23 @@ int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq)
return 0;
}
-void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
+void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests)
{
+ struct sk_buff_head xmitq;
struct sk_buff *txskb;
struct tipc_node *n;
+ u16 dummy;
u32 dst;
+ /* Use broadcast if all nodes support it */
+ if (!rc_dests && tipc_bcast_get_mode(net) != BCLINK_MODE_RCAST) {
+ __skb_queue_head_init(&xmitq);
+ __skb_queue_tail(&xmitq, skb);
+ tipc_bcast_xmit(net, &xmitq, &dummy);
+ return;
+ }
+
+ /* Otherwise use legacy replicast method */
rcu_read_lock();
list_for_each_entry_rcu(n, tipc_nodes(net), list) {
dst = n->addr;
@@ -1749,7 +1763,6 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
tipc_node_xmit_skb(net, txskb, dst, 0);
}
rcu_read_unlock();
-
kfree_skb(skb);
}
@@ -1844,7 +1857,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
/* Handle NAME_DISTRIBUTOR messages sent from 1.7 nodes */
if (!skb_queue_empty(&n->bc_entry.namedq))
- tipc_named_rcv(net, &n->bc_entry.namedq);
+ tipc_named_rcv(net, &n->bc_entry.namedq,
+ &n->bc_entry.named_rcv_nxt,
+ &n->bc_entry.named_open);
/* If reassembly or retransmission failure => reset all links to peer */
if (rc & TIPC_LINK_DOWN_EVT)
@@ -2007,7 +2022,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
* tipc_rcv - process TIPC packets/messages arriving from off-node
* @net: the applicable net namespace
* @skb: TIPC packet
- * @bearer: pointer to bearer message arrived on
+ * @b: pointer to bearer message arrived on
*
* Invoked with no locks held. Bearer pointer must point to a valid bearer
* structure (i.e. cannot be NULL), but bearer can be inactive.
@@ -2114,7 +2129,9 @@ rcv:
tipc_node_link_down(n, bearer_id, false);
if (unlikely(!skb_queue_empty(&n->bc_entry.namedq)))
- tipc_named_rcv(net, &n->bc_entry.namedq);
+ tipc_named_rcv(net, &n->bc_entry.namedq,
+ &n->bc_entry.named_rcv_nxt,
+ &n->bc_entry.named_open);
if (unlikely(!skb_queue_empty(&n->bc_entry.inputq1)))
tipc_node_mcast_rcv(n);
diff --git a/net/tipc/node.h b/net/tipc/node.h
index a6803b449a2c..9f6f13f1604f 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -55,7 +55,8 @@ enum {
TIPC_MCAST_RBCTL = (1 << 7),
TIPC_GAP_ACK_BLOCK = (1 << 8),
TIPC_TUNNEL_ENHANCED = (1 << 9),
- TIPC_NAGLE = (1 << 10)
+ TIPC_NAGLE = (1 << 10),
+ TIPC_NAMED_BCAST = (1 << 11)
};
#define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \
@@ -68,7 +69,8 @@ enum {
TIPC_MCAST_RBCTL | \
TIPC_GAP_ACK_BLOCK | \
TIPC_TUNNEL_ENHANCED | \
- TIPC_NAGLE)
+ TIPC_NAGLE | \
+ TIPC_NAMED_BCAST)
#define INVALID_BEARER_ID -1
@@ -101,7 +103,7 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
u32 selector);
void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr);
void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr);
-void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
+void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests);
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index a94f38333698..07419f36116a 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -711,7 +711,6 @@ exit:
* tipc_getname - get port ID of socket or peer socket
* @sock: socket structure
* @uaddr: area for returned socket address
- * @uaddr_len: area for returned length of socket address
* @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
*
* Returns 0 on success, errno otherwise
@@ -1053,7 +1052,7 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
/**
* tipc_send_group_bcast - send message to all members in communication group
- * @sk: socket structure
+ * @sock: socket structure
* @m: message to send
* @dlen: total length of message data
* @timeout: timeout to wait for wakeup
@@ -1673,7 +1672,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
/**
* tipc_sk_set_orig_addr - capture sender's address for received message
* @m: descriptor for message info
- * @hdr: received message header
+ * @skb: received message
*
* Note: Address is not captured if not requested by receiver.
*/
@@ -2095,7 +2094,6 @@ static void tipc_write_space(struct sock *sk)
/**
* tipc_data_ready - wake up threads to indicate messages have been received
* @sk: socket
- * @len: the length of messages
*/
static void tipc_data_ready(struct sock *sk)
{
@@ -2677,7 +2675,7 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
/**
* tipc_accept - wait for connection request
* @sock: listening socket
- * @newsock: new socket that is to be connected
+ * @new_sock: new socket that is to be connected
* @flags: file-related flags associated with socket
*
* Returns 0 on success, errno otherwise
@@ -3105,7 +3103,7 @@ static int tipc_sk_leave(struct tipc_sock *tsk)
* Returns 0 on success, errno otherwise
*/
static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
- char __user *ov, unsigned int ol)
+ sockptr_t ov, unsigned int ol)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
@@ -3126,17 +3124,17 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
case TIPC_NODELAY:
if (ol < sizeof(value))
return -EINVAL;
- if (get_user(value, (u32 __user *)ov))
+ if (copy_from_sockptr(&value, ov, sizeof(u32)))
return -EFAULT;
break;
case TIPC_GROUP_JOIN:
if (ol < sizeof(mreq))
return -EINVAL;
- if (copy_from_user(&mreq, ov, sizeof(mreq)))
+ if (copy_from_sockptr(&mreq, ov, sizeof(mreq)))
return -EFAULT;
break;
default:
- if (ov || ol)
+ if (!sockptr_is_null(ov) || ol)
return -EINVAL;
}
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 28a283f26a8d..d91b7c543e39 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -565,7 +565,7 @@ msg_full:
/**
* tipc_parse_udp_addr - build udp media address from netlink data
- * @nlattr: netlink attribute containing sockaddr storage aligned address
+ * @nla: netlink attribute containing sockaddr storage aligned address
* @addr: tipc media address to fill with address, port and protocol type
* @scope_id: IPv6 scope id pointer, not NULL indicates it's required
*/
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 0e55f8365ce2..18fa6067bb7f 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -690,15 +690,55 @@ static void tls_device_resync_rx(struct tls_context *tls_ctx,
TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICERESYNC);
}
+static bool
+tls_device_rx_resync_async(struct tls_offload_resync_async *resync_async,
+ s64 resync_req, u32 *seq)
+{
+ u32 is_async = resync_req & RESYNC_REQ_ASYNC;
+ u32 req_seq = resync_req >> 32;
+ u32 req_end = req_seq + ((resync_req >> 16) & 0xffff);
+
+ if (is_async) {
+ /* asynchronous stage: log all headers seq such that
+ * req_seq <= seq <= end_seq, and wait for real resync request
+ */
+ if (between(*seq, req_seq, req_end) &&
+ resync_async->loglen < TLS_DEVICE_RESYNC_ASYNC_LOGMAX)
+ resync_async->log[resync_async->loglen++] = *seq;
+
+ return false;
+ }
+
+ /* synchronous stage: check against the logged entries and
+ * proceed to check the next entries if no match was found
+ */
+ while (resync_async->loglen) {
+ if (req_seq == resync_async->log[resync_async->loglen - 1] &&
+ atomic64_try_cmpxchg(&resync_async->req,
+ &resync_req, 0)) {
+ resync_async->loglen = 0;
+ *seq = req_seq;
+ return true;
+ }
+ resync_async->loglen--;
+ }
+
+ if (req_seq == *seq &&
+ atomic64_try_cmpxchg(&resync_async->req,
+ &resync_req, 0))
+ return true;
+
+ return false;
+}
+
void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_offload_context_rx *rx_ctx;
- bool is_req_pending, is_force_resync;
u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE];
+ u32 sock_data, is_req_pending;
struct tls_prot_info *prot;
s64 resync_req;
- u32 sock_data;
u32 req_seq;
if (tls_ctx->rx_conf != TLS_HW)
@@ -713,11 +753,9 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
resync_req = atomic64_read(&rx_ctx->resync_req);
req_seq = resync_req >> 32;
seq += TLS_HEADER_SIZE - 1;
- is_req_pending = resync_req & RESYNC_REQ;
- is_force_resync = resync_req & RESYNC_REQ_FORCE;
+ is_req_pending = resync_req;
- if (likely(!is_req_pending) ||
- (!is_force_resync && req_seq != seq) ||
+ if (likely(!is_req_pending) || req_seq != seq ||
!atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0))
return;
break;
@@ -739,6 +777,16 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
seq += rcd_len;
tls_bigint_increment(rcd_sn, prot->rec_seq_size);
break;
+ case TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC:
+ resync_req = atomic64_read(&rx_ctx->resync_async->req);
+ is_req_pending = resync_req;
+ if (likely(!is_req_pending))
+ return;
+
+ if (!tls_device_rx_resync_async(rx_ctx->resync_async,
+ resync_req, &seq))
+ return;
+ break;
}
tls_device_resync_rx(tls_ctx, sk, seq, rcd_sn);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index ec10041c6b7d..bbc52b088d29 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -450,7 +450,7 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
return do_tls_getsockopt(sk, optname, optval, optlen);
}
-static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
+static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
unsigned int optlen, int tx)
{
struct tls_crypto_info *crypto_info;
@@ -460,7 +460,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
int rc = 0;
int conf;
- if (!optval || (optlen < sizeof(*crypto_info))) {
+ if (sockptr_is_null(optval) || (optlen < sizeof(*crypto_info))) {
rc = -EINVAL;
goto out;
}
@@ -479,7 +479,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
goto out;
}
- rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
+ rc = copy_from_sockptr(crypto_info, optval, sizeof(*crypto_info));
if (rc) {
rc = -EFAULT;
goto err_crypto_info;
@@ -522,8 +522,9 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
goto err_crypto_info;
}
- rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
- optlen - sizeof(*crypto_info));
+ rc = copy_from_sockptr_offset(crypto_info + 1, optval,
+ sizeof(*crypto_info),
+ optlen - sizeof(*crypto_info));
if (rc) {
rc = -EFAULT;
goto err_crypto_info;
@@ -579,8 +580,8 @@ out:
return rc;
}
-static int do_tls_setsockopt(struct sock *sk, int optname,
- char __user *optval, unsigned int optlen)
+static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval,
+ unsigned int optlen)
{
int rc = 0;
@@ -600,7 +601,7 @@ static int do_tls_setsockopt(struct sock *sk, int optname,
}
static int tls_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct tls_context *ctx = tls_get_ctx(sk);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3385a7a0b231..181ea6fb56a6 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -714,8 +714,6 @@ static const struct proto_ops unix_stream_ops = {
#endif
.listen = unix_listen,
.shutdown = unix_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.sendmsg = unix_stream_sendmsg,
.recvmsg = unix_stream_recvmsg,
.mmap = sock_no_mmap,
@@ -741,8 +739,6 @@ static const struct proto_ops unix_dgram_ops = {
#endif
.listen = sock_no_listen,
.shutdown = unix_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.sendmsg = unix_dgram_sendmsg,
.recvmsg = unix_dgram_recvmsg,
.mmap = sock_no_mmap,
@@ -767,8 +763,6 @@ static const struct proto_ops unix_seqpacket_ops = {
#endif
.listen = unix_listen,
.shutdown = unix_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.sendmsg = unix_seqpacket_sendmsg,
.recvmsg = unix_seqpacket_recvmsg,
.mmap = sock_no_mmap,
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 626bf9044418..27bbcfad9c17 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1202,8 +1202,6 @@ static const struct proto_ops vsock_dgram_ops = {
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = vsock_shutdown,
- .setsockopt = sock_no_setsockopt,
- .getsockopt = sock_no_getsockopt,
.sendmsg = vsock_dgram_sendmsg,
.recvmsg = vsock_dgram_recvmsg,
.mmap = sock_no_mmap,
@@ -1519,7 +1517,7 @@ static void vsock_update_buffer_size(struct vsock_sock *vsk,
static int vsock_stream_setsockopt(struct socket *sock,
int level,
int optname,
- char __user *optval,
+ sockptr_t optval,
unsigned int optlen)
{
int err;
@@ -1537,7 +1535,7 @@ static int vsock_stream_setsockopt(struct socket *sock,
err = -EINVAL; \
goto exit; \
} \
- if (copy_from_user(&_v, optval, sizeof(_v)) != 0) { \
+ if (copy_from_sockptr(&_v, optval, sizeof(_v)) != 0) { \
err = -EFAULT; \
goto exit; \
} \
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index cddf92c5d09e..90f0f82cd9ca 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -153,6 +153,11 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
control_freq = chandef->chan->center_freq;
switch (chandef->width) {
+ case NL80211_CHAN_WIDTH_1:
+ case NL80211_CHAN_WIDTH_2:
+ case NL80211_CHAN_WIDTH_4:
+ case NL80211_CHAN_WIDTH_8:
+ case NL80211_CHAN_WIDTH_16:
case NL80211_CHAN_WIDTH_5:
case NL80211_CHAN_WIDTH_10:
case NL80211_CHAN_WIDTH_20:
@@ -263,6 +268,21 @@ static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c)
int width;
switch (c->width) {
+ case NL80211_CHAN_WIDTH_1:
+ width = 1;
+ break;
+ case NL80211_CHAN_WIDTH_2:
+ width = 2;
+ break;
+ case NL80211_CHAN_WIDTH_4:
+ width = 4;
+ break;
+ case NL80211_CHAN_WIDTH_8:
+ width = 8;
+ break;
+ case NL80211_CHAN_WIDTH_16:
+ width = 16;
+ break;
case NL80211_CHAN_WIDTH_5:
width = 5;
break;
@@ -911,6 +931,21 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
control_freq = chandef->chan->center_freq;
switch (chandef->width) {
+ case NL80211_CHAN_WIDTH_1:
+ width = 1;
+ break;
+ case NL80211_CHAN_WIDTH_2:
+ width = 2;
+ break;
+ case NL80211_CHAN_WIDTH_4:
+ width = 4;
+ break;
+ case NL80211_CHAN_WIDTH_8:
+ width = 8;
+ break;
+ case NL80211_CHAN_WIDTH_16:
+ width = 16;
+ break;
case NL80211_CHAN_WIDTH_5:
width = 5;
break;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index c623d9bf5096..1971d7e6eb55 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -803,10 +803,11 @@ int wiphy_register(struct wiphy *wiphy)
if (WARN_ON(!sband->n_channels))
return -EINVAL;
/*
- * on 60GHz band, there are no legacy rates, so
+ * on 60GHz or sub-1Ghz band, there are no legacy rates, so
* n_bitrates is 0
*/
- if (WARN_ON(band != NL80211_BAND_60GHZ &&
+ if (WARN_ON((band != NL80211_BAND_60GHZ &&
+ band != NL80211_BAND_S1GHZ) &&
!sband->n_bitrates))
return -EINVAL;
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index eac5aa1419fc..e4e363138279 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -78,6 +78,7 @@ const struct mesh_config default_mesh_config = {
.power_mode = NL80211_MESH_POWER_ACTIVE,
.dot11MeshAwakeWindowDuration = MESH_DEFAULT_AWAKE_WINDOW,
.plink_timeout = MESH_DEFAULT_PLINK_TIMEOUT,
+ .dot11MeshNolearn = false,
};
const struct mesh_setup default_mesh_setup = {
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0e07fb8585fb..814e23d3ce7c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4713,8 +4713,8 @@ static int nl80211_parse_he_bss_color(struct nlattr *attrs,
he_bss_color->color =
nla_get_u8(tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]);
- he_bss_color->disabled =
- nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_DISABLED]);
+ he_bss_color->enabled =
+ !nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_DISABLED]);
he_bss_color->partial =
nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_PARTIAL]);
@@ -5395,6 +5395,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
PUT_SINFO(PEER_PM, peer_pm, u32);
PUT_SINFO(NONPEER_PM, nonpeer_pm, u32);
PUT_SINFO(CONNECTED_TO_GATE, connected_to_gate, u8);
+ PUT_SINFO(CONNECTED_TO_AS, connected_to_as, u8);
if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_BSS_PARAM)) {
bss_param = nla_nest_start_noflag(msg,
@@ -6885,7 +6886,11 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
nla_put_u32(msg, NL80211_MESHCONF_PLINK_TIMEOUT,
cur_params.plink_timeout) ||
nla_put_u8(msg, NL80211_MESHCONF_CONNECTED_TO_GATE,
- cur_params.dot11MeshConnectedToMeshGate))
+ cur_params.dot11MeshConnectedToMeshGate) ||
+ nla_put_u8(msg, NL80211_MESHCONF_NOLEARN,
+ cur_params.dot11MeshNolearn) ||
+ nla_put_u8(msg, NL80211_MESHCONF_CONNECTED_TO_AS,
+ cur_params.dot11MeshConnectedToAuthServer))
goto nla_put_failure;
nla_nest_end(msg, pinfoattr);
genlmsg_end(msg, hdr);
@@ -6943,6 +6948,8 @@ nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] = {
[NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 },
[NL80211_MESHCONF_PLINK_TIMEOUT] = { .type = NLA_U32 },
[NL80211_MESHCONF_CONNECTED_TO_GATE] = NLA_POLICY_RANGE(NLA_U8, 0, 1),
+ [NL80211_MESHCONF_NOLEARN] = NLA_POLICY_RANGE(NLA_U8, 0, 1),
+ [NL80211_MESHCONF_CONNECTED_TO_AS] = NLA_POLICY_RANGE(NLA_U8, 0, 1),
};
static const struct nla_policy
@@ -7055,6 +7062,9 @@ do { \
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConnectedToMeshGate, mask,
NL80211_MESHCONF_CONNECTED_TO_GATE,
nla_get_u8);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConnectedToAuthServer, mask,
+ NL80211_MESHCONF_CONNECTED_TO_AS,
+ nla_get_u8);
/*
* Check HT operation mode based on
* IEEE 802.11-2016 9.4.2.57 HT Operation element.
@@ -7094,6 +7104,8 @@ do { \
NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, mask,
NL80211_MESHCONF_PLINK_TIMEOUT, nla_get_u32);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNolearn, mask,
+ NL80211_MESHCONF_NOLEARN, nla_get_u8);
if (mask_out)
*mask_out = mask;
@@ -7774,10 +7786,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
if (!rdev->ops->scan)
return -EOPNOTSUPP;
- if (rdev->scan_req || rdev->scan_msg) {
- err = -EBUSY;
- goto unlock;
- }
+ if (rdev->scan_req || rdev->scan_msg)
+ return -EBUSY;
if (info->attrs[NL80211_ATTR_SCAN_FREQ_KHZ]) {
if (!wiphy_ext_feature_isset(wiphy,
@@ -7790,10 +7800,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
if (scan_freqs) {
n_channels = validate_scan_freqs(scan_freqs);
- if (!n_channels) {
- err = -EINVAL;
- goto unlock;
- }
+ if (!n_channels)
+ return -EINVAL;
} else {
n_channels = ieee80211_get_num_supported_channels(wiphy);
}
@@ -7802,29 +7810,23 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp)
n_ssids++;
- if (n_ssids > wiphy->max_scan_ssids) {
- err = -EINVAL;
- goto unlock;
- }
+ if (n_ssids > wiphy->max_scan_ssids)
+ return -EINVAL;
if (info->attrs[NL80211_ATTR_IE])
ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
else
ie_len = 0;
- if (ie_len > wiphy->max_scan_ie_len) {
- err = -EINVAL;
- goto unlock;
- }
+ if (ie_len > wiphy->max_scan_ie_len)
+ return -EINVAL;
request = kzalloc(sizeof(*request)
+ sizeof(*request->ssids) * n_ssids
+ sizeof(*request->channels) * n_channels
+ ie_len, GFP_KERNEL);
- if (!request) {
- err = -ENOMEM;
- goto unlock;
- }
+ if (!request)
+ return -ENOMEM;
if (n_ssids)
request->ssids = (void *)&request->channels[n_channels];
@@ -8003,17 +8005,19 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
rdev->scan_req = request;
err = rdev_scan(rdev, request);
- if (!err) {
- nl80211_send_scan_start(rdev, wdev);
- if (wdev->netdev)
- dev_hold(wdev->netdev);
- } else {
+ if (err)
+ goto out_free;
+
+ nl80211_send_scan_start(rdev, wdev);
+ if (wdev->netdev)
+ dev_hold(wdev->netdev);
+
+ return 0;
+
out_free:
- rdev->scan_req = NULL;
- kfree(request);
- }
+ rdev->scan_req = NULL;
+ kfree(request);
- unlock:
return err;
}
@@ -9438,7 +9442,9 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
if (nla_len(info->attrs[NL80211_ATTR_PMK]) != WLAN_PMK_LEN)
return -EINVAL;
if (!wiphy_ext_feature_isset(&rdev->wiphy,
- NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK))
+ NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK) &&
+ !wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK))
return -EINVAL;
settings->psk = nla_data(info->attrs[NL80211_ATTR_PMK]);
}
@@ -10394,8 +10400,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
memcpy(dev->ieee80211_ptr->disconnect_bssid,
connect.bssid, ETH_ALEN);
else
- memset(dev->ieee80211_ptr->disconnect_bssid,
- 0, ETH_ALEN);
+ eth_zero_addr(dev->ieee80211_ptr->disconnect_bssid);
}
wdev_unlock(dev->ieee80211_ptr);
@@ -13266,13 +13271,13 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
if (!wdev_running(wdev))
return -ENETDOWN;
}
-
- if (!vcmd->doit)
- return -EOPNOTSUPP;
} else {
wdev = NULL;
}
+ if (!vcmd->doit)
+ return -EOPNOTSUPP;
+
if (info->attrs[NL80211_ATTR_VENDOR_DATA]) {
data = nla_data(info->attrs[NL80211_ATTR_VENDOR_DATA]);
len = nla_len(info->attrs[NL80211_ATTR_VENDOR_DATA]);
@@ -13479,7 +13484,7 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
if (err == -ENOBUFS || err == -ENOENT) {
genlmsg_cancel(skb, hdr);
break;
- } else if (err) {
+ } else if (err <= 0) {
genlmsg_cancel(skb, hdr);
goto out;
}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 0d74a31ef0ab..35b8847a2f6d 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2384,7 +2384,7 @@ static void reg_set_request_processed(void)
/**
* reg_process_hint_core - process core regulatory requests
- * @pending_request: a pending core regulatory request
+ * @core_request: a pending core regulatory request
*
* The wireless subsystem can use this function to process
* a regulatory request issued by the regulatory core.
@@ -2493,6 +2493,7 @@ __reg_process_hint_driver(struct regulatory_request *driver_request)
/**
* reg_process_hint_driver - process driver regulatory requests
+ * @wiphy: the wireless device for the regulatory request
* @driver_request: a pending driver regulatory request
*
* The wireless subsystem can use this function to process
@@ -2593,6 +2594,7 @@ __reg_process_hint_country_ie(struct wiphy *wiphy,
/**
* reg_process_hint_country_ie - process regulatory requests from country IEs
+ * @wiphy: the wireless device for the regulatory request
* @country_ie_request: a regulatory request from a country IE
*
* The wireless subsystem can use this function to process
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 74ea4cfb39fb..e67a74488bbe 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -712,6 +712,16 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *rdev)
__cfg80211_bss_expire(rdev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE);
}
+void cfg80211_bss_flush(struct wiphy *wiphy)
+{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+
+ spin_lock_bh(&rdev->bss_lock);
+ __cfg80211_bss_expire(rdev, jiffies);
+ spin_unlock_bh(&rdev->bss_lock);
+}
+EXPORT_SYMBOL(cfg80211_bss_flush);
+
const struct element *
cfg80211_find_elem_match(u8 eid, const u8 *ies, unsigned int len,
const u8 *match, unsigned int match_len,
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index b23cab016521..6e218a0acd4e 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -68,7 +68,8 @@
__field(u16, ht_opmode) \
__field(u32, dot11MeshHWMPactivePathToRootTimeout) \
__field(u16, dot11MeshHWMProotInterval) \
- __field(u16, dot11MeshHWMPconfirmationInterval)
+ __field(u16, dot11MeshHWMPconfirmationInterval) \
+ __field(bool, dot11MeshNolearn)
#define MESH_CFG_ASSIGN \
do { \
__entry->dot11MeshRetryTimeout = conf->dot11MeshRetryTimeout; \
@@ -109,6 +110,7 @@
conf->dot11MeshHWMProotInterval; \
__entry->dot11MeshHWMPconfirmationInterval = \
conf->dot11MeshHWMPconfirmationInterval; \
+ __entry->dot11MeshNolearn = conf->dot11MeshNolearn; \
} while (0)
#define CHAN_ENTRY __field(enum nl80211_band, band) \
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 4d3b76f94f55..26a977343c3b 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -102,6 +102,8 @@ u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band)
if (chan < 7)
return MHZ_TO_KHZ(56160 + chan * 2160);
break;
+ case NL80211_BAND_S1GHZ:
+ return 902000 + chan * 500;
default:
;
}
@@ -210,6 +212,12 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband)
WARN_ON(!sband->ht_cap.ht_supported);
WARN_ON((sband->ht_cap.mcs.rx_mask[0] & 0x1e) != 0x1e);
break;
+ case NL80211_BAND_S1GHZ:
+ /* Figure 9-589bd: 3 means unsupported, so != 3 means at least
+ * mandatory is ok.
+ */
+ WARN_ON((sband->s1g_cap.nss_mcs[0] & 0x3) == 0x3);
+ break;
case NUM_NL80211_BANDS:
default:
WARN_ON(1);
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index cac9e28d852b..aa918d7ff6bd 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -220,7 +220,6 @@ EXPORT_WEXT_HANDLER(cfg80211_wext_giwrange);
/**
* cfg80211_wext_freq - get wext frequency for non-"auto"
- * @dev: the net device
* @freq: the wext freq encoding
*
* Returns a frequency, or a negative error code, or 0 for auto.
diff --git a/net/x25/Kconfig b/net/x25/Kconfig
index e3ed23245a82..68729aa3a5d5 100644
--- a/net/x25/Kconfig
+++ b/net/x25/Kconfig
@@ -17,7 +17,7 @@ config X25
if you want that) and the lower level data link layer protocol LAPB
(say Y to "LAPB Data Link Driver" below if you want that).
- You can read more about X.25 at <http://www.sangoma.com/tutorials/x25/> and
+ You can read more about X.25 at <https://www.sangoma.com/tutorials/x25/> and
<http://docwiki.cisco.com/wiki/X.25>.
Information about X.25 for Linux is contained in the files
<file:Documentation/networking/x25.rst> and
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index d5b09bbff375..0bbb283f23c9 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -431,7 +431,7 @@ void x25_destroy_socket_from_timer(struct sock *sk)
*/
static int x25_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
int opt;
struct sock *sk = sock->sk;
@@ -445,7 +445,7 @@ static int x25_setsockopt(struct socket *sock, int level, int optname,
goto out;
rc = -EFAULT;
- if (get_user(opt, (int __user *)optval))
+ if (copy_from_sockptr(&opt, optval, sizeof(int)))
goto out;
if (opt)
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index 7d02532aad0d..fdae054b7dc1 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -270,7 +270,7 @@ void x25_link_device_up(struct net_device *dev)
/**
* __x25_remove_neigh - remove neighbour from x25_neigh_list
- * @nb - neigh to remove
+ * @nb: - neigh to remove
*
* Remove neighbour from x25_neigh_list. If it was there.
* Caller must hold x25_neigh_list_lock.
diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c
index b8e94d58d0f1..00e46c9a5280 100644
--- a/net/x25/x25_route.c
+++ b/net/x25/x25_route.c
@@ -142,7 +142,7 @@ struct net_device *x25_dev_get(char *devname)
/**
* x25_get_route - Find a route given an X.25 address.
- * @addr - address to find a route for
+ * @addr: - address to find a route for
*
* Find a route given an X.25 address.
*/
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 3700266229f6..c3231620d210 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -123,7 +123,7 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
addr = xp_get_handle(xskb);
err = xskq_prod_reserve_desc(xs->rx, addr, len);
if (err) {
- xs->rx_dropped++;
+ xs->rx_queue_full++;
return err;
}
@@ -274,8 +274,10 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
rcu_read_lock();
list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
- if (!xskq_cons_peek_desc(xs->tx, desc, umem))
+ if (!xskq_cons_peek_desc(xs->tx, desc, umem)) {
+ xs->tx->queue_empty_descs++;
continue;
+ }
/* This is the backpressure mechanism for the Tx path.
* Reserve space in the completion queue and only proceed
@@ -387,6 +389,8 @@ static int xsk_generic_xmit(struct sock *sk)
sent_frame = true;
}
+ xs->tx->queue_empty_descs++;
+
out:
if (sent_frame)
sk->sk_write_space(sk);
@@ -698,7 +702,7 @@ struct xdp_umem_reg_v1 {
};
static int xsk_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, unsigned int optlen)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
@@ -716,7 +720,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(entries))
return -EINVAL;
- if (copy_from_user(&entries, optval, sizeof(entries)))
+ if (copy_from_sockptr(&entries, optval, sizeof(entries)))
return -EFAULT;
mutex_lock(&xs->mutex);
@@ -743,7 +747,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
else if (optlen < sizeof(mr))
mr_size = sizeof(struct xdp_umem_reg_v1);
- if (copy_from_user(&mr, optval, mr_size))
+ if (copy_from_sockptr(&mr, optval, mr_size))
return -EFAULT;
mutex_lock(&xs->mutex);
@@ -770,7 +774,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xsk_queue **q;
int entries;
- if (copy_from_user(&entries, optval, sizeof(entries)))
+ if (copy_from_sockptr(&entries, optval, sizeof(entries)))
return -EFAULT;
mutex_lock(&xs->mutex);
@@ -812,6 +816,12 @@ static void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring)
ring->desc = offsetof(struct xdp_umem_ring, desc);
}
+struct xdp_statistics_v1 {
+ __u64 rx_dropped;
+ __u64 rx_invalid_descs;
+ __u64 tx_invalid_descs;
+};
+
static int xsk_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -830,20 +840,36 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case XDP_STATISTICS:
{
- struct xdp_statistics stats;
+ struct xdp_statistics stats = {};
+ bool extra_stats = true;
+ size_t stats_size;
- if (len < sizeof(stats))
+ if (len < sizeof(struct xdp_statistics_v1)) {
return -EINVAL;
+ } else if (len < sizeof(stats)) {
+ extra_stats = false;
+ stats_size = sizeof(struct xdp_statistics_v1);
+ } else {
+ stats_size = sizeof(stats);
+ }
mutex_lock(&xs->mutex);
stats.rx_dropped = xs->rx_dropped;
+ if (extra_stats) {
+ stats.rx_ring_full = xs->rx_queue_full;
+ stats.rx_fill_ring_empty_descs =
+ xs->umem ? xskq_nb_queue_empty_descs(xs->umem->fq) : 0;
+ stats.tx_ring_empty_descs = xskq_nb_queue_empty_descs(xs->tx);
+ } else {
+ stats.rx_dropped += xs->rx_queue_full;
+ }
stats.rx_invalid_descs = xskq_nb_invalid_descs(xs->rx);
stats.tx_invalid_descs = xskq_nb_invalid_descs(xs->tx);
mutex_unlock(&xs->mutex);
- if (copy_to_user(optval, &stats, sizeof(stats)))
+ if (copy_to_user(optval, &stats, stats_size))
return -EFAULT;
- if (put_user(sizeof(stats), optlen))
+ if (put_user(stats_size, optlen))
return -EFAULT;
return 0;
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 08b80669f649..a2044c245215 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -189,6 +189,7 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
for (;;) {
if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) {
+ pool->fq->queue_empty_descs++;
xp_release(xskb);
return NULL;
}
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index 0163b26aaf63..21e9c2d123ee 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -76,6 +76,19 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
return err;
}
+static int xsk_diag_put_stats(const struct xdp_sock *xs, struct sk_buff *nlskb)
+{
+ struct xdp_diag_stats du = {};
+
+ du.n_rx_dropped = xs->rx_dropped;
+ du.n_rx_invalid = xskq_nb_invalid_descs(xs->rx);
+ du.n_rx_full = xs->rx_queue_full;
+ du.n_fill_ring_empty = xs->umem ? xskq_nb_queue_empty_descs(xs->umem->fq) : 0;
+ du.n_tx_invalid = xskq_nb_invalid_descs(xs->tx);
+ du.n_tx_ring_empty = xskq_nb_queue_empty_descs(xs->tx);
+ return nla_put(nlskb, XDP_DIAG_STATS, sizeof(du), &du);
+}
+
static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
struct xdp_diag_req *req,
struct user_namespace *user_ns,
@@ -118,6 +131,10 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO))
goto out_nlmsg_trim;
+ if ((req->xdiag_show & XDP_SHOW_STATS) &&
+ xsk_diag_put_stats(xs, nlskb))
+ goto out_nlmsg_trim;
+
mutex_unlock(&xs->mutex);
nlmsg_end(nlskb, nlh);
return 0;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 5b5d24d2dd37..bf42cfd74b89 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -38,6 +38,7 @@ struct xsk_queue {
u32 cached_cons;
struct xdp_ring *ring;
u64 invalid_descs;
+ u64 queue_empty_descs;
};
/* The structure of the shared state of the rings are the same as the
@@ -354,6 +355,11 @@ static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
return q ? q->invalid_descs : 0;
}
+static inline u64 xskq_nb_queue_empty_descs(struct xsk_queue *q)
+{
+ return q ? q->queue_empty_descs : 0;
+}
+
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
void xskq_destroy(struct xsk_queue *q_ops);
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 1dc7208c71ba..8367adbbe9df 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -254,6 +254,7 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
spin_unlock_bh(&map->lock);
}
+static int xsk_map_btf_id;
const struct bpf_map_ops xsk_map_ops = {
.map_alloc = xsk_map_alloc,
.map_free = xsk_map_free,
@@ -264,4 +265,6 @@ const struct bpf_map_ops xsk_map_ops = {
.map_update_elem = xsk_map_update_elem,
.map_delete_elem = xsk_map_delete_elem,
.map_check_btf = map_check_no_btf,
+ .map_btf_name = "xsk_map",
+ .map_btf_id = &xsk_map_btf_id,
};
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index 100e29682b48..827ccdf2db57 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -15,6 +15,7 @@ static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb,
{
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf ||
!sk_rmem_schedule(sk, skb, skb->truesize)) {
+ XFRM_INC_STATS(sock_net(sk), LINUX_MIB_XFRMINERROR);
kfree_skb(skb);
return;
}
@@ -49,23 +50,51 @@ static void espintcp_rcv(struct strparser *strp, struct sk_buff *skb)
struct espintcp_ctx *ctx = container_of(strp, struct espintcp_ctx,
strp);
struct strp_msg *rxm = strp_msg(skb);
+ int len = rxm->full_len - 2;
u32 nonesp_marker;
int err;
+ /* keepalive packet? */
+ if (unlikely(len == 1)) {
+ u8 data;
+
+ err = skb_copy_bits(skb, rxm->offset + 2, &data, 1);
+ if (err < 0) {
+ XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR);
+ kfree_skb(skb);
+ return;
+ }
+
+ if (data == 0xff) {
+ kfree_skb(skb);
+ return;
+ }
+ }
+
+ /* drop other short messages */
+ if (unlikely(len <= sizeof(nonesp_marker))) {
+ XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR);
+ kfree_skb(skb);
+ return;
+ }
+
err = skb_copy_bits(skb, rxm->offset + 2, &nonesp_marker,
sizeof(nonesp_marker));
if (err < 0) {
+ XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR);
kfree_skb(skb);
return;
}
/* remove header, leave non-ESP marker/SPI */
if (!__pskb_pull(skb, rxm->offset + 2)) {
+ XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR);
kfree_skb(skb);
return;
}
if (pskb_trim(skb, rxm->full_len - 2) != 0) {
+ XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR);
kfree_skb(skb);
return;
}
@@ -91,7 +120,7 @@ static int espintcp_parse(struct strparser *strp, struct sk_buff *skb)
return err;
len = be16_to_cpu(blen);
- if (len < 6)
+ if (len < 2)
return -EINVAL;
return len;
@@ -109,8 +138,11 @@ static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
flags |= nonblock ? MSG_DONTWAIT : 0;
skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, &off, &err);
- if (!skb)
+ if (!skb) {
+ if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN)
+ return 0;
return err;
+ }
copied = len;
if (copied > skb->len)
@@ -213,7 +245,7 @@ retry:
return 0;
}
-static int espintcp_push_msgs(struct sock *sk)
+static int espintcp_push_msgs(struct sock *sk, int flags)
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
struct espintcp_msg *emsg = &ctx->partial;
@@ -227,12 +259,12 @@ static int espintcp_push_msgs(struct sock *sk)
ctx->tx_running = 1;
if (emsg->skb)
- err = espintcp_sendskb_locked(sk, emsg, 0);
+ err = espintcp_sendskb_locked(sk, emsg, flags);
else
- err = espintcp_sendskmsg_locked(sk, emsg, 0);
+ err = espintcp_sendskmsg_locked(sk, emsg, flags);
if (err == -EAGAIN) {
ctx->tx_running = 0;
- return 0;
+ return flags & MSG_DONTWAIT ? -EAGAIN : 0;
}
if (!err)
memset(emsg, 0, sizeof(*emsg));
@@ -257,7 +289,7 @@ int espintcp_push_skb(struct sock *sk, struct sk_buff *skb)
offset = skb_transport_offset(skb);
len = skb->len - offset;
- espintcp_push_msgs(sk);
+ espintcp_push_msgs(sk, 0);
if (emsg->len) {
kfree_skb(skb);
@@ -270,7 +302,7 @@ int espintcp_push_skb(struct sock *sk, struct sk_buff *skb)
emsg->len = len;
emsg->skb = skb;
- espintcp_push_msgs(sk);
+ espintcp_push_msgs(sk, 0);
return 0;
}
@@ -287,7 +319,7 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
char buf[2] = {0};
int err, end;
- if (msg->msg_flags)
+ if (msg->msg_flags & ~MSG_DONTWAIT)
return -EOPNOTSUPP;
if (size > MAX_ESPINTCP_MSG)
@@ -298,9 +330,10 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
lock_sock(sk);
- err = espintcp_push_msgs(sk);
+ err = espintcp_push_msgs(sk, msg->msg_flags & MSG_DONTWAIT);
if (err < 0) {
- err = -ENOBUFS;
+ if (err != -EAGAIN || !(msg->msg_flags & MSG_DONTWAIT))
+ err = -ENOBUFS;
goto unlock;
}
@@ -337,10 +370,9 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
tcp_rate_check_app_limited(sk);
- err = espintcp_push_msgs(sk);
+ err = espintcp_push_msgs(sk, msg->msg_flags & MSG_DONTWAIT);
/* this message could be partially sent, keep it */
- if (err < 0)
- goto unlock;
+
release_sock(sk);
return size;
@@ -374,7 +406,7 @@ static void espintcp_tx_work(struct work_struct *work)
lock_sock(sk);
if (!ctx->tx_running)
- espintcp_push_msgs(sk);
+ espintcp_push_msgs(sk, 0);
release_sock(sk);
}
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 626096bd0d29..edf11893dbe8 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -106,6 +106,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
struct sk_buff *skb2, *nskb, *pskb = NULL;
netdev_features_t esp_features = features;
struct xfrm_offload *xo = xfrm_offload(skb);
+ struct net_device *dev = skb->dev;
struct sec_path *sp;
if (!xo || (xo->flags & XFRM_XMIT))
@@ -119,6 +120,10 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
return skb;
+ /* This skb was already validated on the upper/virtual dev */
+ if ((x->xso.dev != dev) && (x->xso.real_dev == dev))
+ return skb;
+
local_irq_save(flags);
sd = this_cpu_ptr(&softnet_data);
err = !skb_queue_empty(&sd->xfrm_backlog);
@@ -131,25 +136,20 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
xo->flags |= XFRM_XMIT;
- if (skb_is_gso(skb)) {
- struct net_device *dev = skb->dev;
-
- if (unlikely(x->xso.dev != dev)) {
- struct sk_buff *segs;
+ if (skb_is_gso(skb) && unlikely(x->xso.dev != dev)) {
+ struct sk_buff *segs;
- /* Packet got rerouted, fixup features and segment it. */
- esp_features = esp_features & ~(NETIF_F_HW_ESP
- | NETIF_F_GSO_ESP);
+ /* Packet got rerouted, fixup features and segment it. */
+ esp_features = esp_features & ~(NETIF_F_HW_ESP | NETIF_F_GSO_ESP);
- segs = skb_gso_segment(skb, esp_features);
- if (IS_ERR(segs)) {
- kfree_skb(skb);
- atomic_long_inc(&dev->tx_dropped);
- return NULL;
- } else {
- consume_skb(skb);
- skb = segs;
- }
+ segs = skb_gso_segment(skb, esp_features);
+ if (IS_ERR(segs)) {
+ kfree_skb(skb);
+ atomic_long_inc(&dev->tx_dropped);
+ return NULL;
+ } else {
+ consume_skb(skb);
+ skb = segs;
}
}
@@ -261,6 +261,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
}
xso->dev = dev;
+ xso->real_dev = dev;
xso->num_exthdrs = 1;
xso->flags = xuo->flags;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index bd984ff17c2d..37456d022cfa 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -42,7 +42,7 @@ struct xfrm_trans_cb {
#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
-static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
+static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[2][AF_INET6 + 1];
static struct gro_cells gro_cells;
static struct net_device xfrm_napi_dev;
@@ -53,14 +53,14 @@ int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
{
int err = 0;
- if (WARN_ON(afinfo->family >= ARRAY_SIZE(xfrm_input_afinfo)))
+ if (WARN_ON(afinfo->family > AF_INET6))
return -EAFNOSUPPORT;
spin_lock_bh(&xfrm_input_afinfo_lock);
- if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL))
+ if (unlikely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family]))
err = -EEXIST;
else
- rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo);
+ rcu_assign_pointer(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family], afinfo);
spin_unlock_bh(&xfrm_input_afinfo_lock);
return err;
}
@@ -71,11 +71,11 @@ int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo)
int err = 0;
spin_lock_bh(&xfrm_input_afinfo_lock);
- if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) {
- if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo))
+ if (likely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family])) {
+ if (unlikely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family] != afinfo))
err = -EINVAL;
else
- RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL);
+ RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family], NULL);
}
spin_unlock_bh(&xfrm_input_afinfo_lock);
synchronize_rcu();
@@ -83,15 +83,15 @@ int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo)
}
EXPORT_SYMBOL(xfrm_input_unregister_afinfo);
-static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family)
+static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(u8 family, bool is_ipip)
{
const struct xfrm_input_afinfo *afinfo;
- if (WARN_ON_ONCE(family >= ARRAY_SIZE(xfrm_input_afinfo)))
+ if (WARN_ON_ONCE(family > AF_INET6))
return NULL;
rcu_read_lock();
- afinfo = rcu_dereference(xfrm_input_afinfo[family]);
+ afinfo = rcu_dereference(xfrm_input_afinfo[is_ipip][family]);
if (unlikely(!afinfo))
rcu_read_unlock();
return afinfo;
@@ -100,9 +100,11 @@ static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family
static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol,
int err)
{
+ bool is_ipip = (protocol == IPPROTO_IPIP || protocol == IPPROTO_IPV6);
+ const struct xfrm_input_afinfo *afinfo;
int ret;
- const struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family);
+ afinfo = xfrm_input_get_afinfo(family, is_ipip);
if (!afinfo)
return -EAFNOSUPPORT;
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index b615729812e5..eb8181987620 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -48,21 +48,30 @@ static int xfrmi_dev_init(struct net_device *dev);
static void xfrmi_dev_setup(struct net_device *dev);
static struct rtnl_link_ops xfrmi_link_ops __read_mostly;
static unsigned int xfrmi_net_id __read_mostly;
+static const struct net_device_ops xfrmi_netdev_ops;
+
+#define XFRMI_HASH_BITS 8
+#define XFRMI_HASH_SIZE BIT(XFRMI_HASH_BITS)
struct xfrmi_net {
/* lists for storing interfaces in use */
- struct xfrm_if __rcu *xfrmi[1];
+ struct xfrm_if __rcu *xfrmi[XFRMI_HASH_SIZE];
};
#define for_each_xfrmi_rcu(start, xi) \
for (xi = rcu_dereference(start); xi; xi = rcu_dereference(xi->next))
+static u32 xfrmi_hash(u32 if_id)
+{
+ return hash_32(if_id, XFRMI_HASH_BITS);
+}
+
static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
{
struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
struct xfrm_if *xi;
- for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
+ for_each_xfrmi_rcu(xfrmn->xfrmi[xfrmi_hash(x->if_id)], xi) {
if (x->if_id == xi->p.if_id &&
(xi->dev->flags & IFF_UP))
return xi;
@@ -74,8 +83,7 @@ static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb,
unsigned short family)
{
- struct xfrmi_net *xfrmn;
- struct xfrm_if *xi;
+ struct net_device *dev;
int ifindex = 0;
if (!secpath_exists(skb) || !skb->dev)
@@ -89,23 +97,26 @@ static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb,
ifindex = inet_sdif(skb);
break;
}
- if (!ifindex)
- ifindex = skb->dev->ifindex;
- xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id);
+ if (ifindex) {
+ struct net *net = xs_net(xfrm_input_state(skb));
- for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
- if (ifindex == xi->dev->ifindex &&
- (xi->dev->flags & IFF_UP))
- return xi;
+ dev = dev_get_by_index_rcu(net, ifindex);
+ } else {
+ dev = skb->dev;
}
- return NULL;
+ if (!dev || !(dev->flags & IFF_UP))
+ return NULL;
+ if (dev->netdev_ops != &xfrmi_netdev_ops)
+ return NULL;
+
+ return netdev_priv(dev);
}
static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
{
- struct xfrm_if __rcu **xip = &xfrmn->xfrmi[0];
+ struct xfrm_if __rcu **xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)];
rcu_assign_pointer(xi->next , rtnl_dereference(*xip));
rcu_assign_pointer(*xip, xi);
@@ -116,7 +127,7 @@ static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
struct xfrm_if __rcu **xip;
struct xfrm_if *iter;
- for (xip = &xfrmn->xfrmi[0];
+ for (xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)];
(iter = rtnl_dereference(*xip)) != NULL;
xip = &iter->next) {
if (xi == iter) {
@@ -160,7 +171,7 @@ static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p)
struct xfrm_if *xi;
struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
- for (xip = &xfrmn->xfrmi[0];
+ for (xip = &xfrmn->xfrmi[xfrmi_hash(p->if_id)];
(xi = rtnl_dereference(*xip)) != NULL;
xip = &xi->next)
if (xi->p.if_id == p->if_id)
@@ -760,11 +771,14 @@ static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
struct xfrm_if __rcu **xip;
struct xfrm_if *xi;
+ int i;
- for (xip = &xfrmn->xfrmi[0];
- (xi = rtnl_dereference(*xip)) != NULL;
- xip = &xi->next)
- unregister_netdevice_queue(xi->dev, &list);
+ for (i = 0; i < XFRMI_HASH_SIZE; i++) {
+ for (xip = &xfrmn->xfrmi[i];
+ (xi = rtnl_dereference(*xip)) != NULL;
+ xip = &xi->next)
+ unregister_netdevice_queue(xi->dev, &list);
+ }
}
unregister_netdevice_many(&list);
rtnl_unlock();
@@ -800,6 +814,33 @@ static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = {
.priority = 10,
};
+#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
+static int xfrmi6_rcv_tunnel(struct sk_buff *skb)
+{
+ const xfrm_address_t *saddr;
+ __be32 spi;
+
+ saddr = (const xfrm_address_t *)&ipv6_hdr(skb)->saddr;
+ spi = xfrm6_tunnel_spi_lookup(dev_net(skb->dev), saddr);
+
+ return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL);
+}
+
+static struct xfrm6_tunnel xfrmi_ipv6_handler __read_mostly = {
+ .handler = xfrmi6_rcv_tunnel,
+ .cb_handler = xfrmi_rcv_cb,
+ .err_handler = xfrmi6_err,
+ .priority = -1,
+};
+
+static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = {
+ .handler = xfrmi6_rcv_tunnel,
+ .cb_handler = xfrmi_rcv_cb,
+ .err_handler = xfrmi6_err,
+ .priority = -1,
+};
+#endif
+
static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
.handler = xfrm4_rcv,
.input_handler = xfrm_input,
@@ -824,6 +865,27 @@ static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = {
.priority = 10,
};
+#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
+static int xfrmi4_rcv_tunnel(struct sk_buff *skb)
+{
+ return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr);
+}
+
+static struct xfrm_tunnel xfrmi_ipip_handler __read_mostly = {
+ .handler = xfrmi4_rcv_tunnel,
+ .cb_handler = xfrmi_rcv_cb,
+ .err_handler = xfrmi4_err,
+ .priority = -1,
+};
+
+static struct xfrm_tunnel xfrmi_ipip6_handler __read_mostly = {
+ .handler = xfrmi4_rcv_tunnel,
+ .cb_handler = xfrmi_rcv_cb,
+ .err_handler = xfrmi4_err,
+ .priority = -1,
+};
+#endif
+
static int __init xfrmi4_init(void)
{
int err;
@@ -837,9 +899,23 @@ static int __init xfrmi4_init(void)
err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
if (err < 0)
goto xfrm_proto_comp_failed;
+#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
+ err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET);
+ if (err < 0)
+ goto xfrm_tunnel_ipip_failed;
+ err = xfrm4_tunnel_register(&xfrmi_ipip6_handler, AF_INET6);
+ if (err < 0)
+ goto xfrm_tunnel_ipip6_failed;
+#endif
return 0;
+#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
+xfrm_tunnel_ipip6_failed:
+ xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET);
+xfrm_tunnel_ipip_failed:
+ xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
+#endif
xfrm_proto_comp_failed:
xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
xfrm_proto_ah_failed:
@@ -850,6 +926,10 @@ xfrm_proto_esp_failed:
static void xfrmi4_fini(void)
{
+#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
+ xfrm4_tunnel_deregister(&xfrmi_ipip6_handler, AF_INET6);
+ xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET);
+#endif
xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
@@ -868,9 +948,23 @@ static int __init xfrmi6_init(void)
err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
if (err < 0)
goto xfrm_proto_comp_failed;
+#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
+ err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET6);
+ if (err < 0)
+ goto xfrm_tunnel_ipv6_failed;
+ err = xfrm6_tunnel_register(&xfrmi_ip6ip_handler, AF_INET);
+ if (err < 0)
+ goto xfrm_tunnel_ip6ip_failed;
+#endif
return 0;
+#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
+xfrm_tunnel_ip6ip_failed:
+ xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6);
+xfrm_tunnel_ipv6_failed:
+ xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
+#endif
xfrm_proto_comp_failed:
xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
xfrm_proto_ah_failed:
@@ -881,6 +975,10 @@ xfrm_proto_esp_failed:
static void xfrmi6_fini(void)
{
+#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
+ xfrm6_tunnel_deregister(&xfrmi_ip6ip_handler, AF_INET);
+ xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6);
+#endif
xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 564aa6492e7c..042ea9b40c7b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -39,7 +39,7 @@
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif
-#ifdef CONFIG_INET_ESPINTCP
+#ifdef CONFIG_XFRM_ESPINTCP
#include <net/espintcp.h>
#endif
@@ -1433,14 +1433,10 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
spin_unlock_bh(&pq->hold_queue.lock);
}
-static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
- struct xfrm_policy *pol)
+static inline bool xfrm_policy_mark_match(const struct xfrm_mark *mark,
+ struct xfrm_policy *pol)
{
- if (policy->mark.v == pol->mark.v &&
- policy->priority == pol->priority)
- return true;
-
- return false;
+ return mark->v == pol->mark.v && mark->m == pol->mark.m;
}
static u32 xfrm_pol_bin_key(const void *data, u32 len, u32 seed)
@@ -1503,7 +1499,7 @@ static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
if (pol->type == policy->type &&
pol->if_id == policy->if_id &&
!selector_cmp(&pol->selector, &policy->selector) &&
- xfrm_policy_mark_match(policy, pol) &&
+ xfrm_policy_mark_match(&policy->mark, pol) &&
xfrm_sec_ctx_match(pol->security, policy->security) &&
!WARN_ON(delpol)) {
delpol = pol;
@@ -1538,7 +1534,7 @@ static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
if (pol->type == policy->type &&
pol->if_id == policy->if_id &&
!selector_cmp(&pol->selector, &policy->selector) &&
- xfrm_policy_mark_match(policy, pol) &&
+ xfrm_policy_mark_match(&policy->mark, pol) &&
xfrm_sec_ctx_match(pol->security, policy->security) &&
!WARN_ON(delpol)) {
if (excl)
@@ -1610,9 +1606,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
EXPORT_SYMBOL(xfrm_policy_insert);
static struct xfrm_policy *
-__xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
- u8 type, int dir,
- struct xfrm_selector *sel,
+__xfrm_policy_bysel_ctx(struct hlist_head *chain, const struct xfrm_mark *mark,
+ u32 if_id, u8 type, int dir, struct xfrm_selector *sel,
struct xfrm_sec_ctx *ctx)
{
struct xfrm_policy *pol;
@@ -1623,7 +1618,7 @@ __xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
hlist_for_each_entry(pol, chain, bydst) {
if (pol->type == type &&
pol->if_id == if_id &&
- (mark & pol->mark.m) == pol->mark.v &&
+ xfrm_policy_mark_match(mark, pol) &&
!selector_cmp(sel, &pol->selector) &&
xfrm_sec_ctx_match(ctx, pol->security))
return pol;
@@ -1632,11 +1627,10 @@ __xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
return NULL;
}
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
- u8 type, int dir,
- struct xfrm_selector *sel,
- struct xfrm_sec_ctx *ctx, int delete,
- int *err)
+struct xfrm_policy *
+xfrm_policy_bysel_ctx(struct net *net, const struct xfrm_mark *mark, u32 if_id,
+ u8 type, int dir, struct xfrm_selector *sel,
+ struct xfrm_sec_ctx *ctx, int delete, int *err)
{
struct xfrm_pol_inexact_bin *bin = NULL;
struct xfrm_policy *pol, *ret = NULL;
@@ -1703,9 +1697,9 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id,
- u8 type, int dir, u32 id, int delete,
- int *err)
+struct xfrm_policy *
+xfrm_policy_byid(struct net *net, const struct xfrm_mark *mark, u32 if_id,
+ u8 type, int dir, u32 id, int delete, int *err)
{
struct xfrm_policy *pol, *ret;
struct hlist_head *chain;
@@ -1720,8 +1714,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id,
ret = NULL;
hlist_for_each_entry(pol, chain, byidx) {
if (pol->type == type && pol->index == id &&
- pol->if_id == if_id &&
- (mark & pol->mark.m) == pol->mark.v) {
+ pol->if_id == if_id && xfrm_policy_mark_match(mark, pol)) {
xfrm_pol_hold(pol);
if (delete) {
*err = security_xfrm_policy_delete(
@@ -2758,6 +2751,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
struct xfrm_policy_queue *pq = &pol->polq;
struct flowi fl;
struct sk_buff_head list;
+ __u32 skb_mark;
spin_lock(&pq->hold_queue.lock);
skb = skb_peek(&pq->hold_queue);
@@ -2767,7 +2761,12 @@ static void xfrm_policy_queue_process(struct timer_list *t)
}
dst = skb_dst(skb);
sk = skb->sk;
+
+ /* Fixup the mark to support VTI. */
+ skb_mark = skb->mark;
+ skb->mark = pol->mark.v;
xfrm_decode_session(skb, &fl, dst->ops->family);
+ skb->mark = skb_mark;
spin_unlock(&pq->hold_queue.lock);
dst_hold(xfrm_dst_path(dst));
@@ -2799,7 +2798,12 @@ static void xfrm_policy_queue_process(struct timer_list *t)
while (!skb_queue_empty(&list)) {
skb = __skb_dequeue(&list);
+ /* Fixup the mark to support VTI. */
+ skb_mark = skb->mark;
+ skb->mark = pol->mark.v;
xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
+ skb->mark = skb_mark;
+
dst_hold(xfrm_dst_path(skb_dst(skb)));
dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0);
if (IS_ERR(dst)) {
@@ -4156,7 +4160,7 @@ void __init xfrm_init(void)
seqcount_init(&xfrm_policy_hash_generation);
xfrm_input_init();
-#ifdef CONFIG_INET_ESPINTCP
+#ifdef CONFIG_XFRM_ESPINTCP
espintcp_init();
#endif
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 98943f8d01aa..c6a4338a0d08 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -89,7 +89,8 @@ static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq;
XFRM_SKB_CB(skb)->seq.output.hi = 0;
- if (unlikely(x->replay.oseq == 0)) {
+ if (unlikely(x->replay.oseq == 0) &&
+ !(x->props.extra_flags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP)) {
x->replay.oseq--;
xfrm_audit_state_replay_overflow(x, skb);
err = -EOVERFLOW;
@@ -168,7 +169,8 @@ static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb)
if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq;
XFRM_SKB_CB(skb)->seq.output.hi = 0;
- if (unlikely(replay_esn->oseq == 0)) {
+ if (unlikely(replay_esn->oseq == 0) &&
+ !(x->props.extra_flags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP)) {
replay_esn->oseq--;
xfrm_audit_state_replay_overflow(x, skb);
err = -EOVERFLOW;
@@ -572,7 +574,8 @@ static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *sk
XFRM_SKB_CB(skb)->seq.output.hi = 0;
xo->seq.hi = 0;
- if (unlikely(oseq < x->replay.oseq)) {
+ if (unlikely(oseq < x->replay.oseq) &&
+ !(x->props.extra_flags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP)) {
xfrm_audit_state_replay_overflow(x, skb);
err = -EOVERFLOW;
@@ -611,7 +614,8 @@ static int xfrm_replay_overflow_offload_bmp(struct xfrm_state *x, struct sk_buff
XFRM_SKB_CB(skb)->seq.output.hi = 0;
xo->seq.hi = 0;
- if (unlikely(oseq < replay_esn->oseq)) {
+ if (unlikely(oseq < replay_esn->oseq) &&
+ !(x->props.extra_flags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP)) {
xfrm_audit_state_replay_overflow(x, skb);
err = -EOVERFLOW;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 8be2d926acc2..69520ad3d83b 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2264,7 +2264,7 @@ static bool km_is_alive(const struct km_event *c)
return is_alive;
}
-int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
+int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen)
{
int err;
u8 *data;
@@ -2274,7 +2274,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
if (in_compat_syscall())
return -EOPNOTSUPP;
- if (!optval && !optlen) {
+ if (sockptr_is_null(optval) && !optlen) {
xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
__sk_dst_reset(sk);
@@ -2284,7 +2284,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
if (optlen <= 0 || optlen > PAGE_SIZE)
return -EMSGSIZE;
- data = memdup_user(optval, optlen);
+ data = memdup_sockptr(optval, optlen);
if (IS_ERR(data))
return PTR_ERR(data);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e6cfaa680ef3..fbb7d9d06478 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1863,7 +1863,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
struct km_event c;
int delete;
struct xfrm_mark m;
- u32 mark = xfrm_mark_get(attrs, &m);
u32 if_id = 0;
p = nlmsg_data(nlh);
@@ -1880,8 +1879,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
if (attrs[XFRMA_IF_ID])
if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+ xfrm_mark_get(attrs, &m);
+
if (p->index)
- xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, delete, &err);
+ xp = xfrm_policy_byid(net, &m, if_id, type, p->dir,
+ p->index, delete, &err);
else {
struct nlattr *rt = attrs[XFRMA_SEC_CTX];
struct xfrm_sec_ctx *ctx;
@@ -1898,8 +1900,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
return err;
}
- xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir, &p->sel,
- ctx, delete, &err);
+ xp = xfrm_policy_bysel_ctx(net, &m, if_id, type, p->dir,
+ &p->sel, ctx, delete, &err);
security_xfrm_policy_free(ctx);
}
if (xp == NULL)
@@ -2166,7 +2168,6 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
u8 type = XFRM_POLICY_TYPE_MAIN;
int err = -ENOENT;
struct xfrm_mark m;
- u32 mark = xfrm_mark_get(attrs, &m);
u32 if_id = 0;
err = copy_from_user_policy_type(&type, attrs);
@@ -2180,8 +2181,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
if (attrs[XFRMA_IF_ID])
if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+ xfrm_mark_get(attrs, &m);
+
if (p->index)
- xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, 0, &err);
+ xp = xfrm_policy_byid(net, &m, if_id, type, p->dir, p->index,
+ 0, &err);
else {
struct nlattr *rt = attrs[XFRMA_SEC_CTX];
struct xfrm_sec_ctx *ctx;
@@ -2198,7 +2202,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
return err;
}
- xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir,
+ xp = xfrm_policy_bysel_ctx(net, &m, if_id, type, p->dir,
&p->sel, ctx, 0, &err);
security_xfrm_policy_free(ctx);
}